pycoustic 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycoustic/log.py +10 -4
- pycoustic/pycoustic_gui_app-ai.py +635 -0
- pycoustic/pycoustic_streamlit_gpt5.py +421 -0
- pycoustic/streamlit-ai.py +401 -431
- pycoustic/streamlit-new.py +142 -0
- pycoustic/streamlit_pycoustic_gpt5_dead.py +234 -0
- pycoustic/survey.py +148 -33
- {pycoustic-0.1.8.dist-info → pycoustic-0.1.10.dist-info}/METADATA +4 -2
- pycoustic-0.1.10.dist-info/RECORD +14 -0
- pycoustic-0.1.8.dist-info/RECORD +0 -10
- {pycoustic-0.1.8.dist-info → pycoustic-0.1.10.dist-info}/WHEEL +0 -0
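The streamlit-ai.py comparison shown below can be reproduced locally from the two wheels with the standard library alone. A minimal sketch, assuming both wheels have already been downloaded (the file names follow the usual pycoustic-<version>-py3-none-any.whl convention and are assumptions here):

    # Sketch: rebuild the per-file diff from two downloaded wheels (stdlib only).
    import difflib
    import zipfile

    OLD = "pycoustic-0.1.8-py3-none-any.whl"   # assumed local file name
    NEW = "pycoustic-0.1.10-py3-none-any.whl"  # assumed local file name

    def read_member(wheel: str, member: str) -> list[str]:
        # A wheel is a zip archive, so its members can be read directly.
        with zipfile.ZipFile(wheel) as zf:
            return zf.read(member).decode("utf-8", errors="replace").splitlines()

    diff = difflib.unified_diff(
        read_member(OLD, "pycoustic/streamlit-ai.py"),
        read_member(NEW, "pycoustic/streamlit-ai.py"),
        fromfile="0.1.8/pycoustic/streamlit-ai.py",
        tofile="0.1.10/pycoustic/streamlit-ai.py",
        lineterm="",
    )
    print("\n".join(diff))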
pycoustic/streamlit-ai.py
CHANGED
@@ -1,522 +1,492 @@
-#
-# Streamlit
+# streamlit-ai.py
+# Streamlit UI for inspecting noise survey data and plots

 from __future__ import annotations

+import os
 import io
 import re
+import tempfile
 from typing import Dict, Iterable, List, Optional, Tuple

 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 import streamlit as st
+import hashlib

+# Ensure submodules are imported via the package path so their relative imports work.
+try:
+    from pycoustic.survey import *
+except Exception:
+    # Fallback for local runs
+    from survey import *

-
-
-
+try:
+    from pycoustic.log import *
+except Exception:
+    from log import *
+
+from pycoustic import Log  # expects a Log class exposing a .df with a DateTimeIndex
+
+# Support both package and script usage for the WeatherHistory import
+try:
+    from .weather import WeatherHistory
+except ImportError:
+    from weather import WeatherHistory
+
+# Plot styling
 COLOURS: List[str] = [
-    "#1f77b4",
-    "#
+    "#1f77b4",
+    "#ff7f0e",
+    "#2ca02c",
+    "#d62728",
+    "#9467bd",
+    "#8c564b",
+    "#e377c2",
+    "#7f7f7f",
+    "#bcbd22",
+    "#17becf",
 ]
 TEMPLATE: str = "plotly_white"


-
-
-
-
-
-
-
-
-
-
-
-
-
-if
-
-
-
-
-
-
-
-
-
-if
-
-
-
-
-
-
+def _try_read_table(
+    file_like_or_bytes: io.BytesIO | bytes | str,
+    filename: Optional[str] = None,
+    encoding: Optional[str] = None,
+) -> pd.DataFrame:
+    """
+    Attempt to read a tabular file (CSV preferred; Excel fallback).
+    - file_like_or_bytes may be a path, raw bytes, or a BytesIO-like object.
+    - filename helps determine the extension when not a path.
+    """
+    def _ensure_buffer(obj) -> io.BytesIO:
+        if isinstance(obj, (bytes, bytearray)):
+            return io.BytesIO(obj)
+        if hasattr(obj, "read"):
+            return obj  # assume file-like opened in binary mode
+        if isinstance(obj, str):
+            # Path-like; will be handled by pandas directly, so we return None
+            return None
+        raise TypeError("Unsupported input type for _try_read_table")
+
+    ext = None
+    if isinstance(file_like_or_bytes, str):
+        lower = file_like_or_bytes.lower()
+        if lower.endswith(".csv"):
+            ext = ".csv"
+        elif lower.endswith((".xlsx", ".xlsm", ".xlsb", ".xls")):
+            ext = ".xlsx"
+    elif filename:
+        lower = filename.lower()
+        if lower.endswith(".csv"):
+            ext = ".csv"
+        elif lower.endswith((".xlsx", ".xlsm", ".xlsb", ".xls")):
+            ext = ".xlsx"
+
+    # Prefer CSV
+    if ext in (None, ".csv"):
+        try:
+            if isinstance(file_like_or_bytes, str):
+                df = pd.read_csv(file_like_or_bytes, encoding=encoding)
+            else:
+                buf = _ensure_buffer(file_like_or_bytes)
+                if buf is None:  # pragma: no cover
+                    raise ValueError("Failed to open buffer for CSV")
+                df = pd.read_csv(buf, encoding=encoding)
+            return _flatten_columns(df)
+        except Exception:
+            # Try Excel as fallback
+            pass
+
+    # Excel fallback
+    if isinstance(file_like_or_bytes, str):
+        df = pd.read_excel(file_like_or_bytes)
+    else:
+        buf = _ensure_buffer(file_like_or_bytes)
+        if buf is None:  # pragma: no cover
+            raise ValueError("Failed to open buffer for Excel")
+        df = pd.read_excel(buf)
+    return _flatten_columns(df)


 def _flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
-
+    """
+    Flatten MultiIndex columns into simple strings.
+    """
     if isinstance(df.columns, pd.MultiIndex):
-        flat = [" | ".join([str(x) for x in tup if x is not None]) for tup in df.columns]
         df = df.copy()
-        df.columns =
+        df.columns = [" ".join([str(p) for p in tup if p is not None]).strip() for tup in df.columns.values]
+    else:
+        # Ensure all columns are strings, stripped
+        df = df.rename(columns=lambda c: str(c).strip())
     return df


-def _maybe_parse_datetime(
-
-
-
-
+def _maybe_parse_datetime(
+    df: pd.DataFrame,
+    candidates: Iterable[str] = ("timestamp", "time", "date", "datetime"),
+    utc: bool = False,
+) -> pd.DataFrame:
+    """
+    If a plausible timestamp column exists, convert to DatetimeIndex.
+    Does nothing if index is already DatetimeIndex.
+    """
+    if isinstance(df.index, pd.DatetimeIndex):
+        return df
+    df = df.copy()
+    lower_cols = {str(c).lower(): c for c in df.columns}
+    for key in candidates:
+        if key in lower_cols:
+            col = lower_cols[key]
             try:
-
-
-
-                return
+                ts = pd.to_datetime(df[col], utc=utc, errors="raise")
+                df = df.set_index(ts)
+                df.index.name = "timestamp"
+                return df.drop(columns=[col])
             except Exception:
                 continue
     return df


 def _detect_position_col(df: pd.DataFrame) -> Optional[str]:
-
-
-
-
-
-
-
-
-
+    """
+    Attempt to find a 'position' or 'location' column.
+    """
+    patterns = [
+        re.compile(r"\bposition\b", re.I),
+        re.compile(r"\bpos\b", re.I),
+        re.compile(r"\blocation\b", re.I),
+        re.compile(r"\bsite\b", re.I),
+    ]
+    for c in df.columns:
+        name = str(c)
+        for pat in patterns:
+            if pat.search(name):
+                return c
     return None


 def _metric_patterns() -> Dict[str, re.Pattern]:
-
-
-
-    #
-    def freq_part():
-        # numbers like 31.5, 1000, 1k, 2 kHz, etc.
-        return r"(?P<freq>(\d+(\.\d+)?)(\s*k(hz)?)?)"
-
-    # metric-first naming: "Leq_31.5", "Lmax 1000", "Leq-1k", "Leq 1 kHz"
-    metric_first = rf"^(?P<metric>Leq|Lmax)[\s_\-]*{freq_part()}(hz)?$"
-    # freq-first naming: "31.5", "63 Hz", "1k", with an optional suffix metric after a sep: "63Hz_Leq"
-    freq_first = rf"^{freq_part()}(hz)?[\s_\-]*(?P<metric>Leq|Lmax)?$"
+    """
+    Patterns for commonly used acoustic metrics.
+    """
+    # Matches LAeq, Leq A, Leq(A), etc.
     return {
-        "
-        "
+        "Leq": re.compile(r"\bL\s*eq\b(?:\s*\(?\s*A\s*\)?)?", re.I),
+        "Lmax": re.compile(r"\bL\s*max\b(?:\s*\(?\s*A\s*\)?)?", re.I),
+        "L90": re.compile(r"\bL\s*90\b(?:\s*\(?\s*A\s*\)?)?", re.I),
     }


-def _parse_freq_to_hz(
-
-
-
-    s =
-
-
-    if m:
-        return float(m.group(1)) * 1000.0
-    try:
-        return float(s)
-    except Exception:
+def _parse_freq_to_hz(label: str) -> Optional[float]:
+    """
+    Parse frequency-like column labels such as '63 Hz', '1k', '1 kHz', etc.
+    """
+    s = str(label).strip().lower()
+    m = re.match(r"^\s*([0-9]*\.?[0-9]+)\s*(k|khz|hz)?\s*$", s)
+    if not m:
         return None
+    val = float(m.group(1))
+    unit = m.group(2)
+    if not unit or unit == "hz":
+        return val
+    return val * 1000.0


-def spectra_to_rows(
+def spectra_to_rows(
+    df: pd.DataFrame,
+    value_col_name: str = "Level (dB)",
+) -> Optional[pd.DataFrame]:
     """
-    Convert wide
-
-    Non-spectral columns are carried along.
+    Convert wide spectra columns (e.g., '63 Hz', '1 kHz') into (freq_hz, value) rows.
+    Returns None if no spectra-like columns are detected.
     """
-
-
-
-
-
-
-
-
-        # metric first
-        m1 = patterns["metric_first"].match(col_str)
-        if m1:
-            metric = m1.group("metric").upper()
-            freq_hz = _parse_freq_to_hz(m1.group("freq"))
-            if metric in ("LEQ", "LMAX") and freq_hz is not None:
-                spectral_cols.append((col, metric, freq_hz))
-                matched = True
-
-        if matched:
-            continue
-
-        # frequency first
-        m2 = patterns["freq_first"].match(col_str)
-        if m2:
-            metric = m2.group("metric")
-            metric = metric.upper() if metric else None
-            freq_hz = _parse_freq_to_hz(m2.group("freq"))
-            if freq_hz is not None:
-                # If metric is not embedded, we will treat it as "LEQ" by default for plotting,
-                # but also keep the column name when we pivot.
-                spectral_cols.append((col, metric or "LEQ", freq_hz))
-
-    if not spectral_cols:
-        return pd.DataFrame(columns=["Frequency_Hz", "Metric", "Value"])
-
-    # Build tidy rows
-    id_cols = [c for c in df.columns if c not in [c0 for (c0, _, _) in spectral_cols]]
-    tidies: List[pd.DataFrame] = []
-    for (col, metric, f_hz) in spectral_cols:
-        block = pd.DataFrame(
-            {
-                "Frequency_Hz": f_hz,
-                "Metric": metric,
-                "Value": df[col].astype("float64").values,
-            }
-        )
-        # Attach IDs if present
-        if id_cols:
-            block = pd.concat([df[id_cols].reset_index(drop=True), block], axis=1)
-        tidies.append(block)
+    freq_cols: List[Tuple[str, float]] = []
+    for c in df.columns:
+        hz = _parse_freq_to_hz(c)
+        if hz is not None:
+            freq_cols.append((c, hz))
+
+    if not freq_cols:
+        return None

-
-
-
-
+    freq_cols_sorted = sorted(freq_cols, key=lambda x: x[1])
+    melted = df[fname_list := [c for c, _ in freq_cols_sorted]].copy()
+    melted.columns = [f"{hz:.6g}" for _, hz in freq_cols_sorted]
+    out = melted.melt(var_name="freq_hz", value_name=value_col_name)
+    out["freq_hz"] = out["freq_hz"].astype(float)
+    return out.sort_values("freq_hz").reset_index(drop=True)


-def _resample_if_possible(df: pd.DataFrame,
+def _resample_if_possible(df: pd.DataFrame, rule: str = "1S") -> pd.DataFrame:
     """
-
+    Downsample a DateTimeIndex DataFrame for responsiveness.
     """
-    if not
+    if not isinstance(df.index, pd.DatetimeIndex):
         return df
-
-
-    dt_col = None
-    for c in df.columns:
-        if pd.api.types.is_datetime64_any_dtype(df[c]):
-            dt_col = c
-            break
-
-    if dt_col is None:
-        return df  # nothing to resample on
-
-    df_sorted = df.sort_values(dt_col)
-    df_sorted = df_sorted.set_index(dt_col)
-
-    # numeric only for resample
-    numeric_cols = df_sorted.select_dtypes(include=["number"]).columns
-    if len(numeric_cols) == 0:
+    numeric_cols = df.select_dtypes(include=[np.number]).columns
+    if numeric_cols.empty:
         return df
+    return df[numeric_cols].resample(rule).median().dropna(how="all")
+

-
-
-
+def _build_summary(
+    df: pd.DataFrame,
+    position_col: Optional[str] = None,
+    metrics: Optional[Iterable[str]] = None,
+) -> pd.DataFrame:
+    """
+    Build simple summary stats for selected metrics, optionally grouped by position.
+    """
+    if metrics is None:
+        metrics = ["Leq", "Lmax", "L90"]

+    pats = _metric_patterns()
+    metric_cols: Dict[str, List[str]] = {m: [] for m in metrics}
+    for c in df.columns:
+        for m in metrics:
+            if pats.get(m) and pats[m].search(str(c)):
+                metric_cols[m].append(c)

-
-
-
-
-
-        return pd.DataFrame()
-    s = df[numeric_cols].agg(["count", "mean", "std", "min", "max"]).T.reset_index()
-    s.columns = ["Metric"] + list(s.columns[1:])
-    return s
+    work = {}
+    for m, cols in metric_cols.items():
+        sel = df[cols].select_dtypes(include=[np.number]) if cols else pd.DataFrame()
+        if not sel.empty:
+            work[m] = sel.mean(axis=1)

-
-    numeric_cols = df.select_dtypes(include=["number"]).columns
-    if len(numeric_cols) == 0:
+    if not work:
         return pd.DataFrame()

-
-
-
-
-
-
-
-
-
-            out_cols.append(str(lvl0))
-        elif lvl0 in group_cols:
-            out_cols.append(str(lvl0))
-        else:
-            out_cols.append(f"{lvl0}__{lvl1}")
-    else:
-        out_cols.append(str(tup))
-    g.columns = out_cols
-    return g
-
-
-def _guess_resample_options(df: pd.DataFrame) -> List[Tuple[str, str]]:
-    # Label, pandas rule
-    has_dt = any(pd.api.types.is_datetime64_any_dtype(df[c]) for c in df.columns)
-    if not has_dt:
-        return [("None", "")]
-    return [
-        ("None", ""),
-        ("1 minute", "1min"),
-        ("5 minutes", "5min"),
-        ("15 minutes", "15min"),
-        ("Hourly", "1H"),
-        ("Daily", "1D"),
-    ]
+    agg_df = pd.DataFrame(work)
+    if position_col and position_col in df.columns:
+        agg_df[position_col] = df[position_col].values[: len(agg_df)]
+        g = agg_df.groupby(position_col, dropna=False).agg(["mean", "min", "max", "count"])
+        # Flatten columns
+        g.columns = [" ".join([str(p) for p in col if p]).strip() for col in g.columns.values]
+        return g.reset_index()
+    else:
+        return agg_df.agg(["mean", "min", "max", "count"]).T.reset_index(names=["Metric"])


-def
+def _guess_resample_options(n: int) -> str:
+    """
+    Heuristically pick a resampling rule based on number of points.
+    """
+    if n > 200_000:
+        return "10S"
+    if n > 50_000:
+        return "5S"
+    if n > 10_000:
+        return "2S"
+    return "1S"
+
+
+def _plot_spectra(
+    df_rows: pd.DataFrame,
+    title: str = "Spectral Levels",
+    value_col_name: str = "Level (dB)",
+) -> go.Figure:
+    """
+    Plot spectra contained in a (freq_hz, value) rows dataframe.
+    """
     fig = go.Figure()
-    if
-        fig.
-
-
-
-
-
-
-    if color_by and color_by in tidy_spec.columns:
-        groups = list(tidy_spec[color_by].astype(str).unique())
-        for i, key in enumerate(groups):
-            sub = tidy_spec[tidy_spec[color_by].astype(str) == str(key)]
-            # Keep metric separated as line style if available
-            if "Metric" in sub.columns and sub["Metric"].nunique() > 1:
-                for metric in sorted(sub["Metric"].unique()):
-                    subm = sub[sub["Metric"] == metric]
-                    fig.add_trace(
-                        go.Scatter(
-                            x=subm["Frequency_Hz"],
-                            y=subm["Value"],
-                            mode="lines+markers",
-                            name=f"{key} – {metric}",
-                            line=dict(color=COLOURS[i % len(COLOURS)], dash="solid" if metric == "LEQ" else "dash"),
-                            marker=dict(size=6),
-                        )
-                    )
-            else:
-                fig.add_trace(
-                    go.Scatter(
-                        x=sub["Frequency_Hz"],
-                        y=sub["Value"],
-                        mode="lines+markers",
-                        name=str(key),
-                        line=dict(color=COLOURS[i % len(COLOURS)]),
-                        marker=dict(size=6),
-                    )
-                )
-    else:
-        # single trace per metric
-        if "Metric" in tidy_spec.columns:
-            for i, metric in enumerate(sorted(tidy_spec["Metric"].unique())):
-                sub = tidy_spec[tidy_spec["Metric"] == metric]
-                fig.add_trace(
-                    go.Scatter(
-                        x=sub["Frequency_Hz"],
-                        y=sub["Value"],
-                        mode="lines+markers",
-                        name=str(metric),
-                        line=dict(color=COLOURS[i % len(COLOURS)]),
-                        marker=dict(size=6),
-                    )
-                )
-        else:
-            fig.add_trace(
-                go.Scatter(
-                    x=x,
-                    y=tidy_spec["Value"],
-                    mode="lines+markers",
-                    name="Spectrum",
-                    line=dict(color=COLOURS[0]),
-                    marker=dict(size=6),
-                )
+    if {"freq_hz", value_col_name}.issubset(df_rows.columns):
+        fig.add_trace(
+            go.Scatter(
+                x=df_rows["freq_hz"],
+                y=df_rows[value_col_name],
+                mode="lines+markers",
+                line=dict(color=COLOURS[0]),
+                name=value_col_name,
             )
-
+        )
     fig.update_layout(
         template=TEMPLATE,
-
-
-
-
-
-            gridcolor="rgba(0,0,0,0.1)",
-        ),
-        yaxis=dict(title="Level (dB)", gridcolor="rgba(0,0,0,0.1)"),
-        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
-        margin=dict(l=50, r=30, t=40, b=50),
+        xaxis_title="Frequency (Hz)",
+        yaxis_title=value_col_name,
+        hovermode="x",
+        margin=dict(l=10, r=10, t=30, b=10),
+        title=title,
     )
+    fig.update_xaxes(type="log", tickformat=".0f")
     return fig


-def _download_csv_button(label: str, df: pd.DataFrame,
-
+def _download_csv_button(label: str, df: pd.DataFrame, file_name: str) -> None:
+    """
+    Render a download button for a dataframe as CSV.
+    """
+    csv_bytes = df.to_csv(index=False).encode("utf-8")
     st.download_button(
         label=label,
-        data=
-        file_name=
+        data=csv_bytes,
+        file_name=file_name,
         mime="text/csv",
-        use_container_width=True,
     )


-#
-
-
-
-
+# === New helpers for DateTimeIndex-based Log flow ===
+
+@st.cache_data(
+    show_spinner=False,
+    # Ensure memoryview objects are safely hashable for the cache.
+    hash_funcs={memoryview: lambda v: v.tobytes()},
+)
+def _load_log_df_from_bytes(
+    _file_bytes: bytes | memoryview,
+    file_suffix: str = ".csv",
+    content_key: str | None = None,
+) -> pd.DataFrame:
+    """
+    Persist uploaded bytes to a temp file and create a Log to obtain a DataFrame.
+    Requires Log().df to have a DateTimeIndex.
+
+    _file_bytes is ignored by Streamlit's cache hasher (leading underscore).
+    Optionally pass content_key (e.g., a sha256 of the bytes) so the cache
+    invalidates when the content changes.
+    """
+    # Keep content_key in the signature so it participates in the cache key.
+    _ = content_key
+
+    # Coerce memoryview to bytes so file IO is consistent.
+    if isinstance(_file_bytes, memoryview):
+        _file_bytes = _file_bytes.tobytes()
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=file_suffix) as tmp:
+        tmp.write(_file_bytes)
+        tmp_path = tmp.name
+    try:
+        log = Log(path=tmp_path)
+        if hasattr(log, "df") and isinstance(log.df, pd.DataFrame):
+            df = log.df.copy()
+        else:
+            raise ValueError("Log() did not expose a DataFrame on .df")
+    finally:
+        try:
+            os.unlink(tmp_path)
+        except Exception:
+            pass
+
+    if not isinstance(df.index, pd.DatetimeIndex):
+        raise ValueError("Expected Log().df to have a DateTimeIndex for timestamps.")
+    return df
+
+
+def _find_metric_columns(df: pd.DataFrame) -> Dict[str, str]:
+    """
+    Locate the canonical metric columns using case-insensitive matching.
+    Returns a mapping from UI label to actual column name.
+    """
+    targets: List[str] = ["Leq A", "Lmax A", "L90 A"]
+    norm_map = {str(c).strip().lower(): c for c in df.columns}
+    found: Dict[str, str] = {}
+    for label in targets:
+        key = label.lower()
+        if key in norm_map:
+            found[label] = norm_map[key]
+    return found
+
+
+def _build_time_history_figure(df: pd.DataFrame, series_map: Dict[str, str]) -> go.Figure:
+    """
+    Build a Plotly figure for time history lines using the DataFrame's DateTimeIndex.
+    """
+    fig = go.Figure()
+    for i, (label, col) in enumerate(series_map.items()):
+        fig.add_trace(
+            go.Scatter(
+                x=df.index,
+                y=df[col],
+                mode="lines",
+                name=label,
+                line=dict(color=COLOURS[i % len(COLOURS)]),
+            )
+        )
+    fig.update_layout(
+        template=TEMPLATE,
+        xaxis_title="Timestamp",
+        yaxis_title="Level (dB)",
+        hovermode="x unified",
+        legend_title_text="Series",
+        margin=dict(l=10, r=10, t=30, b=10),
+        height=420,
+    )
+    return fig
+
+
+def main() -> None:
+    st.set_page_config(page_title="pycoustic - Noise survey toolkit", layout="wide")
+    st.title("Noise survey toolkit")

-    st.
-    st.caption("Upload measurement logs, explore summaries and spectra, and export results.")
+    st.caption("Upload logs to visualize time histories. CSV files are supported.")

-    with
-
-
-        "Upload one or more
-        type=["csv"
+    # === Time history plots based on Log().df with a DateTimeIndex ===
+    with st.expander("Time history plots (Leq A, Lmax A, L90 A)", expanded=True):
+        uploaded = st.file_uploader(
+            "Upload one or more CSV files",
+            type=["csv"],
             accept_multiple_files=True,
+            key="time_history_logs_uploader",
         )

-
-
+        if uploaded:
+            # User options
+            max_points = st.number_input("Max points per series (downsampling)", 1_000, 1_000_000, value=10_000, step=1_000)
+            for file in uploaded:
+                st.markdown(f"**File:** {file.name}")

-
-
-
-
+                # Load via Log() and enforce DateTimeIndex
+                try:
+                    # Suppose you have file_bytes or a memoryview named mv
+                    file_bytes = file.getbuffer()
+                    data = file_bytes if isinstance(file_bytes, (bytes, bytearray)) else file_bytes.tobytes()
+                    content_key = hashlib.sha256(data).hexdigest()

-
-
+                    df = _load_log_df_from_bytes(data, file_suffix=".csv", content_key=content_key)
+                except Exception as e:
+                    st.error(f"Could not load Log() from file: {e}")
+                    continue

-
+                if df.empty:
+                    st.warning("No data available.")
+                    continue

-
-
-        colour_by = st.text_input("Colour lines by column (e.g., Position or source)", value="source")
-        show_markers = st.checkbox("Show markers", value=True)
+                # Ensure sorted index
+                df = df.sort_index()

-
-
-
+                # Identify standard metric columns
+                available = _find_metric_columns(df)
+                missing = [label for label in ("Leq A", "Lmax A", "L90 A") if label not in available]

-
-
-        return
+                if missing:
+                    st.info(f"Missing columns: {', '.join(missing)}")

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # Now that we have data, rebuild resample options
-    resample_options = _guess_resample_options(raw_master)
-    resample_choice_label = st.sidebar.selectbox(
-        "Resample period",
-        [x[0] for x in resample_options],
-        index=0,
-        key="resample_choice",
-    )
-    resample_rule = dict(resample_options)[resample_choice_label]
-
-    # Optional resampling
-    df_used = _resample_if_possible(raw_master, resample_rule) if resample_rule else raw_master
-
-    # Try to determine a reasonable group column
-    detected_group_col = _detect_position_col(df_used)
-    group_col = (st.sidebar.text_input("Detected Group Column", value=detected_group_col or "") or "").strip()
-    if not group_col and group_hint.strip():
-        group_col = group_hint.strip()
-    if group_col and group_col not in df_used.columns:
-        st.warning(f"Group column '{group_col}' not found. It will be ignored.")
-        group_col = ""
-
-    # Build spectra in tidy form
-    tidy_spec = spectra_to_rows(df_used)
-
-    tabs = st.tabs(["Summary", "Spectra", "Raw Data", "Logs"])
-
-    # Summary tab
-    with tabs[0]:
-        st.subheader("Summary")
-        group_cols: List[str] = []
-        if group_col:
-            group_cols.append(group_col)
-        if "source" in df_used.columns:
-            group_cols.append("source")
-
-        summary_df = _build_summary(df_used, group_cols)
-        if summary_df.empty:
-            st.info("No numeric data to summarize.")
-        else:
-            st.dataframe(summary_df, use_container_width=True, hide_index=True)
-            _download_csv_button("Download summary CSV", summary_df, "summary")
-
-    # Spectra tab
-    with tabs[1]:
-        st.subheader("Spectra")
-
-        # Allow user to filter a group (optional)
-        filters_cols = []
-        if group_col:
-            filters_cols.append(group_col)
-        if "source" in tidy_spec.columns:
-            filters_cols.append("source")
-
-        sub = tidy_spec.copy()
-        # Dynamic filters
-        if not sub.empty and filters_cols:
-            cols = st.columns(len(filters_cols))
-            for i, colname in enumerate(filters_cols):
-                with cols[i]:
-                    uniq = sorted([str(x) for x in sub[colname].dropna().unique()])
-                    if len(uniq) <= 1:
-                        continue
-                    selected = st.multiselect(f"Filter {colname}", options=uniq, default=uniq)
-                    sub = sub[sub[colname].astype(str).isin(selected)]
-
-        # Plot
-        if sub.empty:
-            st.info("No spectral data detected in the uploaded files.")
-        else:
-            fig = _plot_spectra(sub, color_by=(colour_by if colour_by in sub.columns else None))
-            if not show_markers:
-                for tr in fig.data:
-                    tr.mode = "lines"
-            st.plotly_chart(fig, use_container_width=True)
-
-            # Download tidy spectra
-            _download_csv_button("Download tidy spectra CSV", sub, "spectra_tidy")
-
-    # Raw Data tab
-    with tabs[2]:
-        st.subheader("Raw Data")
-        if show_raw_preview:
-            st.dataframe(raw_master, use_container_width=True, hide_index=True)
-        else:
-            st.caption("Enable 'Show raw preview table' in the sidebar to render the full table.")
-        _download_csv_button("Download combined raw CSV", raw_master, "raw_combined")
+                if not available:
+                    st.warning("None of the required columns were found. Expected any of: Leq A, Lmax A, L90 A.")
+                    continue
+
+                # Optional downsampling for responsiveness
+                if len(df) > max_points:
+                    step = max(1, len(df) // int(max_points))
+                    df_plot = df.iloc[::step].copy()
+                else:
+                    df_plot = df
+
+                # Plot
+                fig = _build_time_history_figure(df_plot, available)
+                st.plotly_chart(fig, use_container_width=True)
+
+                # Simple summary table per visible series
+                with st.expander("Summary (visible series)"):
+                    numeric_summary = df_plot[list(available.values())].describe().T.reset_index(names=["Series"])
+                    _download_csv_button("Download summary CSV", numeric_summary, f"{os.path.splitext(file.name)[0]}_summary.csv")
+                    st.dataframe(numeric_summary, use_container_width=True)

-    #
-
-        st.subheader("Logs")
-        st.text_area("Ingestion log", value="\n".join(logs), height=240, label_visibility="collapsed")
+    # Placeholder for additional tools/sections (spectra, etc.) can go below.
+    # Existing/legacy time-history upload/plotting sections should be removed to avoid duplication.


 if __name__ == "__main__":
-    main()
+    main()
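For orientation, the rewritten streamlit-ai.py flow reduces to: parse the upload through Log(), require a DateTimeIndex on .df, match the canonical Leq A / Lmax A / L90 A columns case-insensitively, stride-downsample for responsiveness, then plot. A minimal sketch of that flow outside Streamlit; the file name example_log.csv is hypothetical, and it assumes Log(path=...) exposes .df as the diff expects:

    # Sketch only: mirrors the 0.1.10 helpers outside Streamlit (not part of the package).
    import pandas as pd
    from pycoustic import Log

    df = Log(path="example_log.csv").df.sort_index()  # "example_log.csv" is hypothetical
    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError("streamlit-ai.py requires Log().df to have a DateTimeIndex")

    # Case-insensitive column matching, as in _find_metric_columns().
    norm = {str(c).strip().lower(): c for c in df.columns}
    series = {label: norm[label.lower()]
              for label in ("Leq A", "Lmax A", "L90 A") if label.lower() in norm}

    # Stride-based downsampling, as in the max_points logic in main().
    max_points = 10_000
    step = max(1, len(df) // max_points)
    print(df.iloc[::step][list(series.values())].describe())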