pycoustic 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
pycoustic/streamlit-ai.py CHANGED
@@ -1,522 +1,492 @@
1
- # Python 3.12
2
- # Streamlit app entrypoint for PyCoustic-like workflow
1
+ # streamlit-ai.py
2
+ # Streamlit UI for inspecting noise survey data and plots
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
+ import os
6
7
  import io
7
8
  import re
9
+ import tempfile
8
10
  from typing import Dict, Iterable, List, Optional, Tuple
9
11
 
10
12
  import numpy as np
11
13
  import pandas as pd
12
14
  import plotly.graph_objects as go
13
15
  import streamlit as st
16
+ import hashlib
14
17
 
18
+ # Ensure submodules are imported via the package path so their relative imports work.
19
+ try:
20
+ from pycoustic.survey import *
21
+ except Exception:
22
+ # Fallback for local runs
23
+ from survey import *
15
24
 
16
- # -----------------------------
17
- # Plotting configuration
18
- # -----------------------------
25
+ try:
26
+ from pycoustic.log import *
27
+ except Exception:
28
+ from log import *
29
+
30
+ from pycoustic import Log # expects a Log class exposing a .df with a DateTimeIndex
31
+
32
+ # Support both package and script usage for the WeatherHistory import
33
+ try:
34
+ from .weather import WeatherHistory
35
+ except ImportError:
36
+ from weather import WeatherHistory
37
+
38
+ # Plot styling
19
39
  COLOURS: List[str] = [
20
- "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
21
- "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf",
40
+ "#1f77b4",
41
+ "#ff7f0e",
42
+ "#2ca02c",
43
+ "#d62728",
44
+ "#9467bd",
45
+ "#8c564b",
46
+ "#e377c2",
47
+ "#7f7f7f",
48
+ "#bcbd22",
49
+ "#17becf",
22
50
  ]
23
51
  TEMPLATE: str = "plotly_white"
24
52
 
25
53
 
26
- # -----------------------------
27
- # Helpers
28
- # -----------------------------
29
- def _try_read_table(upload) -> pd.DataFrame:
30
- # Try CSV first, then Excel
31
- name = getattr(upload, "name", "uploaded")
32
- data = upload.read() if hasattr(upload, "read") else upload.getvalue()
33
- # ensure the buffer can be reused for Excel attempt
34
- buf = io.BytesIO(data)
35
-
36
- # CSV attempt
37
- try:
38
- df_csv = pd.read_csv(io.BytesIO(data))
39
- if not df_csv.empty:
40
- df_csv.attrs["__source_name__"] = name
41
- return df_csv
42
- except Exception:
43
- pass
44
-
45
- # Excel attempt
46
- buf.seek(0)
47
- try:
48
- df_xls = pd.read_excel(buf)
49
- if not df_xls.empty:
50
- df_xls.attrs["__source_name__"] = name
51
- return df_xls
52
- except Exception:
53
- pass
54
-
55
- raise ValueError(f"Could not parse file: {name}")
54
+ def _try_read_table(
55
+ file_like_or_bytes: io.BytesIO | bytes | str,
56
+ filename: Optional[str] = None,
57
+ encoding: Optional[str] = None,
58
+ ) -> pd.DataFrame:
59
+ """
60
+ Attempt to read a tabular file (CSV preferred; Excel fallback).
61
+ - file_like_or_bytes may be a path, raw bytes, or a BytesIO-like object.
62
+ - filename helps determine the extension when not a path.
63
+ """
64
+ def _ensure_buffer(obj) -> io.BytesIO:
65
+ if isinstance(obj, (bytes, bytearray)):
66
+ return io.BytesIO(obj)
67
+ if hasattr(obj, "read"):
68
+ return obj # assume file-like opened in binary mode
69
+ if isinstance(obj, str):
70
+ # Path-like; will be handled by pandas directly, so we return None
71
+ return None
72
+ raise TypeError("Unsupported input type for _try_read_table")
73
+
74
+ ext = None
75
+ if isinstance(file_like_or_bytes, str):
76
+ lower = file_like_or_bytes.lower()
77
+ if lower.endswith(".csv"):
78
+ ext = ".csv"
79
+ elif lower.endswith((".xlsx", ".xlsm", ".xlsb", ".xls")):
80
+ ext = ".xlsx"
81
+ elif filename:
82
+ lower = filename.lower()
83
+ if lower.endswith(".csv"):
84
+ ext = ".csv"
85
+ elif lower.endswith((".xlsx", ".xlsm", ".xlsb", ".xls")):
86
+ ext = ".xlsx"
87
+
88
+ # Prefer CSV
89
+ if ext in (None, ".csv"):
90
+ try:
91
+ if isinstance(file_like_or_bytes, str):
92
+ df = pd.read_csv(file_like_or_bytes, encoding=encoding)
93
+ else:
94
+ buf = _ensure_buffer(file_like_or_bytes)
95
+ if buf is None: # pragma: no cover
96
+ raise ValueError("Failed to open buffer for CSV")
97
+ df = pd.read_csv(buf, encoding=encoding)
98
+ return _flatten_columns(df)
99
+ except Exception:
100
+ # Try Excel as fallback
101
+ pass
102
+
103
+ # Excel fallback
104
+ if isinstance(file_like_or_bytes, str):
105
+ df = pd.read_excel(file_like_or_bytes)
106
+ else:
107
+ buf = _ensure_buffer(file_like_or_bytes)
108
+ if buf is None: # pragma: no cover
109
+ raise ValueError("Failed to open buffer for Excel")
110
+ df = pd.read_excel(buf)
111
+ return _flatten_columns(df)
56
112
 
57
113
 
58
114
  def _flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
59
- # Flatten MultiIndex columns if any
115
+ """
116
+ Flatten MultiIndex columns into simple strings.
117
+ """
60
118
  if isinstance(df.columns, pd.MultiIndex):
61
- flat = [" | ".join([str(x) for x in tup if x is not None]) for tup in df.columns]
62
119
  df = df.copy()
63
- df.columns = flat
120
+ df.columns = [" ".join([str(p) for p in tup if p is not None]).strip() for tup in df.columns.values]
121
+ else:
122
+ # Ensure all columns are strings, stripped
123
+ df = df.rename(columns=lambda c: str(c).strip())
64
124
  return df
65
125
 
66
126
 
67
- def _maybe_parse_datetime(df: pd.DataFrame) -> pd.DataFrame:
68
- # Heuristic: try common datetime column names and parse them
69
- dt_candidates = ["Datetime", "DateTime", "Timestamp", "Time", "Date", "Date_Time", "datetime", "time", "timestamp"]
70
- for col in df.columns:
71
- if col in dt_candidates or re.search(r"time|date|stamp", str(col), re.IGNORECASE):
127
+ def _maybe_parse_datetime(
128
+ df: pd.DataFrame,
129
+ candidates: Iterable[str] = ("timestamp", "time", "date", "datetime"),
130
+ utc: bool = False,
131
+ ) -> pd.DataFrame:
132
+ """
133
+ If a plausible timestamp column exists, convert to DatetimeIndex.
134
+ Does nothing if index is already DatetimeIndex.
135
+ """
136
+ if isinstance(df.index, pd.DatetimeIndex):
137
+ return df
138
+ df = df.copy()
139
+ lower_cols = {str(c).lower(): c for c in df.columns}
140
+ for key in candidates:
141
+ if key in lower_cols:
142
+ col = lower_cols[key]
72
143
  try:
73
- parsed = pd.to_datetime(df[col], errors="raise", utc=False, infer_datetime_format=True)
74
- out = df.copy()
75
- out[col] = parsed
76
- return out
144
+ ts = pd.to_datetime(df[col], utc=utc, errors="raise")
145
+ df = df.set_index(ts)
146
+ df.index.name = "timestamp"
147
+ return df.drop(columns=[col])
77
148
  except Exception:
78
149
  continue
79
150
  return df
80
151
 
81
152
 
82
153
  def _detect_position_col(df: pd.DataFrame) -> Optional[str]:
83
- candidates = ["Position", "Pos", "Mic", "Channel", "Location", "Site"]
84
- for c in candidates:
85
- if c in df.columns:
86
- return c
87
- # also try case-insensitive exact matches
88
- lower_map = {str(c).lower(): c for c in df.columns}
89
- for c in candidates:
90
- if c.lower() in lower_map:
91
- return lower_map[c.lower()]
154
+ """
155
+ Attempt to find a 'position' or 'location' column.
156
+ """
157
+ patterns = [
158
+ re.compile(r"\bposition\b", re.I),
159
+ re.compile(r"\bpos\b", re.I),
160
+ re.compile(r"\blocation\b", re.I),
161
+ re.compile(r"\bsite\b", re.I),
162
+ ]
163
+ for c in df.columns:
164
+ name = str(c)
165
+ for pat in patterns:
166
+ if pat.search(name):
167
+ return c
92
168
  return None
93
169
 
94
170
 
95
171
  def _metric_patterns() -> Dict[str, re.Pattern]:
96
- # Detect wide spectral columns, e.g., "Leq_31.5", "Lmax_1000", "Leq 4k", "Lmax 1 kHz", "63 Hz"
97
- # Strategy:
98
- # - Either prefixed by a metric (Leq/Lmax) and then frequency
99
- # - Or pure frequency with "Hz" and a separate metric column naming is handled by selection
100
- def freq_part():
101
- # numbers like 31.5, 1000, 1k, 2 kHz, etc.
102
- return r"(?P<freq>(\d+(\.\d+)?)(\s*k(hz)?)?)"
103
-
104
- # metric-first naming: "Leq_31.5", "Lmax 1000", "Leq-1k", "Leq 1 kHz"
105
- metric_first = rf"^(?P<metric>Leq|Lmax)[\s_\-]*{freq_part()}(hz)?$"
106
- # freq-first naming: "31.5", "63 Hz", "1k", with an optional suffix metric after a sep: "63Hz_Leq"
107
- freq_first = rf"^{freq_part()}(hz)?[\s_\-]*(?P<metric>Leq|Lmax)?$"
172
+ """
173
+ Patterns for commonly used acoustic metrics.
174
+ """
175
+ # Matches LAeq, Leq A, Leq(A), etc.
108
176
  return {
109
- "metric_first": re.compile(metric_first, re.IGNORECASE),
110
- "freq_first": re.compile(freq_first, re.IGNORECASE),
177
+ "Leq": re.compile(r"\bL\s*eq\b(?:\s*\(?\s*A\s*\)?)?", re.I),
178
+ "Lmax": re.compile(r"\bL\s*max\b(?:\s*\(?\s*A\s*\)?)?", re.I),
179
+ "L90": re.compile(r"\bL\s*90\b(?:\s*\(?\s*A\s*\)?)?", re.I),
111
180
  }
112
181
 
113
182
 
114
- def _parse_freq_to_hz(freq_str: str) -> Optional[float]:
115
- if freq_str is None:
116
- return None
117
- s = str(freq_str).strip().lower().replace(" ", "")
118
- s = s.replace("khz", "k").replace("hz", "")
119
- # handle "1k" or "1.0k"
120
- m = re.match(r"^(\d+(\.\d+)?)k$", s)
121
- if m:
122
- return float(m.group(1)) * 1000.0
123
- try:
124
- return float(s)
125
- except Exception:
183
+ def _parse_freq_to_hz(label: str) -> Optional[float]:
184
+ """
185
+ Parse frequency-like column labels such as '63 Hz', '1k', '1 kHz', etc.
186
+ """
187
+ s = str(label).strip().lower()
188
+ m = re.match(r"^\s*([0-9]*\.?[0-9]+)\s*(k|khz|hz)?\s*$", s)
189
+ if not m:
126
190
  return None
191
+ val = float(m.group(1))
192
+ unit = m.group(2)
193
+ if not unit or unit == "hz":
194
+ return val
195
+ return val * 1000.0
127
196
 
128
197
 
129
- def spectra_to_rows(df: pd.DataFrame) -> pd.DataFrame:
198
+ def spectra_to_rows(
199
+ df: pd.DataFrame,
200
+ value_col_name: str = "Level (dB)",
201
+ ) -> Optional[pd.DataFrame]:
130
202
  """
131
- Convert wide spectral columns into a tidy long form:
132
- Columns like "Leq_31.5", "Lmax 63", "63 Hz Leq" -> rows with Frequency (Hz), Metric, Value.
133
- Non-spectral columns are carried along.
203
+ Convert wide spectra columns (e.g., '63 Hz', '1 kHz') into (freq_hz, value) rows.
204
+ Returns None if no spectra-like columns are detected.
134
205
  """
135
- df = _flatten_columns(df)
136
- patterns = _metric_patterns()
137
-
138
- spectral_cols: List[Tuple[str, str, float]] = [] # (original_col, metric, freq_hz)
139
- for col in df.columns:
140
- col_str = str(col)
141
- matched = False
142
-
143
- # metric first
144
- m1 = patterns["metric_first"].match(col_str)
145
- if m1:
146
- metric = m1.group("metric").upper()
147
- freq_hz = _parse_freq_to_hz(m1.group("freq"))
148
- if metric in ("LEQ", "LMAX") and freq_hz is not None:
149
- spectral_cols.append((col, metric, freq_hz))
150
- matched = True
151
-
152
- if matched:
153
- continue
154
-
155
- # frequency first
156
- m2 = patterns["freq_first"].match(col_str)
157
- if m2:
158
- metric = m2.group("metric")
159
- metric = metric.upper() if metric else None
160
- freq_hz = _parse_freq_to_hz(m2.group("freq"))
161
- if freq_hz is not None:
162
- # If metric is not embedded, we will treat it as "LEQ" by default for plotting,
163
- # but also keep the column name when we pivot.
164
- spectral_cols.append((col, metric or "LEQ", freq_hz))
165
-
166
- if not spectral_cols:
167
- return pd.DataFrame(columns=["Frequency_Hz", "Metric", "Value"])
168
-
169
- # Build tidy rows
170
- id_cols = [c for c in df.columns if c not in [c0 for (c0, _, _) in spectral_cols]]
171
- tidies: List[pd.DataFrame] = []
172
- for (col, metric, f_hz) in spectral_cols:
173
- block = pd.DataFrame(
174
- {
175
- "Frequency_Hz": f_hz,
176
- "Metric": metric,
177
- "Value": df[col].astype("float64").values,
178
- }
179
- )
180
- # Attach IDs if present
181
- if id_cols:
182
- block = pd.concat([df[id_cols].reset_index(drop=True), block], axis=1)
183
- tidies.append(block)
206
+ freq_cols: List[Tuple[str, float]] = []
207
+ for c in df.columns:
208
+ hz = _parse_freq_to_hz(c)
209
+ if hz is not None:
210
+ freq_cols.append((c, hz))
211
+
212
+ if not freq_cols:
213
+ return None
184
214
 
185
- tidy = pd.concat(tidies, axis=0, ignore_index=True)
186
- # Sort by frequency numeric
187
- tidy = tidy.sort_values(["Metric", "Frequency_Hz"]).reset_index(drop=True)
188
- return tidy
215
+ freq_cols_sorted = sorted(freq_cols, key=lambda x: x[1])
216
+ melted = df[fname_list := [c for c, _ in freq_cols_sorted]].copy()
217
+ melted.columns = [f"{hz:.6g}" for _, hz in freq_cols_sorted]
218
+ out = melted.melt(var_name="freq_hz", value_name=value_col_name)
219
+ out["freq_hz"] = out["freq_hz"].astype(float)
220
+ return out.sort_values("freq_hz").reset_index(drop=True)
189
221
 
190
222
 
191
- def _resample_if_possible(df: pd.DataFrame, how: str) -> pd.DataFrame:
223
+ def _resample_if_possible(df: pd.DataFrame, rule: str = "1S") -> pd.DataFrame:
192
224
  """
193
- how: '', '1min', '5min', '1H', '1D'
225
+ Downsample a DateTimeIndex DataFrame for responsiveness.
194
226
  """
195
- if not how:
227
+ if not isinstance(df.index, pd.DatetimeIndex):
196
228
  return df
197
-
198
- # find a datetime column
199
- dt_col = None
200
- for c in df.columns:
201
- if pd.api.types.is_datetime64_any_dtype(df[c]):
202
- dt_col = c
203
- break
204
-
205
- if dt_col is None:
206
- return df # nothing to resample on
207
-
208
- df_sorted = df.sort_values(dt_col)
209
- df_sorted = df_sorted.set_index(dt_col)
210
-
211
- # numeric only for resample
212
- numeric_cols = df_sorted.select_dtypes(include=["number"]).columns
213
- if len(numeric_cols) == 0:
229
+ numeric_cols = df.select_dtypes(include=[np.number]).columns
230
+ if numeric_cols.empty:
214
231
  return df
232
+ return df[numeric_cols].resample(rule).median().dropna(how="all")
233
+
215
234
 
216
- grouped = df_sorted[numeric_cols].resample(how).mean().reset_index()
217
- # put back other columns in a sensible way (drop or take first)
218
- return grouped
235
+ def _build_summary(
236
+ df: pd.DataFrame,
237
+ position_col: Optional[str] = None,
238
+ metrics: Optional[Iterable[str]] = None,
239
+ ) -> pd.DataFrame:
240
+ """
241
+ Build simple summary stats for selected metrics, optionally grouped by position.
242
+ """
243
+ if metrics is None:
244
+ metrics = ["Leq", "Lmax", "L90"]
219
245
 
246
+ pats = _metric_patterns()
247
+ metric_cols: Dict[str, List[str]] = {m: [] for m in metrics}
248
+ for c in df.columns:
249
+ for m in metrics:
250
+ if pats.get(m) and pats[m].search(str(c)):
251
+ metric_cols[m].append(c)
220
252
 
221
- def _build_summary(df: pd.DataFrame, group_cols: List[str]) -> pd.DataFrame:
222
- if not group_cols:
223
- # simple numeric summary
224
- numeric_cols = df.select_dtypes(include=["number"]).columns
225
- if len(numeric_cols) == 0:
226
- return pd.DataFrame()
227
- s = df[numeric_cols].agg(["count", "mean", "std", "min", "max"]).T.reset_index()
228
- s.columns = ["Metric"] + list(s.columns[1:])
229
- return s
253
+ work = {}
254
+ for m, cols in metric_cols.items():
255
+ sel = df[cols].select_dtypes(include=[np.number]) if cols else pd.DataFrame()
256
+ if not sel.empty:
257
+ work[m] = sel.mean(axis=1)
230
258
 
231
- # groupby summary on numeric columns
232
- numeric_cols = df.select_dtypes(include=["number"]).columns
233
- if len(numeric_cols) == 0:
259
+ if not work:
234
260
  return pd.DataFrame()
235
261
 
236
- g = df.groupby(group_cols, dropna=False)[numeric_cols].agg(["count", "mean", "std", "min", "max"])
237
- g = g.reset_index()
238
-
239
- # flatten resulting MultiIndex columns
240
- out_cols = []
241
- for tup in g.columns:
242
- if isinstance(tup, tuple):
243
- lvl0, lvl1 = tup
244
- if lvl1 == "":
245
- out_cols.append(str(lvl0))
246
- elif lvl0 in group_cols:
247
- out_cols.append(str(lvl0))
248
- else:
249
- out_cols.append(f"{lvl0}__{lvl1}")
250
- else:
251
- out_cols.append(str(tup))
252
- g.columns = out_cols
253
- return g
254
-
255
-
256
- def _guess_resample_options(df: pd.DataFrame) -> List[Tuple[str, str]]:
257
- # Label, pandas rule
258
- has_dt = any(pd.api.types.is_datetime64_any_dtype(df[c]) for c in df.columns)
259
- if not has_dt:
260
- return [("None", "")]
261
- return [
262
- ("None", ""),
263
- ("1 minute", "1min"),
264
- ("5 minutes", "5min"),
265
- ("15 minutes", "15min"),
266
- ("Hourly", "1H"),
267
- ("Daily", "1D"),
268
- ]
262
+ agg_df = pd.DataFrame(work)
263
+ if position_col and position_col in df.columns:
264
+ agg_df[position_col] = df[position_col].values[: len(agg_df)]
265
+ g = agg_df.groupby(position_col, dropna=False).agg(["mean", "min", "max", "count"])
266
+ # Flatten columns
267
+ g.columns = [" ".join([str(p) for p in col if p]).strip() for col in g.columns.values]
268
+ return g.reset_index()
269
+ else:
270
+ return agg_df.agg(["mean", "min", "max", "count"]).T.reset_index(names=["Metric"])
269
271
 
270
272
 
271
- def _plot_spectra(tidy_spec: pd.DataFrame, color_by: Optional[str]) -> go.Figure:
273
+ def _guess_resample_options(n: int) -> str:
274
+ """
275
+ Heuristically pick a resampling rule based on number of points.
276
+ """
277
+ if n > 200_000:
278
+ return "10S"
279
+ if n > 50_000:
280
+ return "5S"
281
+ if n > 10_000:
282
+ return "2S"
283
+ return "1S"
284
+
285
+
286
+ def _plot_spectra(
287
+ df_rows: pd.DataFrame,
288
+ title: str = "Spectral Levels",
289
+ value_col_name: str = "Level (dB)",
290
+ ) -> go.Figure:
291
+ """
292
+ Plot spectra contained in a (freq_hz, value) rows dataframe.
293
+ """
272
294
  fig = go.Figure()
273
- if tidy_spec.empty:
274
- fig.update_layout(template=TEMPLATE)
275
- return fig
276
-
277
- # X is frequency Hz (log10)
278
- x = tidy_spec["Frequency_Hz"].to_numpy(dtype=float)
279
-
280
- # determine trace grouping
281
- if color_by and color_by in tidy_spec.columns:
282
- groups = list(tidy_spec[color_by].astype(str).unique())
283
- for i, key in enumerate(groups):
284
- sub = tidy_spec[tidy_spec[color_by].astype(str) == str(key)]
285
- # Keep metric separated as line style if available
286
- if "Metric" in sub.columns and sub["Metric"].nunique() > 1:
287
- for metric in sorted(sub["Metric"].unique()):
288
- subm = sub[sub["Metric"] == metric]
289
- fig.add_trace(
290
- go.Scatter(
291
- x=subm["Frequency_Hz"],
292
- y=subm["Value"],
293
- mode="lines+markers",
294
- name=f"{key} – {metric}",
295
- line=dict(color=COLOURS[i % len(COLOURS)], dash="solid" if metric == "LEQ" else "dash"),
296
- marker=dict(size=6),
297
- )
298
- )
299
- else:
300
- fig.add_trace(
301
- go.Scatter(
302
- x=sub["Frequency_Hz"],
303
- y=sub["Value"],
304
- mode="lines+markers",
305
- name=str(key),
306
- line=dict(color=COLOURS[i % len(COLOURS)]),
307
- marker=dict(size=6),
308
- )
309
- )
310
- else:
311
- # single trace per metric
312
- if "Metric" in tidy_spec.columns:
313
- for i, metric in enumerate(sorted(tidy_spec["Metric"].unique())):
314
- sub = tidy_spec[tidy_spec["Metric"] == metric]
315
- fig.add_trace(
316
- go.Scatter(
317
- x=sub["Frequency_Hz"],
318
- y=sub["Value"],
319
- mode="lines+markers",
320
- name=str(metric),
321
- line=dict(color=COLOURS[i % len(COLOURS)]),
322
- marker=dict(size=6),
323
- )
324
- )
325
- else:
326
- fig.add_trace(
327
- go.Scatter(
328
- x=x,
329
- y=tidy_spec["Value"],
330
- mode="lines+markers",
331
- name="Spectrum",
332
- line=dict(color=COLOURS[0]),
333
- marker=dict(size=6),
334
- )
295
+ if {"freq_hz", value_col_name}.issubset(df_rows.columns):
296
+ fig.add_trace(
297
+ go.Scatter(
298
+ x=df_rows["freq_hz"],
299
+ y=df_rows[value_col_name],
300
+ mode="lines+markers",
301
+ line=dict(color=COLOURS[0]),
302
+ name=value_col_name,
335
303
  )
336
-
304
+ )
337
305
  fig.update_layout(
338
306
  template=TEMPLATE,
339
- xaxis=dict(
340
- type="log",
341
- title="Frequency (Hz)",
342
- tickvals=[31.5, 63, 125, 250, 500, 1000, 2000, 4000, 8000],
343
- ticktext=["31.5", "63", "125", "250", "500", "1k", "2k", "4k", "8k"],
344
- gridcolor="rgba(0,0,0,0.1)",
345
- ),
346
- yaxis=dict(title="Level (dB)", gridcolor="rgba(0,0,0,0.1)"),
347
- legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
348
- margin=dict(l=50, r=30, t=40, b=50),
307
+ xaxis_title="Frequency (Hz)",
308
+ yaxis_title=value_col_name,
309
+ hovermode="x",
310
+ margin=dict(l=10, r=10, t=30, b=10),
311
+ title=title,
349
312
  )
313
+ fig.update_xaxes(type="log", tickformat=".0f")
350
314
  return fig
351
315
 
352
316
 
353
- def _download_csv_button(label: str, df: pd.DataFrame, key: str):
354
- csv = df.to_csv(index=False).encode("utf-8")
317
+ def _download_csv_button(label: str, df: pd.DataFrame, file_name: str) -> None:
318
+ """
319
+ Render a download button for a dataframe as CSV.
320
+ """
321
+ csv_bytes = df.to_csv(index=False).encode("utf-8")
355
322
  st.download_button(
356
323
  label=label,
357
- data=csv,
358
- file_name=f"{key}.csv",
324
+ data=csv_bytes,
325
+ file_name=file_name,
359
326
  mime="text/csv",
360
- use_container_width=True,
361
327
  )
362
328
 
363
329
 
364
- # -----------------------------
365
- # UI
366
- # -----------------------------
367
- def main():
368
- st.set_page_config(page_title="PyCoustic Streamlit", layout="wide")
330
+ # === New helpers for DateTimeIndex-based Log flow ===
331
+
332
+ @st.cache_data(
333
+ show_spinner=False,
334
+ # Ensure memoryview objects are safely hashable for the cache.
335
+ hash_funcs={memoryview: lambda v: v.tobytes()},
336
+ )
337
+ def _load_log_df_from_bytes(
338
+ _file_bytes: bytes | memoryview,
339
+ file_suffix: str = ".csv",
340
+ content_key: str | None = None,
341
+ ) -> pd.DataFrame:
342
+ """
343
+ Persist uploaded bytes to a temp file and create a Log to obtain a DataFrame.
344
+ Requires Log().df to have a DateTimeIndex.
345
+
346
+ _file_bytes is ignored by Streamlit's cache hasher (leading underscore).
347
+ Optionally pass content_key (e.g., a sha256 of the bytes) so the cache
348
+ invalidates when the content changes.
349
+ """
350
+ # Keep content_key in the signature so it participates in the cache key.
351
+ _ = content_key
352
+
353
+ # Coerce memoryview to bytes so file IO is consistent.
354
+ if isinstance(_file_bytes, memoryview):
355
+ _file_bytes = _file_bytes.tobytes()
356
+
357
+ with tempfile.NamedTemporaryFile(delete=False, suffix=file_suffix) as tmp:
358
+ tmp.write(_file_bytes)
359
+ tmp_path = tmp.name
360
+ try:
361
+ log = Log(path=tmp_path)
362
+ if hasattr(log, "df") and isinstance(log.df, pd.DataFrame):
363
+ df = log.df.copy()
364
+ else:
365
+ raise ValueError("Log() did not expose a DataFrame on .df")
366
+ finally:
367
+ try:
368
+ os.unlink(tmp_path)
369
+ except Exception:
370
+ pass
371
+
372
+ if not isinstance(df.index, pd.DatetimeIndex):
373
+ raise ValueError("Expected Log().df to have a DateTimeIndex for timestamps.")
374
+ return df
375
+
376
+
377
+ def _find_metric_columns(df: pd.DataFrame) -> Dict[str, str]:
378
+ """
379
+ Locate the canonical metric columns using case-insensitive matching.
380
+ Returns a mapping from UI label to actual column name.
381
+ """
382
+ targets: List[str] = ["Leq A", "Lmax A", "L90 A"]
383
+ norm_map = {str(c).strip().lower(): c for c in df.columns}
384
+ found: Dict[str, str] = {}
385
+ for label in targets:
386
+ key = label.lower()
387
+ if key in norm_map:
388
+ found[label] = norm_map[key]
389
+ return found
390
+
391
+
392
+ def _build_time_history_figure(df: pd.DataFrame, series_map: Dict[str, str]) -> go.Figure:
393
+ """
394
+ Build a Plotly figure for time history lines using the DataFrame's DateTimeIndex.
395
+ """
396
+ fig = go.Figure()
397
+ for i, (label, col) in enumerate(series_map.items()):
398
+ fig.add_trace(
399
+ go.Scatter(
400
+ x=df.index,
401
+ y=df[col],
402
+ mode="lines",
403
+ name=label,
404
+ line=dict(color=COLOURS[i % len(COLOURS)]),
405
+ )
406
+ )
407
+ fig.update_layout(
408
+ template=TEMPLATE,
409
+ xaxis_title="Timestamp",
410
+ yaxis_title="Level (dB)",
411
+ hovermode="x unified",
412
+ legend_title_text="Series",
413
+ margin=dict(l=10, r=10, t=30, b=10),
414
+ height=420,
415
+ )
416
+ return fig
417
+
418
+
419
+ def main() -> None:
420
+ st.set_page_config(page_title="pycoustic - Noise survey toolkit", layout="wide")
421
+ st.title("Noise survey toolkit")
369
422
 
370
- st.title("PyCoustic Streamlit App")
371
- st.caption("Upload measurement logs, explore summaries and spectra, and export results.")
423
+ st.caption("Upload logs to visualize time histories. CSV files are supported.")
372
424
 
373
- with st.sidebar:
374
- st.header("Inputs")
375
- uploads = st.file_uploader(
376
- "Upload one or more files (CSV or Excel)",
377
- type=["csv", "txt", "xlsx", "xls"],
425
+ # === Time history plots based on Log().df with a DateTimeIndex ===
426
+ with st.expander("Time history plots (Leq A, Lmax A, L90 A)", expanded=True):
427
+ uploaded = st.file_uploader(
428
+ "Upload one or more CSV files",
429
+ type=["csv"],
378
430
  accept_multiple_files=True,
431
+ key="time_history_logs_uploader",
379
432
  )
380
433
 
381
- st.markdown("---")
382
- st.subheader("Options")
434
+ if uploaded:
435
+ # User options
436
+ max_points = st.number_input("Max points per series (downsampling)", 1_000, 1_000_000, value=10_000, step=1_000)
437
+ for file in uploaded:
438
+ st.markdown(f"**File:** {file.name}")
383
439
 
384
- # These options approximate a typical workflow
385
- resample_label = "Resample period"
386
- resample_options: List[Tuple[str, str]] = [("None", "")]
387
- # temporarily show None; we'll refine after reading a file
440
+ # Load via Log() and enforce DateTimeIndex
441
+ try:
442
+ # Suppose you have file_bytes or a memoryview named mv
443
+ file_bytes = file.getbuffer()
444
+ data = file_bytes if isinstance(file_bytes, (bytes, bytearray)) else file_bytes.tobytes()
445
+ content_key = hashlib.sha256(data).hexdigest()
388
446
 
389
- # placeholder UI to avoid reflow
390
- resample_choice_label = st.selectbox(resample_label, [x[0] for x in resample_options], index=0, key="resample_placeholder")
447
+ df = _load_log_df_from_bytes(data, file_suffix=".csv", content_key=content_key)
448
+ except Exception as e:
449
+ st.error(f"Could not load Log() from file: {e}")
450
+ continue
391
451
 
392
- group_hint = st.text_input("Optional Group Column (e.g., Position/Location)", value="")
452
+ if df.empty:
453
+ st.warning("No data available.")
454
+ continue
393
455
 
394
- st.markdown("---")
395
- st.subheader("Spectra")
396
- colour_by = st.text_input("Colour lines by column (e.g., Position or source)", value="source")
397
- show_markers = st.checkbox("Show markers", value=True)
456
+ # Ensure sorted index
457
+ df = df.sort_index()
398
458
 
399
- st.markdown("---")
400
- st.subheader("Display")
401
- show_raw_preview = st.checkbox("Show raw preview table", value=False)
459
+ # Identify standard metric columns
460
+ available = _find_metric_columns(df)
461
+ missing = [label for label in ("Leq A", "Lmax A", "L90 A") if label not in available]
402
462
 
403
- if not uploads:
404
- st.info("Upload files to begin.")
405
- return
463
+ if missing:
464
+ st.info(f"Missing columns: {', '.join(missing)}")
406
465
 
407
- # Read and combine
408
- logs: List[str] = []
409
- frames: List[pd.DataFrame] = []
410
- for uf in uploads:
411
- try:
412
- df = _try_read_table(uf)
413
- df = _flatten_columns(df)
414
- df = _maybe_parse_datetime(df)
415
- df["source"] = getattr(uf, "name", df.attrs.get("__source_name__", "uploaded"))
416
- frames.append(df)
417
- logs.append(f"Loaded: {df['source'].iloc[0]} (rows={len(df)}, cols={len(df.columns)})")
418
- except Exception as e:
419
- logs.append(f"Error reading {getattr(uf, 'name', '?')}: {e}")
420
-
421
- if not frames:
422
- st.error("No readable files.")
423
- st.text_area("Logs", value="\n".join(logs), height=160)
424
- return
425
-
426
- raw_master = pd.concat(frames, axis=0, ignore_index=True)
427
- # Now that we have data, rebuild resample options
428
- resample_options = _guess_resample_options(raw_master)
429
- resample_choice_label = st.sidebar.selectbox(
430
- "Resample period",
431
- [x[0] for x in resample_options],
432
- index=0,
433
- key="resample_choice",
434
- )
435
- resample_rule = dict(resample_options)[resample_choice_label]
436
-
437
- # Optional resampling
438
- df_used = _resample_if_possible(raw_master, resample_rule) if resample_rule else raw_master
439
-
440
- # Try to determine a reasonable group column
441
- detected_group_col = _detect_position_col(df_used)
442
- group_col = (st.sidebar.text_input("Detected Group Column", value=detected_group_col or "") or "").strip()
443
- if not group_col and group_hint.strip():
444
- group_col = group_hint.strip()
445
- if group_col and group_col not in df_used.columns:
446
- st.warning(f"Group column '{group_col}' not found. It will be ignored.")
447
- group_col = ""
448
-
449
- # Build spectra in tidy form
450
- tidy_spec = spectra_to_rows(df_used)
451
-
452
- tabs = st.tabs(["Summary", "Spectra", "Raw Data", "Logs"])
453
-
454
- # Summary tab
455
- with tabs[0]:
456
- st.subheader("Summary")
457
- group_cols: List[str] = []
458
- if group_col:
459
- group_cols.append(group_col)
460
- if "source" in df_used.columns:
461
- group_cols.append("source")
462
-
463
- summary_df = _build_summary(df_used, group_cols)
464
- if summary_df.empty:
465
- st.info("No numeric data to summarize.")
466
- else:
467
- st.dataframe(summary_df, use_container_width=True, hide_index=True)
468
- _download_csv_button("Download summary CSV", summary_df, "summary")
469
-
470
- # Spectra tab
471
- with tabs[1]:
472
- st.subheader("Spectra")
473
-
474
- # Allow user to filter a group (optional)
475
- filters_cols = []
476
- if group_col:
477
- filters_cols.append(group_col)
478
- if "source" in tidy_spec.columns:
479
- filters_cols.append("source")
480
-
481
- sub = tidy_spec.copy()
482
- # Dynamic filters
483
- if not sub.empty and filters_cols:
484
- cols = st.columns(len(filters_cols))
485
- for i, colname in enumerate(filters_cols):
486
- with cols[i]:
487
- uniq = sorted([str(x) for x in sub[colname].dropna().unique()])
488
- if len(uniq) <= 1:
489
- continue
490
- selected = st.multiselect(f"Filter {colname}", options=uniq, default=uniq)
491
- sub = sub[sub[colname].astype(str).isin(selected)]
492
-
493
- # Plot
494
- if sub.empty:
495
- st.info("No spectral data detected in the uploaded files.")
496
- else:
497
- fig = _plot_spectra(sub, color_by=(colour_by if colour_by in sub.columns else None))
498
- if not show_markers:
499
- for tr in fig.data:
500
- tr.mode = "lines"
501
- st.plotly_chart(fig, use_container_width=True)
502
-
503
- # Download tidy spectra
504
- _download_csv_button("Download tidy spectra CSV", sub, "spectra_tidy")
505
-
506
- # Raw Data tab
507
- with tabs[2]:
508
- st.subheader("Raw Data")
509
- if show_raw_preview:
510
- st.dataframe(raw_master, use_container_width=True, hide_index=True)
511
- else:
512
- st.caption("Enable 'Show raw preview table' in the sidebar to render the full table.")
513
- _download_csv_button("Download combined raw CSV", raw_master, "raw_combined")
466
+ if not available:
467
+ st.warning("None of the required columns were found. Expected any of: Leq A, Lmax A, L90 A.")
468
+ continue
469
+
470
+ # Optional downsampling for responsiveness
471
+ if len(df) > max_points:
472
+ step = max(1, len(df) // int(max_points))
473
+ df_plot = df.iloc[::step].copy()
474
+ else:
475
+ df_plot = df
476
+
477
+ # Plot
478
+ fig = _build_time_history_figure(df_plot, available)
479
+ st.plotly_chart(fig, use_container_width=True)
480
+
481
+ # Simple summary table per visible series
482
+ with st.expander("Summary (visible series)"):
483
+ numeric_summary = df_plot[list(available.values())].describe().T.reset_index(names=["Series"])
484
+ _download_csv_button("Download summary CSV", numeric_summary, f"{os.path.splitext(file.name)[0]}_summary.csv")
485
+ st.dataframe(numeric_summary, use_container_width=True)
514
486
 
515
- # Logs tab
516
- with tabs[3]:
517
- st.subheader("Logs")
518
- st.text_area("Ingestion log", value="\n".join(logs), height=240, label_visibility="collapsed")
487
+ # Placeholder for additional tools/sections (spectra, etc.) can go below.
488
+ # Existing/legacy time-history upload/plotting sections should be removed to avoid duplication.
519
489
 
520
490
 
521
491
  if __name__ == "__main__":
522
- main()
492
+ main()
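
A minimal, hypothetical sketch of input data satisfying the contract the new time-history section relies on: Log().df must expose a DatetimeIndex and the columns "Leq A", "Lmax A" and "L90 A" (see _load_log_df_from_bytes and _find_metric_columns above). Whether Log() parses a plain CSV in exactly this shape is an assumption; the file name and levels below are illustrative only.

    # sketch.py - illustrative only, not part of pycoustic
    import numpy as np
    import pandas as pd

    idx = pd.date_range("2024-01-01 00:00", periods=600, freq="1min")  # hypothetical survey window
    df = pd.DataFrame(
        {
            "Leq A": 55 + 5 * np.random.rand(len(idx)),   # A-weighted Leq, dB
            "Lmax A": 70 + 5 * np.random.rand(len(idx)),  # A-weighted Lmax, dB
            "L90 A": 45 + 5 * np.random.rand(len(idx)),   # A-weighted L90, dB
        },
        index=idx,
    )
    df.to_csv("example_log.csv", index_label="timestamp")

    # Then run the app and upload example_log.csv in the
    # "Time history plots (Leq A, Lmax A, L90 A)" expander:
    #   streamlit run pycoustic/streamlit-ai.py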