pycoustic 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pycoustic/streamlit-ai.py DELETED
@@ -1,492 +0,0 @@
- # streamlit-ai.py
- # Streamlit UI for inspecting noise survey data and plots
-
- from __future__ import annotations
-
- import os
- import io
- import re
- import tempfile
- from typing import Dict, Iterable, List, Optional, Tuple
-
- import numpy as np
- import pandas as pd
- import plotly.graph_objects as go
- import streamlit as st
- import hashlib
-
- # Ensure submodules are imported via the package path so their relative imports work.
- try:
-     from pycoustic.survey import *
- except Exception:
-     # Fallback for local runs
-     from survey import *
-
- try:
-     from pycoustic.log import *
- except Exception:
-     from log import *
-
- from pycoustic import Log  # expects a Log class exposing a .df with a DateTimeIndex
-
- # Support both package and script usage for the WeatherHistory import
- try:
-     from .weather import WeatherHistory
- except ImportError:
-     from weather import WeatherHistory
-
- # Plot styling
- COLOURS: List[str] = [
-     "#1f77b4",
-     "#ff7f0e",
-     "#2ca02c",
-     "#d62728",
-     "#9467bd",
-     "#8c564b",
-     "#e377c2",
-     "#7f7f7f",
-     "#bcbd22",
-     "#17becf",
- ]
- TEMPLATE: str = "plotly_white"
-
-
- def _try_read_table(
-     file_like_or_bytes: io.BytesIO | bytes | str,
-     filename: Optional[str] = None,
-     encoding: Optional[str] = None,
- ) -> pd.DataFrame:
-     """
-     Attempt to read a tabular file (CSV preferred; Excel fallback).
-     - file_like_or_bytes may be a path, raw bytes, or a BytesIO-like object.
-     - filename helps determine the extension when not a path.
-     """
-     def _ensure_buffer(obj) -> io.BytesIO:
-         if isinstance(obj, (bytes, bytearray)):
-             return io.BytesIO(obj)
-         if hasattr(obj, "read"):
-             return obj  # assume file-like opened in binary mode
-         if isinstance(obj, str):
-             # Path-like; will be handled by pandas directly, so we return None
-             return None
-         raise TypeError("Unsupported input type for _try_read_table")
-
-     ext = None
-     if isinstance(file_like_or_bytes, str):
-         lower = file_like_or_bytes.lower()
-         if lower.endswith(".csv"):
-             ext = ".csv"
-         elif lower.endswith((".xlsx", ".xlsm", ".xlsb", ".xls")):
-             ext = ".xlsx"
-     elif filename:
-         lower = filename.lower()
-         if lower.endswith(".csv"):
-             ext = ".csv"
-         elif lower.endswith((".xlsx", ".xlsm", ".xlsb", ".xls")):
-             ext = ".xlsx"
-
-     # Prefer CSV
-     if ext in (None, ".csv"):
-         try:
-             if isinstance(file_like_or_bytes, str):
-                 df = pd.read_csv(file_like_or_bytes, encoding=encoding)
-             else:
-                 buf = _ensure_buffer(file_like_or_bytes)
-                 if buf is None:  # pragma: no cover
-                     raise ValueError("Failed to open buffer for CSV")
-                 df = pd.read_csv(buf, encoding=encoding)
-             return _flatten_columns(df)
-         except Exception:
-             # Try Excel as fallback
-             pass
-
-     # Excel fallback
-     if isinstance(file_like_or_bytes, str):
-         df = pd.read_excel(file_like_or_bytes)
-     else:
-         buf = _ensure_buffer(file_like_or_bytes)
-         if buf is None:  # pragma: no cover
-             raise ValueError("Failed to open buffer for Excel")
-         df = pd.read_excel(buf)
-     return _flatten_columns(df)
-
-
- def _flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
-     """
-     Flatten MultiIndex columns into simple strings.
-     """
-     if isinstance(df.columns, pd.MultiIndex):
-         df = df.copy()
-         df.columns = [" ".join([str(p) for p in tup if p is not None]).strip() for tup in df.columns.values]
-     else:
-         # Ensure all columns are strings, stripped
-         df = df.rename(columns=lambda c: str(c).strip())
-     return df
-
-
- def _maybe_parse_datetime(
-     df: pd.DataFrame,
-     candidates: Iterable[str] = ("timestamp", "time", "date", "datetime"),
-     utc: bool = False,
- ) -> pd.DataFrame:
-     """
-     If a plausible timestamp column exists, convert to DatetimeIndex.
-     Does nothing if index is already DatetimeIndex.
-     """
-     if isinstance(df.index, pd.DatetimeIndex):
-         return df
-     df = df.copy()
-     lower_cols = {str(c).lower(): c for c in df.columns}
-     for key in candidates:
-         if key in lower_cols:
-             col = lower_cols[key]
-             try:
-                 ts = pd.to_datetime(df[col], utc=utc, errors="raise")
-                 df = df.set_index(ts)
-                 df.index.name = "timestamp"
-                 return df.drop(columns=[col])
-             except Exception:
-                 continue
-     return df
-
-
- def _detect_position_col(df: pd.DataFrame) -> Optional[str]:
-     """
-     Attempt to find a 'position' or 'location' column.
-     """
-     patterns = [
-         re.compile(r"\bposition\b", re.I),
-         re.compile(r"\bpos\b", re.I),
-         re.compile(r"\blocation\b", re.I),
-         re.compile(r"\bsite\b", re.I),
-     ]
-     for c in df.columns:
-         name = str(c)
-         for pat in patterns:
-             if pat.search(name):
-                 return c
-     return None
-
-
- def _metric_patterns() -> Dict[str, re.Pattern]:
-     """
-     Patterns for commonly used acoustic metrics.
-     """
-     # Matches LAeq, Leq A, Leq(A), etc.
-     return {
-         "Leq": re.compile(r"\bL\s*eq\b(?:\s*\(?\s*A\s*\)?)?", re.I),
-         "Lmax": re.compile(r"\bL\s*max\b(?:\s*\(?\s*A\s*\)?)?", re.I),
-         "L90": re.compile(r"\bL\s*90\b(?:\s*\(?\s*A\s*\)?)?", re.I),
-     }
-
-
- def _parse_freq_to_hz(label: str) -> Optional[float]:
-     """
-     Parse frequency-like column labels such as '63 Hz', '1k', '1 kHz', etc.
-     """
-     s = str(label).strip().lower()
-     m = re.match(r"^\s*([0-9]*\.?[0-9]+)\s*(k|khz|hz)?\s*$", s)
-     if not m:
-         return None
-     val = float(m.group(1))
-     unit = m.group(2)
-     if not unit or unit == "hz":
-         return val
-     return val * 1000.0
-
-
- def spectra_to_rows(
-     df: pd.DataFrame,
-     value_col_name: str = "Level (dB)",
- ) -> Optional[pd.DataFrame]:
-     """
-     Convert wide spectra columns (e.g., '63 Hz', '1 kHz') into (freq_hz, value) rows.
-     Returns None if no spectra-like columns are detected.
-     """
-     freq_cols: List[Tuple[str, float]] = []
-     for c in df.columns:
-         hz = _parse_freq_to_hz(c)
-         if hz is not None:
-             freq_cols.append((c, hz))
-
-     if not freq_cols:
-         return None
-
-     freq_cols_sorted = sorted(freq_cols, key=lambda x: x[1])
-     melted = df[fname_list := [c for c, _ in freq_cols_sorted]].copy()
-     melted.columns = [f"{hz:.6g}" for _, hz in freq_cols_sorted]
-     out = melted.melt(var_name="freq_hz", value_name=value_col_name)
-     out["freq_hz"] = out["freq_hz"].astype(float)
-     return out.sort_values("freq_hz").reset_index(drop=True)
-
-
- def _resample_if_possible(df: pd.DataFrame, rule: str = "1S") -> pd.DataFrame:
-     """
-     Downsample a DateTimeIndex DataFrame for responsiveness.
-     """
-     if not isinstance(df.index, pd.DatetimeIndex):
-         return df
-     numeric_cols = df.select_dtypes(include=[np.number]).columns
-     if numeric_cols.empty:
-         return df
-     return df[numeric_cols].resample(rule).median().dropna(how="all")
-
-
- def _build_summary(
-     df: pd.DataFrame,
-     position_col: Optional[str] = None,
-     metrics: Optional[Iterable[str]] = None,
- ) -> pd.DataFrame:
-     """
-     Build simple summary stats for selected metrics, optionally grouped by position.
-     """
-     if metrics is None:
-         metrics = ["Leq", "Lmax", "L90"]
-
-     pats = _metric_patterns()
-     metric_cols: Dict[str, List[str]] = {m: [] for m in metrics}
-     for c in df.columns:
-         for m in metrics:
-             if pats.get(m) and pats[m].search(str(c)):
-                 metric_cols[m].append(c)
-
-     work = {}
-     for m, cols in metric_cols.items():
-         sel = df[cols].select_dtypes(include=[np.number]) if cols else pd.DataFrame()
-         if not sel.empty:
-             work[m] = sel.mean(axis=1)
-
-     if not work:
-         return pd.DataFrame()
-
-     agg_df = pd.DataFrame(work)
-     if position_col and position_col in df.columns:
-         agg_df[position_col] = df[position_col].values[: len(agg_df)]
-         g = agg_df.groupby(position_col, dropna=False).agg(["mean", "min", "max", "count"])
-         # Flatten columns
-         g.columns = [" ".join([str(p) for p in col if p]).strip() for col in g.columns.values]
-         return g.reset_index()
-     else:
-         return agg_df.agg(["mean", "min", "max", "count"]).T.reset_index(names=["Metric"])
-
-
- def _guess_resample_options(n: int) -> str:
-     """
-     Heuristically pick a resampling rule based on number of points.
-     """
-     if n > 200_000:
-         return "10S"
-     if n > 50_000:
-         return "5S"
-     if n > 10_000:
-         return "2S"
-     return "1S"
-
-
- def _plot_spectra(
-     df_rows: pd.DataFrame,
-     title: str = "Spectral Levels",
-     value_col_name: str = "Level (dB)",
- ) -> go.Figure:
-     """
-     Plot spectra contained in a (freq_hz, value) rows dataframe.
-     """
-     fig = go.Figure()
-     if {"freq_hz", value_col_name}.issubset(df_rows.columns):
-         fig.add_trace(
-             go.Scatter(
-                 x=df_rows["freq_hz"],
-                 y=df_rows[value_col_name],
-                 mode="lines+markers",
-                 line=dict(color=COLOURS[0]),
-                 name=value_col_name,
-             )
-         )
-     fig.update_layout(
-         template=TEMPLATE,
-         xaxis_title="Frequency (Hz)",
-         yaxis_title=value_col_name,
-         hovermode="x",
-         margin=dict(l=10, r=10, t=30, b=10),
-         title=title,
-     )
-     fig.update_xaxes(type="log", tickformat=".0f")
-     return fig
-
-
- def _download_csv_button(label: str, df: pd.DataFrame, file_name: str) -> None:
-     """
-     Render a download button for a dataframe as CSV.
-     """
-     csv_bytes = df.to_csv(index=False).encode("utf-8")
-     st.download_button(
-         label=label,
-         data=csv_bytes,
-         file_name=file_name,
-         mime="text/csv",
-     )
-
-
- # === New helpers for DateTimeIndex-based Log flow ===
-
- @st.cache_data(
-     show_spinner=False,
-     # Ensure memoryview objects are safely hashable for the cache.
-     hash_funcs={memoryview: lambda v: v.tobytes()},
- )
- def _load_log_df_from_bytes(
-     _file_bytes: bytes | memoryview,
-     file_suffix: str = ".csv",
-     content_key: str | None = None,
- ) -> pd.DataFrame:
-     """
-     Persist uploaded bytes to a temp file and create a Log to obtain a DataFrame.
-     Requires Log().df to have a DateTimeIndex.
-
-     _file_bytes is ignored by Streamlit's cache hasher (leading underscore).
-     Optionally pass content_key (e.g., a sha256 of the bytes) so the cache
-     invalidates when the content changes.
-     """
-     # Keep content_key in the signature so it participates in the cache key.
-     _ = content_key
-
-     # Coerce memoryview to bytes so file IO is consistent.
-     if isinstance(_file_bytes, memoryview):
-         _file_bytes = _file_bytes.tobytes()
-
-     with tempfile.NamedTemporaryFile(delete=False, suffix=file_suffix) as tmp:
-         tmp.write(_file_bytes)
-         tmp_path = tmp.name
-     try:
-         log = Log(path=tmp_path)
-         if hasattr(log, "df") and isinstance(log.df, pd.DataFrame):
-             df = log.df.copy()
-         else:
-             raise ValueError("Log() did not expose a DataFrame on .df")
-     finally:
-         try:
-             os.unlink(tmp_path)
-         except Exception:
-             pass
-
-     if not isinstance(df.index, pd.DatetimeIndex):
-         raise ValueError("Expected Log().df to have a DateTimeIndex for timestamps.")
-     return df
-
-
- def _find_metric_columns(df: pd.DataFrame) -> Dict[str, str]:
-     """
-     Locate the canonical metric columns using case-insensitive matching.
-     Returns a mapping from UI label to actual column name.
-     """
-     targets: List[str] = ["Leq A", "Lmax A", "L90 A"]
-     norm_map = {str(c).strip().lower(): c for c in df.columns}
-     found: Dict[str, str] = {}
-     for label in targets:
-         key = label.lower()
-         if key in norm_map:
-             found[label] = norm_map[key]
-     return found
-
-
- def _build_time_history_figure(df: pd.DataFrame, series_map: Dict[str, str]) -> go.Figure:
-     """
-     Build a Plotly figure for time history lines using the DataFrame's DateTimeIndex.
-     """
-     fig = go.Figure()
-     for i, (label, col) in enumerate(series_map.items()):
-         fig.add_trace(
-             go.Scatter(
-                 x=df.index,
-                 y=df[col],
-                 mode="lines",
-                 name=label,
-                 line=dict(color=COLOURS[i % len(COLOURS)]),
-             )
-         )
-     fig.update_layout(
-         template=TEMPLATE,
-         xaxis_title="Timestamp",
-         yaxis_title="Level (dB)",
-         hovermode="x unified",
-         legend_title_text="Series",
-         margin=dict(l=10, r=10, t=30, b=10),
-         height=420,
-     )
-     return fig
-
-
- def main() -> None:
-     st.set_page_config(page_title="pycoustic - Noise survey toolkit", layout="wide")
-     st.title("Noise survey toolkit")
-
-     st.caption("Upload logs to visualize time histories. CSV files are supported.")
-
-     # === Time history plots based on Log().df with a DateTimeIndex ===
-     with st.expander("Time history plots (Leq A, Lmax A, L90 A)", expanded=True):
-         uploaded = st.file_uploader(
-             "Upload one or more CSV files",
-             type=["csv"],
-             accept_multiple_files=True,
-             key="time_history_logs_uploader",
-         )
-
-         if uploaded:
-             # User options
-             max_points = st.number_input("Max points per series (downsampling)", 1_000, 1_000_000, value=10_000, step=1_000)
-             for file in uploaded:
-                 st.markdown(f"**File:** {file.name}")
-
-                 # Load via Log() and enforce DateTimeIndex
-                 try:
-                     # Read the upload as bytes and derive a content hash for cache keying
-                     file_bytes = file.getbuffer()
-                     data = file_bytes if isinstance(file_bytes, (bytes, bytearray)) else file_bytes.tobytes()
-                     content_key = hashlib.sha256(data).hexdigest()
-
-                     df = _load_log_df_from_bytes(data, file_suffix=".csv", content_key=content_key)
-                 except Exception as e:
-                     st.error(f"Could not load Log() from file: {e}")
-                     continue
-
-                 if df.empty:
-                     st.warning("No data available.")
-                     continue
-
-                 # Ensure sorted index
-                 df = df.sort_index()
-
-                 # Identify standard metric columns
-                 available = _find_metric_columns(df)
-                 missing = [label for label in ("Leq A", "Lmax A", "L90 A") if label not in available]
-
-                 if missing:
-                     st.info(f"Missing columns: {', '.join(missing)}")
-
-                 if not available:
-                     st.warning("None of the required columns were found. Expected any of: Leq A, Lmax A, L90 A.")
-                     continue
-
-                 # Optional downsampling for responsiveness
-                 if len(df) > max_points:
-                     step = max(1, len(df) // int(max_points))
-                     df_plot = df.iloc[::step].copy()
-                 else:
-                     df_plot = df
-
-                 # Plot
-                 fig = _build_time_history_figure(df_plot, available)
-                 st.plotly_chart(fig, use_container_width=True)
-
-                 # Simple summary table per visible series
-                 with st.expander("Summary (visible series)"):
-                     numeric_summary = df_plot[list(available.values())].describe().T.reset_index(names=["Series"])
-                     _download_csv_button("Download summary CSV", numeric_summary, f"{os.path.splitext(file.name)[0]}_summary.csv")
-                     st.dataframe(numeric_summary, use_container_width=True)
-
-     # Placeholder for additional tools/sections (spectra, etc.) can go below.
-     # Existing/legacy time-history upload/plotting sections should be removed to avoid duplication.
-
-
- if __name__ == "__main__":
-     main()
@@ -1,142 +0,0 @@
- import os
- import tempfile
- from typing import List, Dict
-
- import pandas as pd
- import plotly.graph_objects as go
- import streamlit as st
-
- # Import pycoustic classes
- from log import *
- from survey import *
- from weather import *
-
- st.set_page_config(page_title="pycoustic GUI", layout="wide")
- st.title("pycoustic Streamlit GUI")
-
- # Initialize session state
- ss = st.session_state
- ss.setdefault("tmp_paths", [])  # List[str] for cleanup
- ss.setdefault("logs", {})  # Dict[str, Log]
- ss.setdefault("survey", None)  # Survey or None
- ss.setdefault("resi_df", None)  # Cached summary
- ss.setdefault("periods_times", {  # Default times for set_periods()
-     "day": (7, 0),
-     "evening": (23, 0),
-     "night": (23, 0),
- })
- ss.setdefault("lmax_n", 5)
- ss.setdefault("lmax_t", 30)
- ss.setdefault("extra_kwargs_raw", "{}")
-
-
- def save_upload_to_tmp(uploaded_file) -> str:
-     """Persist an uploaded CSV to a temporary file and return its path."""
-     # Create a persistent temporary file (delete later on reset)
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
-         tmp.write(uploaded_file.getbuffer())
-     return tmp.name
-
-
- def build_survey(log_map: dict, times_kwarg: dict | None = None) -> Survey:
-     """Create a Survey, attach logs, and optionally call set_periods(times=...)."""
-     survey = Survey()
-
-     # Attach logs to the Survey (simple, direct assignment to internal storage)
-     # If a public adder method exists, prefer that; fallback to internal attribute.
-     if hasattr(survey, "add_log"):
-         for key, lg in log_map.items():
-             try:
-                 survey.add_log(key, lg)  # type: ignore[attr-defined]
-             except Exception:
-                 # Fallback if signature differs
-                 setattr(survey, "_logs", log_map)
-                 break
-     else:
-         setattr(survey, "_logs", log_map)
-
-     # Apply periods if provided
-     if times_kwarg is not None:
-         try:
-             survey.set_periods(times=times_kwarg)
-         except Exception as e:
-             st.warning(f"set_periods failed with provided times: {e}")
-
-     return survey
-
-
- # File Upload in expander container
- with st.expander("1) Load CSV data", expanded=True):
-     st.write("Upload one or more CSV files to create Log objects for a single Survey.")
-
-     uploaded = st.file_uploader(
-         "Select CSV files",
-         type=["csv"],
-         accept_multiple_files=True,
-         help="Each CSV should match the expected pycoustic format."
-     )
-
-     if uploaded:
-         st.caption("Assign a position name for each file (defaults to base filename).")
-
-         # Build a list of (file, default_name) for user naming
-         pos_names = []
-         for idx, f in enumerate(uploaded):
-             default_name = f.name.rsplit(".", 1)[0]
-             name = st.text_input(
-                 f"Position name for file {idx + 1}: {f.name}",
-                 value=default_name,
-                 key=f"pos_name_{f.name}_{idx}"
-             )
-             pos_names.append((f, name.strip() or default_name))
-
-         col_l, col_r = st.columns([1, 1])
-         replace = col_l.checkbox("Replace existing survey/logs", value=True)
-         load_btn = col_r.button("Load CSVs")
-
-         if load_btn:
-             if replace:
-                 # Reset previous state
-                 for p in ss["tmp_paths"]:
-                     try:
-                         # Cleanup files on supported OS; not critical if fails
-                         import os
-                         os.unlink(p)
-                     except Exception:
-                         pass
-                 ss["tmp_paths"] = []
-                 ss["logs"] = {}
-                 ss["survey"] = None
-                 ss["resi_df"] = None
-
-             added = 0
-             for f, pos_name in pos_names:
-                 try:
-                     tmp_path = save_upload_to_tmp(f)
-                     ss["tmp_paths"].append(tmp_path)
-                     log_obj = Log(path=tmp_path)
-                     ss["logs"][pos_name] = log_obj
-                     added += 1
-                 except Exception as e:
-                     st.error(f"Failed to load {f.name}: {e}")
-
-             if added > 0:
-                 st.success(f"Loaded {added} file(s) into logs.")
-             else:
-                 st.warning("No files loaded. Please check the CSV format and try again.")
-
- if ss["logs"]:
-     st.info(f"Current logs in session: {', '.join(ss['logs'].keys())}")
-
-     ss["survey"] = Survey()
-     for k in ss["logs"].keys():
-         ss["survey"].add_log(ss["logs"][k], name=k)
-         st.text(k)
-
-     st.text(type(ss["survey"]))
-     st.table(ss["survey"].resi_summary())
-
- with st.expander("Broadband Summary", expanded=True):
-     df = ss["survey"]._logs
-     st.text(df)
-     #test