fibphot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fibphot/io/excel.py ADDED
@@ -0,0 +1,76 @@
+ from __future__ import annotations
+
+ from collections.abc import Mapping, Sequence
+ from pathlib import Path
+
+ import numpy as np
+ import pandas as pd
+
+ from ..state import PhotometryState
+ from ..types import FloatArray
+
+
+ def _stack_signals(
+     signals: Mapping[str, FloatArray],
+ ) -> tuple[FloatArray, tuple[str, ...]]:
+     names = tuple(str(k).lower() for k in signals)
+     arrs = [np.asarray(v, dtype=float) for v in signals.values()]
+     stacked = np.stack(arrs, axis=0)
+     return stacked, names
+
+
+ def read_excel(
+     filename: Path | str,
+     *,
+     time_column: str = "time",
+     signal_columns: Sequence[str] | Mapping[str, str] | None = (
+         "gcamp",
+         "isosbestic",
+     ),
+ ) -> PhotometryState:
+     """
+     Read photometry data from an Excel file into a PhotometryState.
+
+     signal_columns:
+         - Sequence[str]: load these columns as channels (channel name == column name)
+         - Mapping[str, str]: {channel_name: column_name_in_excel}
+         - None: load all columns except the time column
+     """
+     path = Path(filename)
+     if not path.exists():
+         raise FileNotFoundError(f"File not found: {path}")
+
+     df = pd.read_excel(path)
+     df.columns = [str(c).lower() for c in df.columns]
+
+     tcol = time_column.lower()
+     if tcol not in df.columns:
+         raise ValueError(
+             f"Missing time column '{time_column}'. Found: {list(df.columns)}"
+         )
+
+     if signal_columns is None:
+         column_map = {c: c for c in df.columns if c != tcol}
+     elif isinstance(signal_columns, Mapping):
+         column_map = {
+             str(k).lower(): str(v).lower() for k, v in signal_columns.items()
+         }
+     else:
+         cols = [str(c).lower() for c in signal_columns]
+         column_map = {c: c for c in cols}
+
+     missing = [col for col in column_map.values() if col not in df.columns]
+     if missing:
+         raise ValueError(
+             f"Missing signal columns: {missing}. Found: {list(df.columns)}"
+         )
+
+     signals_dict = {
+         ch: df[col].to_numpy(dtype=float) for ch, col in column_map.items()
+     }
+     signals, names = _stack_signals(signals_dict)
+     time_s = df[tcol].to_numpy(dtype=float)
+
+     return PhotometryState(
+         time_seconds=time_s, signals=signals, channel_names=names
+     )
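For orientation, a minimal usage sketch of the three `signal_columns` modes accepted by `read_excel`; the file name and the `tdtomato` column here are hypothetical:

    from fibphot.io.excel import read_excel

    # Default: expects (case-insensitive) "time", "gcamp", "isosbestic" columns.
    state = read_excel("session.xlsx")

    # Sequence form: channel names are the column names themselves.
    state = read_excel("session.xlsx", signal_columns=("gcamp", "tdtomato"))

    # Mapping form: {channel_name: column_name_in_excel}.
    state = read_excel(
        "session.xlsx",
        signal_columns={"signal": "gcamp", "control": "isosbestic"},
    )

    # None: every column except the time column becomes a channel.
    state = read_excel("session.xlsx", signal_columns=None)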
fibphot/io/h5.py ADDED
@@ -0,0 +1,329 @@
+ from __future__ import annotations
+
+ import json
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Any
+
+ import numpy as np
+
+ from ..collection import PhotometryCollection
+ from ..state import PhotometryState, StageRecord
+
+
+ def _json_safe(obj: Any) -> Any:
+     # Recursively convert to JSON-encodable values. ndarrays are tagged with
+     # "__ndarray__"; note that _json_loads leaves that tagging in place.
+     if isinstance(obj, (str, int, float, bool)) or obj is None:
+         return obj
+     if isinstance(obj, (list, tuple)):
+         return [_json_safe(x) for x in obj]
+     if isinstance(obj, dict):
+         return {str(k): _json_safe(v) for k, v in obj.items()}
+     if isinstance(obj, np.ndarray):
+         return {
+             "__ndarray__": True,
+             "dtype": str(obj.dtype),
+             "shape": obj.shape,
+             "data": obj.tolist(),
+         }
+     return str(obj)
+
+
+ def _json_dumps(obj: Any) -> str:
+     return json.dumps(_json_safe(obj), ensure_ascii=False)
+
+
+ def _json_loads(s: str) -> Any:
+     return json.loads(s)
+
+
+ def _require_h5py():
+     try:
+         import h5py  # type: ignore[import-not-found]
+     except Exception as exc:  # pragma: no cover
+         raise ImportError("Saving/loading HDF5 requires `h5py`.") from exc
+     return h5py
+
+
+ def save_state_h5(
+     state: PhotometryState,
+     path: Path | str,
+     *,
+     compression: str | None = "gzip",
+     compression_opts: int = 4,
+ ) -> None:
+     h5py = _require_h5py()
+     path = Path(path)
+     # h5py rejects compression_opts when compression is disabled.
+     opts = compression_opts if compression is not None else None
+
+     with h5py.File(path, "w") as f:
+         f.attrs["schema"] = "fibphot_state"
+         f.attrs["schema_version"] = 1
+         f.attrs["created_utc"] = datetime.now(timezone.utc).isoformat()
+
+         f.create_dataset("time_seconds", data=state.time_seconds)
+         f.create_dataset(
+             "signals",
+             data=state.signals,
+             compression=compression,
+             compression_opts=opts,
+         )
+         f.create_dataset(
+             "history",
+             data=state.history,
+             compression=compression,
+             compression_opts=opts,
+         )
+
+         dt = h5py.string_dtype(encoding="utf-8")
+         f.create_dataset(
+             "channel_names",
+             data=np.array(state.channel_names, dtype=object),
+             dtype=dt,
+         )
+
+         # metadata (json)
+         f.attrs["metadata_json"] = _json_dumps(state.metadata)
+
+         # derived arrays
+         g_derived = f.create_group("derived")
+         for k, arr in state.derived.items():
+             g_derived.create_dataset(
+                 k,
+                 data=np.asarray(arr, dtype=float),
+                 compression=compression,
+                 compression_opts=opts,
+             )
+
+         # summary records
+         g_sum = f.create_group("summary")
+         for rec in state.summary:
+             g = g_sum.create_group(rec.stage_id)
+             g.attrs["name"] = rec.name
+             g.attrs["params_json"] = _json_dumps(rec.params)
+             g.attrs["metrics_json"] = _json_dumps(rec.metrics)
+             if rec.notes is not None:
+                 g.attrs["notes"] = rec.notes
+
+         # results by stage_id
+         g_res = f.create_group("results")
+         for stage_id, payload in state.results.items():
+             g = g_res.create_group(stage_id)
+             g.attrs["json"] = _json_dumps(payload)
+
+
+ def load_state_h5(path: Path | str) -> PhotometryState:
+     h5py = _require_h5py()
+     path = Path(path)
+
+     with h5py.File(path, "r") as f:
+         t = np.asarray(f["time_seconds"], dtype=float)
+         s = np.asarray(f["signals"], dtype=float)
+         h = np.asarray(f["history"], dtype=float)
+         # .asstr() decodes stored UTF-8 strings, which h5py returns as bytes.
+         channel_names = tuple(str(x) for x in f["channel_names"].asstr()[...])
+
+         metadata = {}
+         meta_json = f.attrs.get("metadata_json")
+         if meta_json:
+             metadata = _json_loads(str(meta_json))
+
+         derived: dict[str, np.ndarray] = {}
+         if "derived" in f:
+             for k in f["derived"].keys():
+                 derived[k] = np.asarray(f["derived"][k], dtype=float)
+
+         summary: list[StageRecord] = []
+         if "summary" in f:
+             for stage_id in f["summary"].keys():
+                 g = f["summary"][stage_id]
+                 name = str(g.attrs["name"])
+                 params = _json_loads(str(g.attrs.get("params_json", "{}")))
+                 metrics = _json_loads(str(g.attrs.get("metrics_json", "{}")))
+                 notes = g.attrs.get("notes")
+                 summary.append(
+                     StageRecord(
+                         stage_id=str(stage_id),
+                         name=name,
+                         params=params if isinstance(params, dict) else {},
+                         metrics=metrics if isinstance(metrics, dict) else {},
+                         notes=str(notes) if notes is not None else None,
+                     )
+                 )
+         summary.sort(key=lambda r: r.stage_id)
+
+         results: dict[str, dict[str, Any]] = {}
+         if "results" in f:
+             for stage_id in f["results"].keys():
+                 g = f["results"][stage_id]
+                 payload = _json_loads(str(g.attrs.get("json", "{}")))
+                 results[str(stage_id)] = (
+                     payload if isinstance(payload, dict) else {}
+                 )
+
+     return PhotometryState(
+         time_seconds=t,
+         signals=s,
+         channel_names=channel_names,
+         history=h,
+         summary=tuple(summary),
+         derived=derived,
+         results=results,
+         metadata=metadata,
+     )
+
+
+ def save_collection_h5(
+     coll: PhotometryCollection,
+     path: Path | str,
+     *,
+     compression: str | None = "gzip",
+     compression_opts: int = 4,
+ ) -> None:
+     h5py = _require_h5py()
+     path = Path(path)
+     # h5py rejects compression_opts when compression is disabled.
+     opts = compression_opts if compression is not None else None
+
+     with h5py.File(path, "w") as f:
+         f.attrs["schema"] = "fibphot_collection"
+         f.attrs["schema_version"] = 1
+         f.attrs["created_utc"] = datetime.now(timezone.utc).isoformat()
+
+         g_states = f.create_group("states")
+         order: list[str] = []
+
+         for i, st in enumerate(coll.states):
+             name = st.subject or f"state_{i:04d}"
+             base = name
+             j = 1
+             while name in g_states:
+                 j += 1
+                 name = f"{base}_{j}"
+             order.append(name)
+
+             g = g_states.create_group(name)
+             g.create_dataset("time_seconds", data=st.time_seconds)
+             g.create_dataset(
+                 "signals",
+                 data=st.signals,
+                 compression=compression,
+                 compression_opts=opts,
+             )
+             g.create_dataset(
+                 "history",
+                 data=st.history,
+                 compression=compression,
+                 compression_opts=opts,
+             )
+
+             dt = h5py.string_dtype(encoding="utf-8")
+             g.create_dataset(
+                 "channel_names",
+                 data=np.array(st.channel_names, dtype=object),
+                 dtype=dt,
+             )
+             g.attrs["metadata_json"] = _json_dumps(st.metadata)
+
+             gd = g.create_group("derived")
+             for k, arr in st.derived.items():
+                 gd.create_dataset(
+                     k,
+                     data=np.asarray(arr, dtype=float),
+                     compression=compression,
+                     compression_opts=opts,
+                 )
+
+             gs = g.create_group("summary")
+             for rec in st.summary:
+                 gg = gs.create_group(rec.stage_id)
+                 gg.attrs["name"] = rec.name
+                 gg.attrs["params_json"] = _json_dumps(rec.params)
+                 gg.attrs["metrics_json"] = _json_dumps(rec.metrics)
+                 if rec.notes is not None:
+                     gg.attrs["notes"] = rec.notes
+
+             gr = g.create_group("results")
+             for stage_id, payload in st.results.items():
+                 gg = gr.create_group(stage_id)
+                 gg.attrs["json"] = _json_dumps(payload)
+
+         dt = h5py.string_dtype(encoding="utf-8")
+         f.create_dataset("order", data=np.array(order, dtype=object), dtype=dt)
+
+
+ def load_collection_h5(path: Path | str) -> PhotometryCollection:
+     h5py = _require_h5py()
+     path = Path(path)
+
+     with h5py.File(path, "r") as f:
+         # .asstr() decodes stored UTF-8 strings, which h5py returns as bytes.
+         order = [str(x) for x in f["order"].asstr()[...]]
+         g_states = f["states"]
+
+         states: list[PhotometryState] = []
+         for key in order:
+             g = g_states[key]
+             t = np.asarray(g["time_seconds"], dtype=float)
+             s = np.asarray(g["signals"], dtype=float)
+             h = np.asarray(g["history"], dtype=float)
+             channel_names = tuple(str(x) for x in g["channel_names"].asstr()[...])
+
+             metadata = {}
+             meta_json = g.attrs.get("metadata_json")
+             if meta_json:
+                 metadata = _json_loads(str(meta_json))
+
+             derived: dict[str, np.ndarray] = {}
+             if "derived" in g:
+                 for k in g["derived"].keys():
+                     derived[k] = np.asarray(g["derived"][k], dtype=float)
+
+             summary: list[StageRecord] = []
+             if "summary" in g:
+                 for stage_id in g["summary"].keys():
+                     gg = g["summary"][stage_id]
+                     name = str(gg.attrs["name"])
+                     params = _json_loads(str(gg.attrs.get("params_json", "{}")))
+                     metrics = _json_loads(
+                         str(gg.attrs.get("metrics_json", "{}"))
+                     )
+                     notes = gg.attrs.get("notes")
+                     summary.append(
+                         StageRecord(
+                             stage_id=str(stage_id),
+                             name=name,
+                             params=params if isinstance(params, dict) else {},
+                             metrics=metrics
+                             if isinstance(metrics, dict)
+                             else {},
+                             notes=str(notes) if notes is not None else None,
+                         )
+                     )
+             summary.sort(key=lambda r: r.stage_id)
+
+             results: dict[str, dict[str, Any]] = {}
+             if "results" in g:
+                 for stage_id in g["results"].keys():
+                     gg = g["results"][stage_id]
+                     payload = _json_loads(str(gg.attrs.get("json", "{}")))
+                     results[str(stage_id)] = (
+                         payload if isinstance(payload, dict) else {}
+                     )
+
+             states.append(
+                 PhotometryState(
+                     time_seconds=t,
+                     signals=s,
+                     channel_names=channel_names,
+                     history=h,
+                     summary=tuple(summary),
+                     derived=derived,
+                     results=results,
+                     metadata=metadata,
+                 )
+             )
+
+     return PhotometryCollection.from_iterable(states)
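A round-trip sketch for the HDF5 helpers, assuming `state` is a `PhotometryState` (e.g. from `read_excel` above) and `coll` is a `PhotometryCollection`; the file names are hypothetical:

    from fibphot.io.h5 import (
        load_collection_h5,
        load_state_h5,
        save_collection_h5,
        save_state_h5,
    )

    save_state_h5(state, "session.h5")                 # gzip level 4 by default
    save_state_h5(state, "raw.h5", compression=None)   # uncompressed
    restored = load_state_h5("session.h5")

    save_collection_h5(coll, "cohort.h5")
    cohort = load_collection_h5("cohort.h5")

One caveat worth knowing: metadata, params, and results pass through `_json_dumps`/`_json_loads`, so numpy arrays stored there come back as tagged dicts ({"__ndarray__": True, ...}) rather than arrays; only `time_seconds`, `signals`, `history`, and `derived` round-trip as ndarrays.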
fibphot/misc.py ADDED
@@ -0,0 +1,13 @@
+ from __future__ import annotations
+
+ from pathlib import Path
+
+
+ def subject_from_stem(p: Path) -> str:
+     # The subject id is everything before the first "_" in the file stem
+     # (the whole stem when there is no underscore).
+     return p.stem.split("_", 1)[0]
+
+
+ def metadata_from_stem(p: Path) -> dict[str, object]:
+     return {"subject": subject_from_stem(p)}
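A quick sketch of the stem convention, using a hypothetical file name:

    from pathlib import Path

    from fibphot.misc import metadata_from_stem, subject_from_stem

    p = Path("rat01_day3_morning.xlsx")
    subject_from_stem(p)    # -> "rat01"
    metadata_from_stem(p)   # -> {"subject": "rat01"}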