PhoPro 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PhoPro/__init__.py ADDED
@@ -0,0 +1,14 @@
1
# Re-export the core photometry classes and the simulator at package level.
from .core import PhotometryData, PhotometryExperiment, PhotometryLoader, PhotometryPipeline, TDTLoader, CSVLoader
from .sim import SimulatedPhotometry

# Package version string.
__version__ = "0.5.0"

# Names exported by ``from PhoPro import *``; mirrors the imports above.
__all__ = [
"PhotometryData",
"PhotometryExperiment",
"PhotometryLoader",
"PhotometryPipeline",
"TDTLoader",
"CSVLoader",
"SimulatedPhotometry",
]
PhoPro/analysis/FMM.py ADDED
@@ -0,0 +1,504 @@
1
+ """Functional mixed-model helpers backed by fastFMM."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from os import PathLike
7
+ from typing import TYPE_CHECKING
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ import re
12
+
13
+ from plotnine import ggplot
14
+ from ..utils import graphing
15
+
16
+ if TYPE_CHECKING:
17
+ from ..core.PhotometeryData import PhotometryData
18
+
19
+ ############################
20
+ #region --- RESULT CLASS ---
21
+ ############################
22
+ @dataclass(slots=True)
23
+ class FMMResult:
24
+ """Wrapper around fastFMM's multi-indexed result table."""
25
+
26
+ df: pd.DataFrame
27
+ formula: str | None = None
28
+
29
+ # --- dunders ---
30
+ def __post_init__(self) -> None:
31
+ """Validate the fastFMM result table."""
32
+ if not isinstance(self.df.columns, pd.MultiIndex):
33
+ raise ValueError("FMMResult.df must have MultiIndex columns.")
34
+
35
+ def __str__(self) -> str:
36
+ """Return the string representation of the result table."""
37
+ return self.df.__str__()
38
+
39
+ def __repr__(self) -> str:
40
+ """Return the interactive representation of the result table."""
41
+ return self.df.__repr__()
42
+
43
+ # --- properties ---
44
+ @property
45
+ def column_groups(self) -> list[str]:
46
+ """Top-level column groups in the result table."""
47
+ return self.df.columns.get_level_values(0).unique().to_list()
48
+
49
+ @property
50
+ def terms(self) -> list[str]:
51
+ """Model coefficient terms, excluding metadata groups."""
52
+ return [
53
+ col for col in self.column_groups
54
+ if col not in {"time", "AIC"}
55
+ ]
56
+
57
+ @property
58
+ def time(self) -> pd.Series:
59
+ """Time values indexed like the result table."""
60
+ if ("time", "time") in self.df.columns:
61
+ return self.df[("time", "time")]
62
+
63
+ if "time" in self.column_groups:
64
+ time_df = self.df["time"]
65
+ if time_df.shape[1] == 1:
66
+ out = time_df.iloc[:, 0]
67
+ out.name = "time"
68
+ return out
69
+
70
+ raise KeyError("FMMResult does not contain a time column.")
71
+
72
+ @property
73
+ def coefficients(self) -> pd.DataFrame:
74
+ """All coefficient-term columns."""
75
+ if not self.terms:
76
+ return self.df.iloc[:, 0:0]
77
+
78
+ return self.df.loc[:, pd.IndexSlice[self.terms, :]]
79
+
80
+ @property
81
+ def aic(self) -> pd.DataFrame:
82
+ """AIC columns returned by fastFMM."""
83
+ if "AIC" not in self.column_groups:
84
+ raise KeyError("FMMResult does not contain AIC columns.")
85
+ return pd.DataFrame(self.df["AIC"])
86
+
87
+ # --- I/O ---
88
+ def to_csv(
89
+ self,
90
+ path: str | PathLike[str],
91
+ **kwargs,
92
+ ) -> None:
93
+ """Write the underlying multi-indexed result table to CSV.
94
+
95
+ Parameters
96
+ ----------
97
+ path : str or PathLike[str]
98
+ Output CSV path.
99
+ **kwargs
100
+ Additional keyword arguments passed to ``DataFrame.to_csv``.
101
+ """
102
+ self.df.to_csv(path, **kwargs)
103
+
104
+ @classmethod
105
+ def from_csv(
106
+ cls,
107
+ path: str | PathLike[str],
108
+ formula: str | None = None,
109
+ **kwargs,
110
+ ) -> "FMMResult":
111
+ """Read an ``FMMResult`` from a CSV written by ``to_csv``.
112
+
113
+ Parameters
114
+ ----------
115
+ path : str or PathLike[str]
116
+ Input CSV path.
117
+ formula : str or None, default=None
118
+ Formula associated with the result.
119
+ **kwargs
120
+ Additional keyword arguments passed to ``pandas.read_csv``.
121
+
122
+ Returns
123
+ -------
124
+ FMMResult
125
+ Loaded result object.
126
+ """
127
+ read_kwargs = {"header": [0, 1], "index_col": 0} | kwargs
128
+ df = pd.read_csv(path, **read_kwargs)
129
+ return cls(df=df, formula=formula)
130
+
131
+ # --- access ---
132
+ def term(self, name: str, include_time: bool = True) -> pd.DataFrame:
133
+ """Return all statistics for one model term.
134
+
135
+ Parameters
136
+ ----------
137
+ name : str
138
+ Model term name.
139
+ include_time : bool, default=True
140
+ If ``True``, prepend a ``time`` column.
141
+
142
+ Returns
143
+ -------
144
+ pd.DataFrame
145
+ Statistics for the selected term.
146
+
147
+ Raises
148
+ ------
149
+ KeyError
150
+ If ``name`` is not a model term.
151
+ """
152
+ if name not in self.terms:
153
+ raise KeyError(f"Unknown FMM term: {name}")
154
+
155
+ out: pd.DataFrame = self.df[name].copy()
156
+ if include_time:
157
+ out.insert(0, "time", self.time.to_numpy())
158
+ return out
159
+
160
+ def stat(self, name: str) -> pd.DataFrame:
161
+ """Return one statistic across all model terms.
162
+
163
+ Parameters
164
+ ----------
165
+ name : str
166
+ Statistic name in the second column-index level.
167
+
168
+ Returns
169
+ -------
170
+ pd.DataFrame
171
+ Statistic values across all coefficient terms.
172
+ """
173
+ return pd.DataFrame(self.coefficients.xs(name, level=1, axis=1).copy())
174
+
175
+ # --- export ---
176
+ def to_long(
177
+ self,
178
+ only_terms: list[str] | None = None,
179
+ stat_map: dict[str, str] | None = None,
180
+ ) -> pd.DataFrame:
181
+ """Convert the FMM result table to long format.
182
+
183
+ Parameters
184
+ ----------
185
+ only_terms : list[str] or None, default=None
186
+ Model terms to include. If ``None``, all coefficient terms are
187
+ included.
188
+ stat_map : dict[str, str] or None, default=None
189
+ Mapping from output column names to source statistic names in the
190
+ fastFMM table.
191
+
192
+ Returns
193
+ -------
194
+ pd.DataFrame
195
+ Long dataframe with time, term, and selected statistics.
196
+ """
197
+ if stat_map is None:
198
+ stat_map = {
199
+ "value": "beta",
200
+ "lower": "lower",
201
+ "upper": "upper",
202
+ "lower_joint": "lower_joint",
203
+ "upper_joint": "upper_joint",
204
+ }
205
+
206
+ coefficients = self.coefficients.rename_axis(
207
+ index="time_idx",
208
+ columns=["term", "stat"],
209
+ )
210
+
211
+ try:
212
+ long_df = coefficients.stack(level="term", future_stack=True)
213
+ except TypeError:
214
+ long_df = coefficients.stack(level="term")
215
+
216
+ long_df = long_df.reset_index()
217
+ long_df.columns.name = None
218
+
219
+ rename_map = {
220
+ source: target
221
+ for target, source in stat_map.items()
222
+ if source in long_df.columns
223
+ }
224
+ long_df = long_df.rename(columns=rename_map)
225
+
226
+ time_df = self.time.rename("time").rename_axis("time_idx").reset_index()
227
+ long_df = long_df.merge(time_df, on="time_idx", how="left")
228
+
229
+ keep_cols = ["time_idx", "time", "term"] + [
230
+ col for col in stat_map
231
+ if col in long_df.columns
232
+ ]
233
+
234
+ only_terms = self.terms if only_terms is None else only_terms
235
+ keep_terms = long_df['term'].isin(only_terms)
236
+
237
+ return long_df.loc[keep_terms, keep_cols]
238
+
239
+ # --- plotting ---
240
+ def plot(
241
+ self,
242
+ only_terms: list[str] | None = None,
243
+ line_kwargs: dict = {},
244
+ hline_kwargs: dict = {},
245
+ ribbon_inner_kwargs: dict = {},
246
+ ribbon_outer_kwargs: dict = {},
247
+ theme_kwargs: dict = {},
248
+ ) -> ggplot:
249
+ """Plot model terms and confidence bands.
250
+
251
+ Parameters
252
+ ----------
253
+ only_terms : list[str] or None, default=None
254
+ Model terms to plot. If ``None``, all coefficient terms are plotted.
255
+ line_kwargs : dict, default={}
256
+ Keyword arguments forwarded to line geoms.
257
+ hline_kwargs : dict, default={}
258
+ Keyword arguments forwarded to horizontal reference-line geoms.
259
+ ribbon_inner_kwargs : dict, default={}
260
+ Keyword arguments forwarded to inner confidence-band ribbon geoms.
261
+ ribbon_outer_kwargs : dict, default={}
262
+ Keyword arguments forwarded to outer confidence-band ribbon geoms.
263
+ theme_kwargs : dict, default={}
264
+ Keyword arguments forwarded to the plot theme helper.
265
+
266
+ Returns
267
+ -------
268
+ ggplot
269
+ Plot object.
270
+ """
271
+
272
+ long_df = self.to_long(only_terms=only_terms)
273
+
274
+ p = graphing.plot_FMM_result(
275
+ long_df,
276
+ line_kwargs=line_kwargs,
277
+ hline_kwargs=hline_kwargs,
278
+ ribbon_inner_kwargs=ribbon_inner_kwargs,
279
+ ribbon_outer_kwargs=ribbon_outer_kwargs,
280
+ theme_kwargs=theme_kwargs,
281
+ )
282
+
283
+ return p
284
+
285
+ #endregion
286
+
287
+ #######################
288
+ #region --- FMM API ---
289
+ #######################
290
def run_fastFMM(
    data: PhotometryData,
    formula: str,
    factor_cols: dict[str, str | None] | None = None,
    parallel: bool = True,
    family: str = "gaussian",
    analytic: bool = True,
    var: bool = True,
    silent: bool = False,
    argvals: list[int] | None = None,
    nknots_min: int | None = None,
    nknots_min_cov: int | None = 35,
    smooth_method: str = "GCV.Cp",
    splines: str = "tp",
    design_mat: bool = False,
    residuals: bool = False,
    n_boots: int = 500,
    seed: int = 1,
    subj_id: str | None = None,
    n_cores: int | None = None,
    caic: bool = False,
    randeffs: bool = False,
    non_neg: int = 0,
    MoM: int = 1,
    concurrent: bool = False,
    impute_outcome: bool = False,
    override_zero_var: bool = False,
    unsmooth: bool = False,
) -> FMMResult:
    """Run a fastFMM model on trial-wise photometry data.

    This is a wrapper around ``fast-fmm-rpy2`` that converts
    `PhotometryData` into the wide table expected by fastFMM and wraps the
    multi-indexed output table in `FMMResult`.

    Parameters
    ----------
    data : PhotometryData
        Photometry data used to fit the functional mixed model.
    formula : str
        Formula passed to fastFMM. It must begin with a signal column prefix
        present in the exported wide dataframe.
    factor_cols : dict[str, str or None] or None, default=None
        Metadata columns to convert to R factors. Values specify optional
        reference levels. ``None`` means no columns are factored.
    parallel : bool, default=True
        Whether fastFMM should run in parallel.
    family : str, default='gaussian'
        Model family passed to fastFMM.
    analytic : bool, default=True
        Whether to use analytic inference instead of bootstrap inference.
    var : bool, default=True
        Whether to include within-timepoint variance in the model.
    silent : bool, default=False
        Whether to suppress model output.
    argvals : list[int] or None, default=None
        Functional-domain indexes used in the model. ``None`` uses all points.
    nknots_min : int or None, default=None
        Minimum knots for coefficient smoothing.
    nknots_min_cov : int or None, default=35
        Minimum knots for covariance smoothing.
    smooth_method : str, default='GCV.Cp'
        Smoothing-parameter selection method.
    splines : str, default='tp'
        Spline type used by fastFMM.
    design_mat : bool, default=False
        Whether to return the design matrix.
    residuals : bool, default=False
        Whether to save residuals from the unsmoothed LME.
    n_boots : int, default=500
        Number of bootstrap samples.
    seed : int, default=1
        Seed used by fastFMM bootstrap routines.
    subj_id : str or None, default=None
        Column containing subject IDs.
    n_cores : int or None, default=None
        Number of cores used for parallelization.
    caic : bool, default=False
        Whether to calculate CAIC.
    randeffs : bool, default=False
        Whether to return random-effect estimates.
    non_neg : int, default=0
        Non-negativity constraint mode passed to fastFMM.
    MoM : int, default=1
        Method-of-moments estimator setting.
    concurrent : bool, default=False
        Whether to fit a concurrent model.
    impute_outcome : bool, default=False
        Whether to impute missing outcome values with FPCA.
    override_zero_var : bool, default=False
        Whether to proceed when exported columns have zero variance.
    unsmooth : bool, default=False
        Whether to return raw unsmoothed coefficient and variance estimates.

    Returns
    -------
    FMMResult
        Wrapped fastFMM result table.

    Raises
    ------
    ValueError
        If ``formula`` does not begin with a signal prefix present in the
        exported wide dataframe.
    """
    # Lazy import so rpy2 / fast-fmm-rpy2 are only required when a model is run.
    from rpy2.robjects import r
    from rpy2.rinterface import NULL  # type: ignore
    from fast_fmm_rpy2.ingest import pass_pandas_to_r
    from fast_fmm_rpy2.fmm_run import fui
    from fast_fmm_rpy2.plot_fui import plot_fui

    def _factor_r_var(r_var: str, col: str, ref_lvl: str | None) -> None:
        """Factor one R dataframe column and optionally set a reference level."""
        # NOTE(review): ``col`` and ``ref_lvl`` are interpolated into R source
        # verbatim; names containing a double quote would break the command.
        r(f'{r_var}[,"{col}"] = factor({r_var}[,"{col}"], ordered = "FALSE")')
        if ref_lvl is not None:
            r(f'{r_var}[,"{col}"] = relevel({r_var}[,"{col}"], ref = "{ref_lvl}")')

    def _none_to_null(val):
        """Convert Python None to R NULL; pass every other value through."""
        return NULL if val is None else val

    # A fresh dict per call avoids sharing a mutable default between calls.
    factor_cols = {} if factor_cols is None else factor_cols

    # step 0: convert Photometry data to dataframe
    # Split the formula on its operators and strip parentheses to recover the
    # variable names it mentions; only those found in ``data.obs`` are exported.
    formula_tokens = (
        pd.Series(re.split(r"[\+\*\|:~]", formula))
        .str.replace(r'[\(\)]', '', regex=True)
        .str.strip()
        .unique()
        .tolist()
    )
    obs_cols = [col for col in formula_tokens if col in data.obs]

    df = data.trials_to_wide_df(
        layer=None,
        obs_cols=obs_cols,
        signal_prefix='photometry',
        downsample=None,
    )

    # step 1: validate inputs
    # Every nullable argument is mapped to R NULL, including
    # ``nknots_min_cov``, whose annotation also allows ``None``.
    argvals = _none_to_null(argvals)
    nknots_min = _none_to_null(nknots_min)
    nknots_min_cov = _none_to_null(nknots_min_cov)
    subj_id = _none_to_null(subj_id)
    n_cores = _none_to_null(n_cores)

    # Candidate signal prefixes are the text before the first "." of every
    # exported column name that contains a ".".
    potential_prefixes = (
        df.columns[
            df.columns.str.contains(".", regex=False)
        ]
        .str.split(".")
        .str[0].unique()
        .to_list()
    )
    # ``startswith`` with an empty tuple is False, matching "no prefix found".
    if not formula.startswith(tuple(potential_prefixes)):
        raise ValueError(f'Formula ({formula}) does not begin with any potential signal prefixes ({potential_prefixes})')

    # step 2: pass dataframe to R and factor vars
    pass_pandas_to_r(df, r_var_name='dat')
    for col, ref_lvl in factor_cols.items():
        _factor_r_var('dat', col, ref_lvl)

    # step 3: run fastFMM
    model = fui(
        csv_filepath=None,
        r_var_name='dat',
        formula=formula,
        parallel=parallel,
        family=family,
        analytic=analytic,
        var=var,
        silent=silent,
        argvals=argvals,
        nknots_min=nknots_min,
        nknots_min_cov=nknots_min_cov,
        smooth_method=smooth_method,
        splines=splines,
        design_mat=design_mat,
        residuals=residuals,
        n_boots=n_boots,
        seed=seed,
        subj_id=subj_id,
        n_cores=n_cores,
        caic=caic,
        randeffs=randeffs,
        non_neg=non_neg,
        MoM=MoM,
        concurrent=concurrent,
        impute_outcome=impute_outcome,
        override_zero_var=override_zero_var,
        unsmooth=unsmooth,
    )

    # step 4: plot results and retrieve data
    # Only the per-term tables are used; the figures are discarded.
    _, model_res = plot_fui(model, return_data=True)  # type: ignore

    # step 5: coerce results into multi indexed dataframe
    # Every table is re-indexed on the first term's (integer) index so the
    # concat below lines up row for row.
    first_df = next(iter(model_res.values()))
    shared_idx = first_df.index.astype(int)

    combined: dict[str, pd.DataFrame] = {
        coeff: coeff_df.set_index(shared_idx).drop(columns='s')
        for coeff, coeff_df in model_res.items()
    }
    # NOTE(review): assumes ``data.ts`` has one entry per result row; confirm
    # this still holds when ``argvals`` selects a subset of time points.
    combined['time'] = pd.DataFrame({'time': data.ts}).set_index(shared_idx)
    combined['AIC'] = model.getbyname('aic').set_index(shared_idx)

    out = pd.concat(combined, axis=1)
    return FMMResult(out, formula=formula)
503
+
504
+ #endregion
@@ -0,0 +1,20 @@
1
+ """Analysis tools for artifacts, peaks, group comparisons, and FMM models."""
2
+
3
# Re-export the public result classes, detectors, and test helpers of each submodule.
from .artifact import ArtifactResult, ODS_Detector, Spline_Corrector
from .comparison import ClusterTestResult, cluster_depth_test, cluster_permutation_test
from .FMM import FMMResult, run_fastFMM
from .peaks import PeakResult, StaticThresholdDetector, RollingThresholdDetector

# Names exported by ``from <package>.analysis import *``; mirrors the imports above.
__all__ = [
'ArtifactResult',
'ODS_Detector',
'Spline_Corrector',
'ClusterTestResult',
'cluster_depth_test',
'cluster_permutation_test',
'FMMResult',
'run_fastFMM',
'PeakResult',
'StaticThresholdDetector',
'RollingThresholdDetector'
]