fips 0.1.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fips/__init__.py ADDED
@@ -0,0 +1,35 @@
1
"""
Flexible Inverse Problem Solver (FIPS).

A Pythonic framework for solving linear inverse problems using Bayesian estimation.
Provides data structures for state vectors, observations, forward operators, and
covariance matrices; estimators for computing posteriors; and interfaces for
serialization, visualization, and specialized applications like atmospheric
flux inversion.
"""

import logging

from .covariance import CovarianceMatrix
from .estimators import Estimator, available_estimators
from .matrix import Matrix, MatrixBlock
from .operators import ForwardOperator, convolve
from .pipeline import InversionPipeline
from .problem import InverseProblem
from .vector import Block, Vector

# Library convention (see the logging HOWTO for libraries): attach a
# NullHandler so that applications importing fips without configuring
# logging do not see "No handlers could be found" warnings.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Public API re-exported at package level.
__all__ = [
    "Block",
    "Vector",
    "Matrix",
    "MatrixBlock",
    "Estimator",
    "available_estimators",
    "ForwardOperator",
    "CovarianceMatrix",
    "InversionPipeline",
    "InverseProblem",
    "convolve",
]
fips/aggregators.py ADDED
@@ -0,0 +1,377 @@
1
+ """
2
+ Data aggregation utilities for inverse problems.
3
+
4
+ This module provides functions for aggregating and integrating data over time
5
+ and space, particularly useful for processing observations and state vectors
6
+ into compatible resolutions.
7
+ """
8
+
9
+ from collections.abc import Callable
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ from scipy.sparse import csr_matrix
14
+
15
+ from fips.matrix import Matrix, MatrixBlock
16
+ from fips.vector import Block, Vector
17
+
18
+
19
+ def integrate_over_time_bins(
20
+ data: pd.DataFrame | pd.Series, time_bins: pd.IntervalIndex, time_dim: str = "time"
21
+ ) -> pd.DataFrame | pd.Series:
22
+ """
23
+ Integrate data over time bins.
24
+
25
+ Parameters
26
+ ----------
27
+ data : pd.DataFrame | pd.Series
28
+ Data to integrate.
29
+ time_bins : pd.IntervalIndex
30
+ Time bins for integration.
31
+ time_dim : str, optional
32
+ Time dimension name, by default 'time'
33
+
34
+ Returns
35
+ -------
36
+ pd.DataFrame | pd.Series
37
+ Integrated footprint. The bin labels are set to the left edge of the bin.
38
+ """
39
+ is_series = isinstance(data, pd.Series)
40
+
41
+ dims = data.index.names
42
+ if time_dim not in dims:
43
+ raise ValueError(f"time_dim '{time_dim}' not found in data index levels {dims}")
44
+ other_levels = [lvl for lvl in dims if lvl != time_dim]
45
+
46
+ data = data.reset_index()
47
+
48
+ # Use pd.cut to bin the data by time into time bins.
49
+ # Cast the time column to match the resolution of time_bins (e.g. datetime64[us]
50
+ # vs datetime64[ns]) to avoid a ValueError on pandas 3.x when the dtypes differ.
51
+ time_col = data[time_dim]
52
+ if hasattr(time_bins, "dtype") and hasattr(time_bins.dtype, "subtype"):
53
+ target_dtype = time_bins.dtype.subtype
54
+ if hasattr(time_col, "dt") and time_col.dtype != target_dtype:
55
+ time_col = time_col.astype(target_dtype)
56
+ data[time_dim] = pd.cut(time_col, bins=time_bins, include_lowest=True, right=False)
57
+
58
+ # Set Intervals to the left edge of the bin (start of time interval).
59
+ # Use cat.rename_categories() instead of .apply() so that NaN values
60
+ # (observations outside all bins) are left untouched — .apply() in
61
+ # pandas 3.x also passes np.nan through the mapper, which has no .left.
62
+ data[time_dim] = data[time_dim].cat.rename_categories(lambda x: x.left)
63
+
64
+ # Group the date by the time bins & any other existing levels
65
+ grouped = data.groupby([time_dim] + other_levels, observed=True)
66
+
67
+ # Sum over the groups
68
+ integrated = grouped.sum()
69
+
70
+ # Order the index levels if MultiIndex
71
+ if isinstance(integrated.index, pd.MultiIndex):
72
+ integrated = integrated.reorder_levels(list(dims))
73
+
74
+ if is_series:
75
+ # Return a Series if the input was a Series
76
+ return integrated.iloc[:, 0]
77
+ return integrated
78
+
79
+
80
class ObsAggregator:
    """
    Aggregates the observation space of an inverse problem.

    Builds a sparse (n_agg x n_obs) weight matrix W and applies it to each
    component of the problem::

        z_agg = W @ z            # aggregated observations
        H_agg = W @ H            # aggregated forward operator
        S_z_agg = W @ S_z @ W.T  # covariance propagation
        c_agg = W @ c            # aggregated constant (if vector)

    For ``func='mean'`` each non-zero entry in row i equals 1/nᵢ (the
    reciprocal of the group size), so ``W @ z`` yields group means and
    ``W @ S_z @ W.T`` scales variances by 1/nᵢ². For ``func='sum'`` every
    entry is 1. Only ``'mean'`` and ``'sum'`` are supported because other
    functions do not have a well-defined covariance propagation rule.

    Grouping interface
    ------------------
    Exactly one of ``by`` or ``level`` must be provided:

    - ``by`` : an index level name (str), a list of level names, or a
      callable that accepts the obs ``pd.Index`` and returns group labels.
    - ``level`` + ``freq`` : resample a datetime index level at the given
      pandas offset alias (e.g. ``level='obs_time', freq='D'``). All other
      index levels are preserved as exact-match grouping keys.

    When the obs index has a ``'block'`` level it is always prepended as a
    grouping key, ensuring observations from different blocks are never
    merged.

    Partial aggregation
    -------------------
    ``blocks`` restricts aggregation to the named block(s). Observations
    belonging to other blocks are passed through unchanged via identity rows
    in W, so the returned arrays cover the full observation space.

    Parameters
    ----------
    by : str | list[str] | Callable, optional
        Explicit grouping specification. Mutually exclusive with ``level``.
    level : str, optional
        Index level to group / resample. Requires either a matching level
        name in the obs index or use alongside ``freq``.
    freq : str, optional
        Pandas offset alias for resampling ``level`` (e.g. ``'D'``, ``'h'``).
    func : {'mean', 'sum'}
        Aggregation function. Default ``'mean'``.
    blocks : str | list[str], optional
        Block name(s) to aggregate. Unlisted blocks pass through as-is.

    Apply to inverse problem components
    -----------------------------------
    apply(obs, forward_operator, modeldata_mismatch, constant)
        Apply the aggregation to the inverse problem components.
    """

    by: str | list[str] | Callable | None
    """Explicit grouping specification. Mutually exclusive with 'level'."""
    level: str | None
    """Index level to group / resample. Requires either a matching level name in the obs index or use alongside 'freq'."""
    freq: str | None
    """Pandas offset alias for resampling 'level' (e.g. 'D', 'h')."""
    func: str
    """Aggregation function, either 'mean' or 'sum'. Default is 'mean'."""
    blocks: list[str] | None
    """Block name(s) to aggregate. Unlisted blocks pass through as-is."""

    def __init__(
        self,
        by: str | list[str] | Callable | None = None,
        level: str | None = None,
        freq: str | None = None,
        func: str = "mean",
        blocks: str | list[str] | None = None,
    ):
        # Only 'mean' and 'sum' admit an exact linear covariance transform
        # W @ S_z @ W.T, so anything else is rejected up front.
        if func not in {"mean", "sum"}:
            raise ValueError(
                "func must be 'mean' or 'sum' for valid covariance propagation."
            )

        if by is None and level is None:
            raise ValueError("Must provide either 'by' or 'level'.")

        self.by = by
        self.level = level
        self.freq = freq
        self.func = func
        # Normalize a single block name to a one-element list for .isin().
        self.blocks = [blocks] if isinstance(blocks, str) else blocks

    def _build_operator(self, obs_index: pd.Index) -> tuple[csr_matrix, pd.Index]:
        """
        Build W and the aggregated index from an obs ``pd.Index``.

        Constructs W in COO format (data, row, col) — one entry per input
        observation — then converts to CSR for efficient matrix products.
        Returns the (n_agg x n_obs) weight matrix and the new row index.
        """
        n_obs = len(obs_index)

        # Which observations are targeted for aggregation?
        # Untargeted observations become identity (passthrough) rows in W.
        # NOTE(review): if self.blocks is set but the index has no 'block'
        # level, everything is targeted (mask all True) — confirm that
        # silently ignoring `blocks` is intended rather than raising.
        if self.blocks is not None and "block" in obs_index.names:
            mask = obs_index.get_level_values("block").isin(self.blocks)
        else:
            mask = np.ones(n_obs, dtype=bool)

        # COO triplets — one entry per obs; col_indices[i] = i always
        # (each obs maps to exactly one output row).
        row_indices = np.empty(n_obs, dtype=int)
        col_indices = np.arange(n_obs)
        data = np.empty(n_obs, dtype=float)

        new_index_list = []  # index labels for each output row
        n_agg_targets = 0  # rows produced by aggregation (excludes passthrough)

        # --- Build aggregated rows for the targeted block(s) ---
        if mask.any():
            target_idx = obs_index[mask]
            # dummy_series is just a carrier for the index so we can use groupby
            dummy_series = pd.Series(np.arange(len(target_idx)), index=target_idx)

            # Build grouping keys. "block" is always first to prevent
            # cross-block merging even when other index values coincide.
            keys = []
            if "block" in target_idx.names:
                keys.append(pd.Grouper(level="block"))

            if self.by is not None:
                # Use explicit user instructions if provided
                user_keys = self.by if isinstance(self.by, (list, tuple)) else [self.by]
                for k in user_keys:
                    if isinstance(k, str):
                        keys.append(pd.Grouper(level=k))
                    elif callable(k):
                        # Callable receives the target index and returns labels.
                        keys.append(k(target_idx))
                    else:
                        keys.append(k)
            else:
                # 'level' mode: resample self.level (if freq given), keep every
                # other index level as an exact-match grouping key.
                for lvl in target_idx.names:
                    if lvl == "block":
                        continue  # already grouped as the first key
                    if lvl == self.level:
                        if self.freq is not None:
                            keys.append(pd.Grouper(level=lvl, freq=self.freq))
                        else:
                            keys.append(pd.Grouper(level=lvl))
                    else:
                        keys.append(pd.Grouper(level=lvl))

            # pandas groupby requires a scalar (not a list) for a single key
            if len(keys) == 1:
                keys = keys[0]

            grouper = dummy_series.groupby(keys, sort=True)

            # ngroup() maps each obs to a consecutive integer row id in W;
            # with sort=True these ids follow the sorted group-key order, so
            # they line up with grouper.size().index used for the new index.
            agg_target_idx = grouper.size().index
            target_group_ids = grouper.ngroup().to_numpy()
            n_agg_targets = len(agg_target_idx)

            row_indices[mask] = target_group_ids

            if self.func == "mean":
                counts = np.bincount(target_group_ids, minlength=n_agg_targets)
                data[mask] = 1.0 / counts[target_group_ids]  # weight = 1/nᵢ
            else:  # "sum"
                data[mask] = 1.0

            new_index_list.append(agg_target_idx)

        # --- Identity passthrough rows for untargeted observations ---
        if (~mask).any():
            other_idx = obs_index[~mask]
            n_other = (~mask).sum()

            # Row ids continue after the aggregated rows.
            other_group_ids = np.arange(n_agg_targets, n_agg_targets + n_other)
            row_indices[~mask] = other_group_ids
            data[~mask] = 1.0

            new_index_list.append(other_idx)

        W = csr_matrix(
            (data, (row_indices, col_indices)),
            shape=(n_agg_targets + (~mask).sum(), n_obs),
        )

        # Aggregated rows first, passthrough rows second.
        if len(new_index_list) == 1:
            agg_index = new_index_list[0]
        else:
            agg_index = new_index_list[0].append(new_index_list[1])

        return W, agg_index

    def apply(
        self,
        obs: pd.Series | Block | Vector,
        forward_operator: pd.DataFrame | MatrixBlock | Matrix,
        modeldata_mismatch: pd.DataFrame | MatrixBlock | Matrix,
        constant: float | pd.Series | Block | Vector | None = None,
    ):
        """
        Apply W to the inverse problem components.

        Inputs may be bare pandas objects or fips wrapper types (``Vector``,
        ``ForwardOperator``, ``CovarianceMatrix``); return types mirror the
        inputs. See the class docstring for the mathematical transforms.

        The aggregator ensures all inputs are properly aligned to obs.index
        before building the weight matrix W.

        Parameters
        ----------
        obs : pd.Series | Block | Vector
            Observation vector to be aggregated.
        forward_operator : pd.DataFrame | MatrixBlock | Matrix
            Forward operator matrix to be aggregated.
        modeldata_mismatch : pd.DataFrame | MatrixBlock | Matrix
            Model-data mismatch covariance matrix to be aggregated.
        constant : float | pd.Series | Block | Vector | None, optional
            Optional constant offset vector to be aggregated. Scalars are invariant to aggregation. Default is None.

        Returns
        -------
        tuple
            Aggregated (obs, forward_operator, modeldata_mismatch, constant) in the same types as the inputs.

        Raises
        ------
        ValueError
            If obs, the forward operator, or the mismatch covariance is empty.
        """

        # Unwrap fips types to the underlying pandas object for arithmetic.
        # NOTE(review): duck-typed on a `.data` attribute — assumes all fips
        # wrapper types expose the pandas object there; confirm.
        def unwrap(obj):
            return obj.data if hasattr(obj, "data") else obj

        z_df = unwrap(obs)
        H_df = unwrap(forward_operator)
        S_z_df = unwrap(modeldata_mismatch)

        # Ensure matrices are aligned to obs.index before aggregation
        # This guarantees W has compatible dimensions for matrix operations
        H_df = H_df.reindex(index=z_df.index, fill_value=0.0)
        S_z_df = S_z_df.reindex(index=z_df.index, columns=z_df.index, fill_value=0.0)
        if constant is not None and not np.isscalar(constant):
            c_df = unwrap(constant)
            c_df = c_df.reindex(index=z_df.index, fill_value=0.0)
        else:
            c_df = None
        if any(len(x) == 0 for x in [z_df, H_df, S_z_df]):
            raise ValueError("Input data contains empty dimensions, cannot aggregate.")

        W, agg_idx = self._build_operator(z_df.index)

        # Obs vector aggregation
        # z_agg = W @ z
        z_agg = pd.Series(W @ z_df.values, index=agg_idx, name=z_df.name)

        # Forward operator aggregation
        # H_agg = W @ H (preserve SparseDtype if present)
        if all(isinstance(dt, pd.SparseDtype) for dt in H_df.dtypes):
            H_agg_vals = W @ H_df.sparse.to_coo().tocsr()
            H_agg = pd.DataFrame.sparse.from_spmatrix(
                H_agg_vals, index=agg_idx, columns=H_df.columns
            ).fillna(0.0)  # Ensure fill_value is 0.0 for sparse DataFrame
        else:
            H_agg = pd.DataFrame(W @ H_df.values, index=agg_idx, columns=H_df.columns)

        # Covariance aggregation with full propagation
        # S_z_agg = W @ S_z @ W.T — for diagonal S_z with variance σ² and
        # mean aggregation of n obs this yields σ²/n on the diagonal.
        S_z_agg_vals = W @ S_z_df.values @ W.T
        if hasattr(S_z_agg_vals, "toarray"):
            # scipy may return a sparse result; densify for the DataFrame.
            S_z_agg_vals = S_z_agg_vals.toarray()
        S_z_agg = pd.DataFrame(S_z_agg_vals, index=agg_idx, columns=agg_idx)

        def repack(orig_obj, new_df):
            """Re-wrap new_df in the same type as orig_obj if it is a fips wrapper."""
            # NOTE(review): assumes every fips wrapper accepts (data, name=...)
            # in its constructor and exposes a .name attribute — confirm.
            return (
                type(orig_obj)(new_df, name=orig_obj.name)
                if hasattr(orig_obj, "data")
                else new_df
            )

        # c_agg = W @ c (scalars are invariant to aggregation)
        if constant is None:
            c_agg = None
        elif np.isscalar(constant):
            c_agg = constant  # scalars pass through unchanged
        else:
            c_vals = pd.Series(W @ c_df.values, index=agg_idx, name=c_df.name)
            c_agg = repack(constant, c_vals)

        return (
            repack(obs, z_agg),
            repack(forward_operator, H_agg),
            repack(modeldata_mismatch, S_z_agg),
            c_agg,
        )