pattern-fill 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ from pattern_fill.pattern import DailyPattern, SineComponent
2
+ from pattern_fill.fitting import extract_daily_profile, fit_pattern
3
+ from pattern_fill.sine_fitting import fit_sine_pattern
4
+ from pattern_fill.gap_fill import pattern_fill, pattern_fill_dataset
5
+
6
+ __all__ = [
7
+ "DailyPattern",
8
+ "SineComponent",
9
+ "extract_daily_profile",
10
+ "fit_pattern",
11
+ "fit_sine_pattern",
12
+ "pattern_fill",
13
+ "pattern_fill_dataset",
14
+ ]
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from pattern_fill.pattern import DailyPattern
7
+
8
+
9
+ def extract_daily_profile(
10
+ series: pd.Series,
11
+ resolution_minutes: int = 15,
12
+ aggregation: str = "median",
13
+ ) -> pd.Series:
14
+ """Group a time series by fractional hour-of-day and aggregate.
15
+
16
+ Returns a Series indexed by fractional hour (e.g. 8.25 for 08:15)
17
+ with one value per bin.
18
+ """
19
+ if not isinstance(series.index, pd.DatetimeIndex):
20
+ raise TypeError("series must have a DatetimeIndex")
21
+
22
+ s = series.dropna()
23
+ fractional_hour = s.index.hour + s.index.minute / 60.0 + s.index.second / 3600.0
24
+ bin_edges = np.arange(0, 24 + resolution_minutes / 60, resolution_minutes / 60)
25
+ bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2.0
26
+ bin_idx = np.digitize(fractional_hour, bin_edges) - 1
27
+ bin_idx = np.clip(bin_idx, 0, len(bin_centers) - 1)
28
+
29
+ grouped = pd.Series(s.values, index=bin_idx)
30
+ if aggregation == "median":
31
+ profile_values = grouped.groupby(level=0).median()
32
+ elif aggregation == "mean":
33
+ profile_values = grouped.groupby(level=0).mean()
34
+ else:
35
+ raise ValueError(f"aggregation must be 'median' or 'mean', got {aggregation!r}")
36
+
37
+ result = pd.Series(
38
+ index=bin_centers[profile_values.index.values],
39
+ data=profile_values.values,
40
+ name="daily_profile",
41
+ )
42
+ result.index.name = "hour"
43
+ return result
44
+
45
+
46
def fit_pattern(
    series: pd.Series,
    n_control_points: int = 8,
    resolution_minutes: int = 15,
    aggregation: str = "median",
    name: str = "fitted",
    day_type: str = "all",
) -> DailyPattern:
    """Fit a DailyPattern from observed time series data.

    Extracts a daily profile with ``extract_daily_profile``, samples it at
    *n_control_points* hours evenly spaced over [0, 24), normalizes the
    sampled values to the 0-1 range and wraps them in a periodic
    ``DailyPattern``.

    Parameters
    ----------
    series : pd.Series
        Observations indexed by a ``DatetimeIndex``.
    n_control_points : int
        Number of control points; must be at least 1.
    resolution_minutes : int
        Bin width passed to ``extract_daily_profile``.
    aggregation : str
        Aggregation passed to ``extract_daily_profile``.
    name : str
        Name given to the returned pattern.
    day_type : str
        Day type label given to the returned pattern.

    Returns
    -------
    DailyPattern
        Pattern built with ``periodic=True``. When all sampled values are
        equal, every control point is set to 0.5.

    Raises
    ------
    ValueError
        If *n_control_points* is smaller than 1 or *series* holds no
        non-NaN observation to fit.
    """
    if n_control_points < 1:
        raise ValueError(
            f"n_control_points must be at least 1, got {n_control_points!r}"
        )

    profile = extract_daily_profile(
        series,
        resolution_minutes=resolution_minutes,
        aggregation=aggregation,
    )
    if len(profile) == 0:
        raise ValueError("cannot fit a pattern: series has no non-NaN observations")

    target_hours = np.linspace(0, 24, n_control_points + 1)[:-1]
    # The profile is indexed by bin centres, so hour 0 lies outside the
    # sampled range. Interpolate with a 24 h period so that the midnight
    # control point blends the last and first bins instead of being clamped
    # to the first bin only; this matches the periodic pattern built below.
    cp_values = np.interp(
        target_hours,
        profile.index.values,
        profile.values,
        period=24.0,
    )

    v_min, v_max = cp_values.min(), cp_values.max()
    if v_max - v_min > 0:
        cp_norm = (cp_values - v_min) / (v_max - v_min)
    else:
        # Flat profile: no range to normalize against.
        cp_norm = np.full_like(cp_values, 0.5)

    return DailyPattern(
        hours=target_hours.tolist(),
        values=cp_norm.tolist(),
        name=name,
        periodic=True,
        day_type=day_type,
    )
@@ -0,0 +1,581 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ from typing import Any
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from meteaudata.types import (
9
+ FunctionInfo,
10
+ Parameters,
11
+ ProcessingStep,
12
+ ProcessingType,
13
+ Signal,
14
+ TimeSeries,
15
+ )
16
+
17
+ from pattern_fill.fitting import extract_daily_profile
18
+ from pattern_fill.pattern import DailyPattern
19
+
20
+
21
+ def _find_nan_runs(mask: np.ndarray) -> list[tuple[int, int]]:
22
+ """Return (start, stop) index pairs for contiguous True runs in *mask*."""
23
+ if not mask.any():
24
+ return []
25
+ diff = np.diff(mask.astype(int))
26
+ starts = np.where(diff == 1)[0] + 1
27
+ ends = np.where(diff == -1)[0] + 1
28
+ if mask[0]:
29
+ starts = np.r_[0, starts]
30
+ if mask[-1]:
31
+ ends = np.r_[ends, len(mask)]
32
+ return list(zip(starts.tolist(), ends.tolist()))
33
+
34
+
35
+ def _classify_runs(
36
+ runs: list[tuple[int, int]], series_len: int
37
+ ) -> list[tuple[int, int]]:
38
+ """Return only interior NaN runs, excluding leading and trailing."""
39
+ return [
40
+ (start, stop)
41
+ for start, stop in runs
42
+ if start != 0 and stop != series_len
43
+ ]
44
+
45
+
46
def _select_pattern(
    pattern: DailyPattern | dict[str, DailyPattern],
    timestamp: pd.Timestamp,
) -> DailyPattern:
    """Return the pattern that applies to *timestamp*.

    A single ``DailyPattern`` is returned unchanged. For a dict, the
    ``"weekday"`` (Monday-Friday) or ``"weekend"`` (Saturday-Sunday) entry
    matching the timestamp is preferred, then the ``"all"`` entry; a
    ``KeyError`` is raised when neither key is present.
    """
    if isinstance(pattern, DailyPattern):
        return pattern

    # dayofweek: 0 = Monday ... 6 = Sunday
    key = "weekend" if timestamp.dayofweek >= 5 else "weekday"
    for candidate in (key, "all"):
        if candidate in pattern:
            return pattern[candidate]

    raise KeyError(
        f"No pattern for day_type={key!r} or 'all' in pattern dict "
        f"(available keys: {list(pattern.keys())})"
    )
63
+
64
+
65
+ def _infer_freq_minutes(index: pd.DatetimeIndex) -> float:
66
+ """Infer the sampling frequency in minutes from a DatetimeIndex."""
67
+ freq = pd.infer_freq(index)
68
+ if freq is not None:
69
+ offset = pd.tseries.frequencies.to_offset(freq)
70
+ return offset.nanos / 1e9 / 60
71
+ # Fallback: median of first diffs
72
+ n = min(10, len(index))
73
+ diffs = pd.Series(index[:n]).diff().dropna()
74
+ if len(diffs) == 0:
75
+ return 15.0
76
+ return diffs.median().total_seconds() / 60
77
+
78
+
79
+ def _smoothed_anchor(
80
+ col: pd.Series,
81
+ gap_edge_idx: int,
82
+ side: str,
83
+ blend_n: int,
84
+ ) -> float | None:
85
+ """Compute a noise-resistant anchor value near a gap edge.
86
+
87
+ Returns the weighted average of up to *blend_n* non-NaN points
88
+ adjacent to the gap. Weights increase linearly toward the gap edge
89
+ (the closest point gets the highest weight).
90
+ """
91
+ if side == "left":
92
+ start = max(0, gap_edge_idx - blend_n + 1)
93
+ window = col.iloc[start : gap_edge_idx + 1]
94
+ else:
95
+ end = min(len(col), gap_edge_idx + blend_n)
96
+ window = col.iloc[gap_edge_idx:end]
97
+
98
+ valid = window.dropna()
99
+ if len(valid) == 0:
100
+ return None
101
+
102
+ k = len(valid)
103
+ if side == "left":
104
+ weights = np.arange(1, k + 1, dtype=float)
105
+ else:
106
+ weights = np.arange(k, 0, -1, dtype=float)
107
+
108
+ return float(np.average(valid.values, weights=weights))
109
+
110
+
111
+ def _estimate_data_range(
112
+ col: pd.Series,
113
+ gap_start: int,
114
+ gap_stop: int,
115
+ window: str,
116
+ scaling: str,
117
+ ) -> tuple[float, float]:
118
+ """Estimate (data_min, data_max) for pattern scaling."""
119
+ if scaling == "none":
120
+ return 0.0, 1.0
121
+
122
+ if scaling == "global":
123
+ clean = col.dropna()
124
+ if len(clean) < 2:
125
+ return 0.0, 1.0
126
+ return float(clean.min()), float(clean.max())
127
+
128
+ if scaling != "local":
129
+ raise ValueError(
130
+ f"scaling must be 'local', 'global', or 'none', got {scaling!r}"
131
+ )
132
+
133
+ # Local: data within time window on each side of the gap
134
+ gap_left_ts = col.index[gap_start]
135
+ gap_right_ts = col.index[gap_stop - 1]
136
+ td = pd.Timedelta(window)
137
+
138
+ left_data = col.loc[: col.index[gap_start - 1]].dropna() if gap_start > 0 else pd.Series(dtype=float)
139
+ if len(left_data) > 0:
140
+ left_data = left_data.loc[left_data.index >= (gap_left_ts - td)]
141
+
142
+ right_data = col.loc[col.index[gap_stop] :].dropna() if gap_stop < len(col) else pd.Series(dtype=float)
143
+ if len(right_data) > 0:
144
+ right_data = right_data.loc[right_data.index <= (gap_right_ts + td)]
145
+
146
+ nearby = pd.concat([left_data, right_data])
147
+ if len(nearby) < 2:
148
+ clean = col.dropna()
149
+ if len(clean) < 2:
150
+ return 0.0, 1.0
151
+ return float(clean.min()), float(clean.max())
152
+
153
+ return float(nearby.min()), float(nearby.max())
154
+
155
+
156
+ def _blend_fill(
157
+ p_scaled: np.ndarray,
158
+ left_anchor: float | None,
159
+ right_anchor: float | None,
160
+ blend_n: int,
161
+ ) -> np.ndarray:
162
+ """Apply cosine-decay boundary corrections to pre-scaled pattern values.
163
+
164
+ When both boundaries are available, linearly interpolates between the
165
+ anchors and blends with the scaled pattern using a cosine edge-weight
166
+ (1 at each edge, 0 at ``blend_n`` steps inward). This guarantees exact
167
+ boundary continuity even when blend zones overlap in short gaps.
168
+
169
+ For single-boundary cases an additive correction is used instead.
170
+ """
171
+ N = len(p_scaled)
172
+ positions = np.arange(N, dtype=float)
173
+
174
+ if left_anchor is not None and right_anchor is not None:
175
+ # Linearly interpolate between anchors
176
+ alpha = positions / max(N - 1, 1)
177
+ anchor_interp = (1.0 - alpha) * left_anchor + alpha * right_anchor
178
+
179
+ # Cosine edge weight: 1 at the edges, 0 beyond blend_n from edge
180
+ edge_dist = np.minimum(positions, float(N - 1) - positions)
181
+ t = np.clip(edge_dist / max(blend_n, 1), 0.0, 1.0)
182
+ w = 0.5 * (1.0 + np.cos(np.pi * t))
183
+
184
+ return w * anchor_interp + (1.0 - w) * p_scaled
185
+
186
+ # Single-boundary: additive correction that decays inward
187
+ result = p_scaled.copy()
188
+
189
+ if left_anchor is not None:
190
+ r_L = left_anchor - p_scaled[0]
191
+ t_L = np.clip(positions / max(blend_n, 1), 0.0, 1.0)
192
+ w_L = 0.5 * (1.0 + np.cos(np.pi * t_L))
193
+ result += w_L * r_L
194
+
195
+ if right_anchor is not None:
196
+ r_R = right_anchor - p_scaled[-1]
197
+ dist_from_right = float(N - 1) - positions
198
+ t_R = np.clip(dist_from_right / max(blend_n, 1), 0.0, 1.0)
199
+ w_R = 0.5 * (1.0 + np.cos(np.pi * t_R))
200
+ result += w_R * r_R
201
+
202
+ return result
203
+
204
+
205
+ def _compute_expected_area(
206
+ col: pd.Series,
207
+ gap_idx: pd.DatetimeIndex,
208
+ pattern: DailyPattern | dict[str, DailyPattern],
209
+ ) -> float | None:
210
+ """Expected sum of values across a gap, based on the daily profile of clean data.
211
+
212
+ Splits clean data by day type when *pattern* is a dict, so the expected
213
+ area respects weekday/weekend differences.
214
+ """
215
+ clean = col.dropna()
216
+ if len(clean) < 10:
217
+ return None
218
+
219
+ uses_day_types = isinstance(pattern, dict)
220
+
221
+ if uses_day_types:
222
+ profiles: dict[str, pd.Series] = {}
223
+ for dtype in ("weekday", "weekend"):
224
+ mask = (
225
+ clean.index.dayofweek < 5
226
+ if dtype == "weekday"
227
+ else clean.index.dayofweek >= 5
228
+ )
229
+ subset = clean[mask]
230
+ profiles[dtype] = extract_daily_profile(
231
+ subset if len(subset) > 10 else clean, aggregation="mean"
232
+ )
233
+ else:
234
+ profile_all = extract_daily_profile(clean, aggregation="mean")
235
+
236
+ expected = np.empty(len(gap_idx))
237
+ for i, ts in enumerate(gap_idx):
238
+ frac_h = ts.hour + ts.minute / 60.0 + ts.second / 3600.0
239
+ if uses_day_types:
240
+ dtype = "weekday" if ts.dayofweek < 5 else "weekend"
241
+ profile = profiles[dtype]
242
+ else:
243
+ profile = profile_all
244
+ expected[i] = np.interp(frac_h, profile.index.values, profile.values)
245
+
246
+ total = expected.sum()
247
+ return total if abs(total) > 1e-12 else None
248
+
249
+
250
+ # ---------------------------------------------------------------------------
251
+ # Public API
252
+ # ---------------------------------------------------------------------------
253
+
254
+
255
def pattern_fill(
    input_series: list[pd.Series],
    pattern: DailyPattern | dict[str, DailyPattern],
    scaling: str = "local",
    window: str = "24h",
    blend_minutes: int = 60,
    normalize_area: bool = False,
    *args: Any,
    **kwargs: Any,
) -> list[tuple[pd.Series, list[ProcessingStep]]]:
    """Fill NaN gaps using a daily diurnal pattern.

    Conforms to metEAUdata's ``SignalTransformFunctionProtocol``.

    Only interior gaps are filled; leading and trailing NaN runs are left
    untouched. For each gap the pattern is evaluated at the gap timestamps,
    scaled to a data range, optionally area-normalized, and blended toward
    the observations on both sides of the gap.

    All per-gap statistics (scaling range, expected area, boundary anchors)
    are computed from the input observations only, so the value filled into
    one gap never influences the fill of another gap.

    Parameters
    ----------
    input_series : list[pd.Series]
        Series to fill; each must have a ``DatetimeIndex``. The inputs are
        not modified.
    pattern : DailyPattern | dict[str, DailyPattern]
        A single pattern, or a dict keyed by ``"weekday"``, ``"weekend"``
        and/or ``"all"``.
    scaling : str
        ``"local"``, ``"global"`` or ``"none"``; selects how the data range
        used to scale the pattern is estimated.
    window : str
        ``pd.Timedelta`` string giving the reach of the ``"local"`` range
        estimate on each side of a gap.
    blend_minutes : int
        Width (in minutes) of the smoothing window used for anchor
        computation **and** the cosine blend zone inside each gap.
    normalize_area : bool
        When True, the fill's area is normalized to match the expected daily
        profile computed from the observed portions of the series.

    Returns
    -------
    list[tuple[pd.Series, list[ProcessingStep]]]
        One ``(filled_series, [processing_step])`` pair per input series.
        The filled series is renamed ``"<signal name>_PAT-FILL"``.

    Raises
    ------
    TypeError
        If a series does not have a ``DatetimeIndex``.
    """
    if isinstance(pattern, DailyPattern):
        pattern_meta = pattern.to_dict()
    else:
        pattern_meta = {k: v.to_dict() for k, v in pattern.items()}

    func_info = FunctionInfo(
        name="pattern_fill",
        version="0.2.0",
        author="pattern-fill",
        reference="https://github.com/jeandavidt/pattern-fill",
    )
    parameters = Parameters(
        pattern=pattern_meta,
        scaling=scaling,
        window=window,
        blend_minutes=blend_minutes,
        normalize_area=normalize_area,
    )
    processing_step = ProcessingStep(
        type=ProcessingType.GAP_FILLING,
        function_info=func_info,
        parameters=parameters,
        description=(
            "Gap-filling using a daily diurnal pattern with "
            "cosine-blended boundary matching"
        ),
        run_datetime=datetime.datetime.now(),
        requires_calibration=False,
        input_series_names=[str(col.name) for col in input_series],
        suffix="PAT-FILL",
    )

    outputs: list[tuple[pd.Series, list[ProcessingStep]]] = []
    for observed in input_series:
        # `observed` is only ever read; fills are written to `col`. Reading
        # statistics from `col` would let earlier fills leak into the
        # range, area and anchor estimates of later gaps.
        col = observed.copy()
        signal_name, _, _ = Signal.extract_ts_base_and_number(str(col.name))

        if not isinstance(col.index, pd.DatetimeIndex):
            raise TypeError(
                f"Series {col.name} must have a DatetimeIndex, "
                f"got {type(col.index)}"
            )

        freq_min = _infer_freq_minutes(col.index)
        # Blend width expressed in samples, at least one sample.
        blend_n = max(1, int(round(blend_minutes / freq_min)))

        nan_mask = observed.isna().values
        runs = _find_nan_runs(nan_mask)
        interior_runs = _classify_runs(runs, len(observed))

        for start, stop in interior_runs:
            gap_idx = col.index[start:stop]

            # Evaluate pattern over the gap
            pat_vals = np.empty(len(gap_idx))
            for i, ts in enumerate(gap_idx):
                pat = _select_pattern(pattern, ts)
                frac_h = ts.hour + ts.minute / 60.0 + ts.second / 3600.0
                pat_vals[i] = pat.evaluate(np.array([frac_h]))[0]

            # Scale pattern to data range
            data_min, data_max = _estimate_data_range(
                observed, start, stop, window, scaling
            )
            dr = data_max - data_min
            if abs(dr) < 1e-12:
                # Degenerate (flat) range: keep the pattern's own amplitude.
                dr = 1.0
            p_scaled = pat_vals * dr + data_min

            # Area normalization (opt-in)
            if normalize_area:
                expected = _compute_expected_area(observed, gap_idx, pattern)
                actual = p_scaled.sum()
                if expected is not None and abs(actual) > 1e-12:
                    p_scaled *= expected / actual

            # Smoothed boundary anchors
            left_anchor = (
                _smoothed_anchor(observed, start - 1, "left", blend_n)
                if start > 0
                else None
            )
            right_anchor = (
                _smoothed_anchor(observed, stop, "right", blend_n)
                if stop < len(observed)
                else None
            )

            filled = _blend_fill(p_scaled, left_anchor, right_anchor, blend_n)
            col.iloc[start:stop] = filled

        col.name = f"{signal_name}_{processing_step.suffix}"
        outputs.append((col, [processing_step]))

    return outputs
373
+
374
+
375
def pattern_fill_dataset(
    input_signals: list[Signal],
    input_series_names: list[str],
    patterns: list[DailyPattern | dict[str, DailyPattern]],
    mode: str = "load",
    blend_minutes: int = 60,
    scaling: str = "local",
    window: str = "24h",
    *args: Any,
    **kwargs: Any,
) -> list[Signal]:
    """Fill NaN gaps with area normalization at the dataset level.

    Conforms to metEAUdata's ``DatasetTransformFunctionProtocol``.

    Parameters
    ----------
    input_signals : list[Signal]
        One signal (``"concentration"`` / ``"flow"`` modes) or two signals,
        concentration first and flow second (``"load"`` mode).
    input_series_names : list[str]
        Name of the time series to read from each signal, in the same order.
    patterns : list[DailyPattern | dict[str, DailyPattern]]
        One pattern (or day-type dict of patterns) per signal.
    mode : str
        ``"concentration"`` or ``"flow"`` — single signal, filled with
        ``pattern_fill(..., normalize_area=True)``. ``"load"`` — both
        signals are filled without area normalization, then each interior
        concentration gap is rescaled so that the sum of ``conc × flow``
        over the gap matches the mean daily load profile of the timestamps
        where both inputs are observed. The rescaling is skipped when 20 or
        fewer such timestamps exist.
    blend_minutes, scaling, window
        Forwarded unchanged to ``pattern_fill``.

    Returns
    -------
    list[Signal]
        New signals wrapping the filled series: one in single-signal modes,
        ``[concentration, flow]`` in load mode.

    Raises
    ------
    ValueError
        If *mode* is unknown or the number of signals/patterns does not
        match the mode.
    """
    valid_modes = ("concentration", "flow", "load")
    if mode not in valid_modes:
        raise ValueError(f"mode must be one of {valid_modes}, got {mode!r}")

    n_signals, n_patterns = len(input_signals), len(patterns)
    if mode == "load":
        if not (n_signals == 2 and n_patterns == 2):
            raise ValueError(
                "load mode requires exactly 2 signals and 2 patterns "
                f"(concentration, flow); got {n_signals} signals "
                f"and {n_patterns} patterns"
            )
    elif not (n_signals == 1 and n_patterns == 1):
        raise ValueError(
            f"{mode} mode requires exactly 1 signal and 1 pattern; "
            f"got {n_signals} signals and {n_patterns} patterns"
        )

    func_info = FunctionInfo(
        name="pattern_fill_dataset",
        version="0.2.0",
        author="pattern-fill",
        reference="https://github.com/jeandavidt/pattern-fill",
    )

    # Options shared by every pattern_fill call below.
    fill_options = {
        "scaling": scaling,
        "window": window,
        "blend_minutes": blend_minutes,
    }

    # ---- concentration / flow modes (single signal, area-normalized) --------
    if mode != "load":
        source_signal = input_signals[0]
        source_name = input_series_names[0]
        raw_series = source_signal.time_series[source_name].series

        filled_series, steps = pattern_fill(
            [raw_series],
            pattern=patterns[0],
            normalize_area=True,
            **fill_options,
        )[0]

        filled_ts = TimeSeries(series=filled_series, processing_steps=steps)
        return [
            Signal(
                input_data=filled_ts,
                name=Signal.extract_ts_base_and_number(str(filled_series.name))[0],
                provenance=source_signal.provenance,
                units=source_signal.units,
            )
        ]

    # ---- load mode (two signals: concentration + flow) ----------------------
    conc_signal, flow_signal = input_signals
    conc_ts_name, flow_ts_name = input_series_names
    conc_raw = conc_signal.time_series[conc_ts_name].series
    flow_raw = flow_signal.time_series[flow_ts_name].series
    conc_pat, flow_pat = patterns

    # Step 1: fill both signals (without area normalization)
    conc_results = pattern_fill(
        [conc_raw], pattern=conc_pat, normalize_area=False, **fill_options
    )
    flow_results = pattern_fill(
        [flow_raw], pattern=flow_pat, normalize_area=False, **fill_options
    )
    filled_conc, conc_steps = conc_results[0]
    filled_flow, flow_steps = flow_results[0]

    # Step 2: compute daily load profile from clean data, restricted to the
    # timestamps at which both inputs are observed
    conc_obs = conc_raw.dropna()
    flow_obs = flow_raw.dropna()
    shared_idx = conc_obs.index.intersection(flow_obs.index)

    if len(shared_idx) > 20:
        load_obs = pd.Series(
            conc_obs.loc[shared_idx].values * flow_obs.loc[shared_idx].values,
            index=shared_idx,
            name="load",
        )
        split_by_day_type = isinstance(conc_pat, dict)

        if split_by_day_type:
            obs_is_weekday = np.asarray(load_obs.index.dayofweek < 5)
            day_subsets = {
                "weekday": load_obs[obs_is_weekday],
                "weekend": load_obs[~obs_is_weekday],
            }
            # A day type with too few observations uses the overall profile.
            load_profiles: dict[str, pd.Series] = {
                day_key: extract_daily_profile(
                    subset if len(subset) > 10 else load_obs,
                    aggregation="mean",
                )
                for day_key, subset in day_subsets.items()
            }
        else:
            load_profile_all = extract_daily_profile(load_obs, aggregation="mean")

        # Step 3: normalize concentration fills so load matches expected
        interior_gaps = _classify_runs(
            _find_nan_runs(conc_raw.isna().values), len(conc_raw)
        )
        for gap_start, gap_stop in interior_gaps:
            gap_idx = conc_raw.index[gap_start:gap_stop]
            gap_hours = np.asarray(
                gap_idx.hour + gap_idx.minute / 60.0 + gap_idx.second / 3600.0,
                dtype=float,
            )

            # Expected load for the gap period
            if split_by_day_type:
                gap_is_weekday = np.asarray(gap_idx.dayofweek < 5)
                expected_load = np.empty(len(gap_idx))
                for day_key, selector in (
                    ("weekday", gap_is_weekday),
                    ("weekend", ~gap_is_weekday),
                ):
                    lp = load_profiles[day_key]
                    expected_load[selector] = np.interp(
                        gap_hours[selector], lp.index.values, lp.values
                    )
            else:
                expected_load = np.interp(
                    gap_hours,
                    load_profile_all.index.values,
                    load_profile_all.values,
                )
            expected_sum = expected_load.sum()

            # Actual load from fills.
            # NOTE(review): the flow series is sliced with positions taken
            # from the concentration series; this presumably relies on both
            # series sharing the same index — confirm with callers.
            conc_gap = filled_conc.iloc[gap_start:gap_stop].values
            flow_gap = filled_flow.iloc[gap_start:gap_stop].values
            actual_sum = (conc_gap * flow_gap).sum()

            if abs(actual_sum) > 1e-12 and abs(expected_sum) > 1e-12:
                ratio = expected_sum / actual_sum
                filled_conc.iloc[gap_start:gap_stop] = conc_gap * ratio

    # Build processing step for load normalization
    load_step = ProcessingStep(
        type=ProcessingType.GAP_FILLING,
        function_info=func_info,
        parameters=Parameters(
            mode=mode,
            scaling=scaling,
            window=window,
            blend_minutes=blend_minutes,
        ),
        description=(
            "Gap-filling with load-normalized daily pattern "
            "(concentration adjusted so conc × flow matches expected load)"
        ),
        run_datetime=datetime.datetime.now(),
        requires_calibration=False,
        input_series_names=input_series_names,
        suffix="PAT-FILL",
    )

    conc_ts = TimeSeries(
        series=filled_conc,
        processing_steps=conc_steps + [load_step],
    )
    flow_ts = TimeSeries(
        series=filled_flow,
        processing_steps=flow_steps,
    )

    out_conc = Signal(
        input_data=conc_ts,
        name=Signal.extract_ts_base_and_number(str(filled_conc.name))[0],
        provenance=conc_signal.provenance,
        units=conc_signal.units,
    )
    out_flow = Signal(
        input_data=flow_ts,
        name=Signal.extract_ts_base_and_number(str(filled_flow.name))[0],
        provenance=flow_signal.provenance,
        units=flow_signal.units,
    )
    return [out_conc, out_flow]