pattern-fill 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass, field
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from scipy.interpolate import CubicSpline
9
+
10
+
11
@dataclass
class SineComponent:
    """One cosine-shaped term used when composing a daily pattern.

    Parameters
    ----------
    amplitude : float
        Height of the wave; must be non-negative (0-1 range recommended).
    frequency : float
        Number of cycles per day; must be positive. For example:
        - 1.0 -> one cycle every 24 hours
        - 2.0 -> one cycle every 12 hours
        - 0.5 -> one cycle every 48 hours
    phase : float
        Hour of day, counted from midnight, at which the wave peaks.
        Stored modulo 24. For example:
        - 0 -> peak at midnight
        - 6 -> peak at 6:00 AM
        - 12 -> peak at noon
    """

    amplitude: float
    frequency: float
    phase: float = 0.0

    def __post_init__(self):
        # Checks run in a fixed order: amplitude first, then frequency.
        if self.amplitude < 0:
            raise ValueError("amplitude must be non-negative")
        if self.frequency <= 0:
            raise ValueError("frequency must be positive")
        # Wrap the phase into the [0, 24) interval.
        self.phase %= 24.0

    def evaluate(self, hours: np.ndarray) -> np.ndarray:
        """Return the value of this component at each of the given hours.

        Computes ``amplitude * cos(2π * frequency * (hours - phase) / 24)``,
        i.e. a sine wave shifted by π/2 so the maximum falls at
        ``hour == phase``.
        """
        t = np.asarray(hours, dtype=float)
        angle = 2 * np.pi * self.frequency * (t - self.phase) / 24.0
        return self.amplitude * np.cos(angle)

    def to_dict(self) -> dict:
        """Return the three parameters as a plain dictionary."""
        return {key: getattr(self, key) for key in ("amplitude", "frequency", "phase")}

    @classmethod
    def from_dict(cls, d: dict) -> SineComponent:
        """Build a component from a dictionary; ``phase`` defaults to 0.0."""
        kwargs = {
            "amplitude": d["amplitude"],
            "frequency": d["frequency"],
            "phase": d.get("phase", 0.0),
        }
        return cls(**kwargs)
70
+
71
+
72
@dataclass
class DailyPattern:
    """A periodic 24-hour curve defined by spline control points or sine waves.

    Evaluated values are clipped to the 0-1 range. A pattern can be created
    in two ways:

    **Spline Mode**:
        Cubic spline through user-defined control points (hours, values).

    **Sine Mode**:
        Sum of sine wave components with configurable amplitude, frequency,
        and phase, added to a constant baseline.

    Exactly one of the two parameter sets must be supplied; the resulting
    mode is recorded in the ``mode`` attribute ("spline" or "sine").

    Examples
    --------
    Spline mode:

    >>> pattern = DailyPattern(
    ...     hours=[0, 6, 12, 18],
    ...     values=[0.2, 0.8, 0.9, 0.5]
    ... )

    Sine mode - simple single wave:

    >>> pattern = DailyPattern.from_simple_sine(
    ...     amplitude=0.4,
    ...     frequency=1.0,  # Once per day
    ...     phase=6.0,      # Peak at 6 AM
    ...     baseline=0.5
    ... )

    Sine mode - complex multi-component:

    >>> pattern = DailyPattern.from_sine_waves(
    ...     components=[
    ...         (0.35, 1.0, 8.0),   # Daily cycle, peak at 8 AM
    ...         (0.15, 2.0, 13.0),  # Twice-daily, peaks at 1 PM/AM
    ...     ],
    ...     baseline=0.45
    ... )
    """

    # Spline mode parameters
    hours: list[float] | None = None
    values: list[float] | None = None

    # Sine mode parameters
    sine_components: list[SineComponent] | None = None
    baseline: float = 0.5  # Baseline offset for sine waves (0-1 range)

    # Common parameters
    name: str = "pattern"
    periodic: bool = True
    day_type: str = "all"  # "all", "weekday", or "weekend"

    # Derived state, filled in by __post_init__
    mode: str = field(init=False, repr=True)  # "spline" or "sine"
    _spline: CubicSpline | None = field(init=False, repr=False, compare=False, default=None)

    def __post_init__(self) -> None:
        """Select the mode from the supplied parameters and validate them.

        Raises
        ------
        ValueError
            If both or neither parameter sets are given, if the chosen
            mode's parameters are invalid, or if ``day_type`` is unknown.
        """
        has_spline_params = self.hours is not None and self.values is not None
        has_sine_params = self.sine_components is not None

        if has_spline_params and has_sine_params:
            raise ValueError(
                "Cannot specify both spline parameters (hours/values) "
                "and sine_components. Use one mode only."
            )

        if not has_spline_params and not has_sine_params:
            raise ValueError(
                "Must specify either spline parameters (hours/values) "
                "or sine_components"
            )

        if has_spline_params:
            self.mode = "spline"
            self._validate_spline_params()
            self._build_spline()
        else:
            self.mode = "sine"
            self._validate_sine_params()

        if self.day_type not in ("all", "weekday", "weekend"):
            raise ValueError(
                f"day_type must be 'all', 'weekday', or 'weekend', got {self.day_type!r}"
            )

    def _validate_spline_params(self) -> None:
        """Check that hours/values have equal length and at least 2 points."""
        if len(self.hours) != len(self.values):
            raise ValueError(
                f"hours and values must have the same length, "
                f"got {len(self.hours)} and {len(self.values)}"
            )
        if len(self.hours) < 2:
            raise ValueError("Need at least 2 control points")

    def _validate_sine_params(self) -> None:
        """Check that components are present and the baseline is in [0, 1]."""
        if not self.sine_components:
            raise ValueError("sine_components cannot be empty")
        if not (0.0 <= self.baseline <= 1.0):
            raise ValueError(f"baseline must be in [0,1], got {self.baseline}")

    def _build_spline(self) -> None:
        """Build the cubic spline through the control points, sorted by hour.

        Raises
        ------
        ValueError
            In periodic mode, if the control points span 24 hours or more
            (the loop could not be closed with strictly increasing hours).
        """
        h = np.asarray(self.hours, dtype=float)
        v = np.asarray(self.values, dtype=float)
        order = np.argsort(h)
        h = h[order]
        v = v[order]

        if self.periodic:
            if h[-1] - h[0] >= 24.0:
                raise ValueError(
                    "periodic control points must span less than 24 hours, "
                    f"got hours from {h[0]} to {h[-1]}"
                )
            # CubicSpline requires y[0] == y[-1] for periodic BC, and its
            # period is x[-1] - x[0]. Close the loop exactly 24 hours after
            # the first control point so the period is always one day,
            # even when the first control point is not at hour 0.
            h = np.append(h, h[0] + 24.0)
            v = np.append(v, v[0])
            self._spline = CubicSpline(h, v, bc_type="periodic")
        else:
            self._spline = CubicSpline(h, v)

    def evaluate(self, hours: np.ndarray) -> np.ndarray:
        """Evaluate the pattern at arbitrary hour-of-day values.

        Hours are wrapped modulo 24 before evaluation and the result is
        clipped to [0, 1].
        """
        hours = np.asarray(hours, dtype=float)

        if self.mode == "spline":
            result = self._spline(hours % 24.0)
        else:  # sine mode
            # Start with baseline, then add all sine components
            result = np.full_like(hours, self.baseline, dtype=float)
            for component in self.sine_components:
                result += component.evaluate(hours % 24.0)

        # Clip to [0, 1] range (prevents spline overshoot and sine sum overflow)
        return np.clip(result, 0.0, 1.0)

    def to_series(self, index: pd.DatetimeIndex) -> pd.Series:
        """Project the pattern onto a real DatetimeIndex.

        The hour of day is taken from the index's hour, minute and second
        fields; the returned series is named after the pattern.
        """
        fractional_hours = index.hour + index.minute / 60.0 + index.second / 3600.0
        values = self.evaluate(fractional_hours.values)
        return pd.Series(values, index=index, name=self.name)

    # -- serialization --

    def to_dict(self) -> dict:
        """Serialize to dictionary, including mode information."""
        base = {
            "mode": self.mode,
            "name": self.name,
            "periodic": self.periodic,
            "day_type": self.day_type,
        }

        if self.mode == "spline":
            base.update({
                "hours": list(self.hours),
                "values": list(self.values),
            })
        else:  # sine mode
            base.update({
                "sine_components": [c.to_dict() for c in self.sine_components],
                "baseline": self.baseline,
            })

        return base

    @classmethod
    def from_dict(cls, d: dict) -> DailyPattern:
        """Deserialize from dictionary, supporting both modes.

        A missing "mode" key is treated as "spline" for backward
        compatibility; any other mode value is handled as sine mode.
        """
        mode = d.get("mode", "spline")

        if mode == "spline":
            return cls(
                hours=d["hours"],
                values=d["values"],
                name=d.get("name", "pattern"),
                periodic=d.get("periodic", True),
                day_type=d.get("day_type", "all"),
            )
        else:  # sine mode
            components = [
                SineComponent.from_dict(c) for c in d["sine_components"]
            ]
            return cls(
                sine_components=components,
                baseline=d.get("baseline", 0.5),
                name=d.get("name", "pattern"),
                periodic=d.get("periodic", True),
                day_type=d.get("day_type", "all"),
            )

    def to_json(self) -> str:
        """Serialize to a JSON string (see ``to_dict``)."""
        return json.dumps(self.to_dict())

    @classmethod
    def from_json(cls, s: str) -> DailyPattern:
        """Deserialize from a JSON string (see ``from_dict``)."""
        return cls.from_dict(json.loads(s))

    # -- factory methods for sine mode --

    @classmethod
    def from_sine_waves(
        cls,
        components: list[tuple[float, float, float]] | list[SineComponent],
        baseline: float = 0.5,
        name: str = "sine_pattern",
        day_type: str = "all",
    ) -> DailyPattern:
        """Create a DailyPattern from sine wave components.

        Parameters
        ----------
        components : list of tuples or SineComponent objects
            If tuples: each is (amplitude, frequency, phase)
            If SineComponent objects: used directly
        baseline : float
            Baseline offset (0-1 range)
        name : str
            Pattern name
        day_type : str
            "all", "weekday", or "weekend"

        Examples
        --------
        # Simple daily cycle with peak at 6 AM
        pattern = DailyPattern.from_sine_waves(
            components=[(0.5, 1.0, 6.0)],
            baseline=0.5
        )

        # Complex pattern: daily + twice-daily components
        pattern = DailyPattern.from_sine_waves(
            components=[
                (0.3, 1.0, 8.0),   # Daily cycle, peak at 8 AM
                (0.2, 2.0, 12.0),  # Twice-daily, peaks at noon and midnight
            ],
            baseline=0.4
        )
        """
        # Convert tuples to SineComponent objects if needed
        sine_comps = []
        for comp in components:
            if isinstance(comp, SineComponent):
                sine_comps.append(comp)
            else:
                amplitude, frequency, phase = comp
                sine_comps.append(SineComponent(amplitude, frequency, phase))

        return cls(
            sine_components=sine_comps,
            baseline=baseline,
            name=name,
            day_type=day_type,
        )

    @classmethod
    def from_simple_sine(
        cls,
        amplitude: float = 0.5,
        frequency: float = 1.0,
        phase: float = 6.0,
        baseline: float = 0.5,
        name: str = "simple_sine",
        day_type: str = "all",
    ) -> DailyPattern:
        """Create a simple single-sine-wave pattern.

        Convenience method for the most common case; delegates to
        ``from_sine_waves`` with a single component.

        Examples
        --------
        # Daily pattern peaking at 6 AM
        pattern = DailyPattern.from_simple_sine(
            amplitude=0.4,
            frequency=1.0,
            phase=6.0,
            baseline=0.5
        )
        """
        return cls.from_sine_waves(
            components=[(amplitude, frequency, phase)],
            baseline=baseline,
            name=name,
            day_type=day_type,
        )
@@ -0,0 +1,179 @@
1
+ """Utilities for fitting sine wave patterns to time series data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from scipy.fft import rfft, rfftfreq
8
+ from scipy.optimize import least_squares
9
+
10
+ from pattern_fill.pattern import DailyPattern, SineComponent
11
+ from pattern_fill.fitting import extract_daily_profile
12
+
13
+
14
def fit_sine_pattern(
    series: pd.Series,
    n_components: int = 2,
    frequencies: list[float] | None = None,
    resolution_minutes: int = 15,
    aggregation: str = "median",
    name: str = "fitted_sine",
    day_type: str = "all",
    baseline: float | None = None,
) -> DailyPattern:
    """Fit a sine-based pattern to time series data using FFT.

    Extracts the daily profile, min-max normalizes it to [0, 1], picks
    the frequencies to fit (given, or detected by FFT), and fits an
    amplitude and phase for each frequency independently.

    Parameters
    ----------
    series : pd.Series
        Time series data with DatetimeIndex
    n_components : int
        Number of sine components to fit (ignored if frequencies specified)
    frequencies : list[float], optional
        Fixed frequencies to use (in cycles per day). If None, auto-detect
        using FFT. Common values: [1.0] (daily), [1.0, 2.0] (daily + twice-daily)
    resolution_minutes : int
        Temporal resolution for daily profile extraction
    aggregation : str
        "median" or "mean" aggregation method
    name : str
        Pattern name
    day_type : str
        "all", "weekday", or "weekend"
    baseline : float, optional
        Fixed baseline value on the normalized 0-1 scale. If None, it is
        estimated as the mean of the normalized daily profile.

    Returns
    -------
    DailyPattern
        Fitted pattern in sine mode

    Examples
    --------
    >>> # Auto-fit with 2 components (finds dominant frequencies)
    >>> pattern = fit_sine_pattern(series, n_components=2)

    >>> # Fit with fixed daily frequency
    >>> pattern = fit_sine_pattern(series, frequencies=[1.0])

    >>> # Fit daily + twice-daily pattern for wastewater
    >>> pattern = fit_sine_pattern(series, frequencies=[1.0, 2.0])
    """
    profile = extract_daily_profile(
        series,
        resolution_minutes=resolution_minutes,
        aggregation=aggregation,
    )

    # x = profile index (presumably hour-of-day values; confirm against
    # extract_daily_profile), y = aggregated profile values.
    hours = profile.index.values
    values = profile.values

    # Min-max normalize the profile to [0, 1]; a constant profile maps to 0.5.
    v_min, v_max = values.min(), values.max()
    if v_max - v_min > 0:
        values_norm = (values - v_min) / (v_max - v_min)
    else:
        # dtype=float so an integer-valued profile is not truncated to 0.
        values_norm = np.full_like(values, 0.5, dtype=float)

    # The sine components are fitted against the normalized values, so the
    # baseline must be estimated on that same scale, not from the raw data.
    if baseline is None:
        baseline = float(np.clip(values_norm.mean(), 0.0, 1.0))

    # Determine frequencies to fit
    if frequencies is None:
        frequencies = _detect_frequencies_fft(hours, values_norm, n_components)

    # Fit amplitude and phase for each frequency
    components = []
    for freq in frequencies:
        amplitude, phase = _fit_single_sine(hours, values_norm, freq, baseline)
        if amplitude > 0.01:  # Only include significant components
            components.append(SineComponent(amplitude, freq, phase))

    # DailyPattern rejects an empty component list, so fall back to a
    # small daily wave when no significant component was found.
    if not components:
        components = [SineComponent(0.1, 1.0, 0.0)]

    return DailyPattern(
        sine_components=components,
        baseline=baseline,
        name=name,
        day_type=day_type,
    )
108
+
109
+
110
+ def _detect_frequencies_fft(
111
+ hours: np.ndarray,
112
+ values: np.ndarray,
113
+ n_components: int,
114
+ ) -> list[float]:
115
+ """Detect dominant frequencies using FFT.
116
+
117
+ Returns the top n_components frequencies in cycles per day.
118
+ """
119
+ # Perform FFT
120
+ fft_values = rfft(values - values.mean())
121
+ fft_freqs = rfftfreq(len(values), d=hours[1] - hours[0])
122
+
123
+ # Convert frequencies from cycles/hour to cycles/day
124
+ fft_freqs_per_day = fft_freqs * 24.0
125
+
126
+ # Get magnitudes and find peaks
127
+ magnitudes = np.abs(fft_values)
128
+
129
+ # Exclude DC component (index 0) and find top peaks
130
+ peak_indices = np.argsort(magnitudes[1:])[::-1][:n_components] + 1
131
+ detected_freqs = fft_freqs_per_day[peak_indices]
132
+
133
+ # Round to common fractions (0.5, 1.0, 1.5, 2.0, etc.)
134
+ rounded_freqs = [round(f * 2) / 2 for f in detected_freqs]
135
+
136
+ # Filter out zero or negative frequencies
137
+ valid_freqs = [f for f in rounded_freqs if f > 0]
138
+
139
+ return valid_freqs if valid_freqs else [1.0]
140
+
141
+
142
+ def _fit_single_sine(
143
+ hours: np.ndarray,
144
+ values: np.ndarray,
145
+ frequency: float,
146
+ baseline: float,
147
+ ) -> tuple[float, float]:
148
+ """Fit amplitude and phase for a single sine wave component.
149
+
150
+ Given frequency and baseline, optimizes amplitude and phase to
151
+ minimize residual error.
152
+
153
+ Returns
154
+ -------
155
+ amplitude : float
156
+ phase : float (in hours)
157
+ """
158
+ def residual(params):
159
+ amp, phase = params
160
+ # Use cosine for peak at phase hour
161
+ predicted = baseline + amp * np.cos(
162
+ 2 * np.pi * frequency * (hours - phase) / 24.0
163
+ )
164
+ predicted = np.clip(predicted, 0.0, 1.0)
165
+ return predicted - values
166
+
167
+ # Initial guess: amplitude from std, phase from peak
168
+ initial_amp = np.std(values - baseline)
169
+ initial_phase = hours[np.argmax(values)]
170
+
171
+ # Optimize
172
+ result = least_squares(
173
+ residual,
174
+ x0=[initial_amp, initial_phase],
175
+ bounds=([0, 0], [1.0, 24.0]),
176
+ )
177
+
178
+ amplitude, phase = result.x
179
+ return float(amplitude), float(phase)