ilovetools 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ilovetools/__init__.py +42 -0
- ilovetools/ai/__init__.py +13 -0
- ilovetools/ai/embeddings.py +270 -0
- ilovetools/ai/inference.py +5 -0
- ilovetools/ai/llm_helpers.py +141 -0
- ilovetools/audio/__init__.py +5 -0
- ilovetools/automation/__init__.py +5 -0
- ilovetools/conversion/__init__.py +5 -0
- ilovetools/data/__init__.py +27 -0
- ilovetools/data/feature_engineering.py +497 -0
- ilovetools/data/preprocessing.py +234 -0
- ilovetools/database/__init__.py +5 -0
- ilovetools/datetime/__init__.py +5 -0
- ilovetools/files/__init__.py +5 -0
- ilovetools/image/__init__.py +5 -0
- ilovetools/ml/__init__.py +603 -0
- ilovetools/ml/clustering.py +1107 -0
- ilovetools/ml/cross_validation.py +612 -0
- ilovetools/ml/dimensionality.py +1001 -0
- ilovetools/ml/ensemble.py +872 -0
- ilovetools/ml/feature_selection.py +971 -0
- ilovetools/ml/imbalanced.py +797 -0
- ilovetools/ml/interpretation.py +915 -0
- ilovetools/ml/metrics.py +601 -0
- ilovetools/ml/pipeline.py +711 -0
- ilovetools/ml/timeseries.py +984 -0
- ilovetools/ml/tuning.py +781 -0
- ilovetools/security/__init__.py +5 -0
- ilovetools/text/__init__.py +5 -0
- ilovetools/utils/__init__.py +5 -0
- ilovetools/validation/__init__.py +5 -0
- ilovetools/web/__init__.py +5 -0
- ilovetools-0.2.3.dist-info/METADATA +143 -0
- ilovetools-0.2.3.dist-info/RECORD +38 -0
- ilovetools-0.2.3.dist-info/WHEEL +5 -0
- ilovetools-0.2.3.dist-info/licenses/LICENSE +21 -0
- ilovetools-0.2.3.dist-info/top_level.txt +2 -0
- tests/__init__.py +3 -0
|
@@ -0,0 +1,984 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Time series analysis utilities
|
|
3
|
+
Each function has TWO names: full descriptive name + abbreviated alias
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import List, Dict, Any, Tuple, Optional
|
|
7
|
+
import math
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
# Full names
|
|
11
|
+
'moving_average',
|
|
12
|
+
'exponential_moving_average',
|
|
13
|
+
'weighted_moving_average',
|
|
14
|
+
'seasonal_decompose',
|
|
15
|
+
'difference_series',
|
|
16
|
+
'autocorrelation',
|
|
17
|
+
'partial_autocorrelation',
|
|
18
|
+
'detect_trend',
|
|
19
|
+
'detect_seasonality',
|
|
20
|
+
'remove_trend',
|
|
21
|
+
'remove_seasonality',
|
|
22
|
+
'rolling_statistics',
|
|
23
|
+
'lag_features',
|
|
24
|
+
'time_series_split_cv',
|
|
25
|
+
'forecast_accuracy',
|
|
26
|
+
# Abbreviated aliases
|
|
27
|
+
'ma',
|
|
28
|
+
'ema',
|
|
29
|
+
'wma',
|
|
30
|
+
'decompose',
|
|
31
|
+
'diff',
|
|
32
|
+
'acf',
|
|
33
|
+
'pacf',
|
|
34
|
+
'trend',
|
|
35
|
+
'seasonality',
|
|
36
|
+
'detrend',
|
|
37
|
+
'deseasonalize',
|
|
38
|
+
'rolling_stats',
|
|
39
|
+
'lag',
|
|
40
|
+
'ts_cv',
|
|
41
|
+
'forecast_acc',
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def moving_average(
    series: List[float],
    window: int
) -> List[float]:
    """
    Simple Moving Average (SMA).

    Alias: ma()

    Args:
        series: Time series data
        window: Window size for averaging

    Returns:
        list: Averages of each contiguous window; length is
            len(series) - window + 1

    Raises:
        ValueError: If window is not in [1, len(series)].

    Examples:
        >>> from ilovetools.ml import ma  # Short alias
        >>> ma([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], window=3)[0]
        2.0

    Notes:
        - Smooths noise and reveals trends
        - Lags behind the actual data
    """
    if not 0 < window <= len(series):
        raise ValueError("Window must be positive and <= series length")

    # One average per window start position.
    return [
        sum(series[start:start + window]) / window
        for start in range(len(series) - window + 1)
    ]


# Create alias
ma = moving_average
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def exponential_moving_average(
    series: List[float],
    alpha: float = 0.3
) -> List[float]:
    """
    Exponential Moving Average (EMA).

    Alias: ema()

    Args:
        series: Time series data
        alpha: Smoothing factor (0 < alpha <= 1); larger values weight
            recent observations more heavily

    Returns:
        list: Exponential moving averages, same length as `series`
            (empty list for empty input)

    Raises:
        ValueError: If alpha is outside (0, 1].

    Examples:
        >>> from ilovetools.ml import ema  # Short alias
        >>> ema([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], alpha=0.3)[0]
        1.0

    Notes:
        - Recent data weighted more, so less lag than SMA
        - Common in trading
    """
    if not 0 < alpha <= 1:
        raise ValueError("Alpha must be between 0 and 1")

    # Guard empty input: the seed below would otherwise raise IndexError.
    if not series:
        return []

    # Seed with the first observation, then recurse:
    # ema[i] = alpha * x[i] + (1 - alpha) * ema[i-1]
    result = [series[0]]
    for i in range(1, len(series)):
        ema_val = alpha * series[i] + (1 - alpha) * result[-1]
        result.append(ema_val)

    return result


# Create alias
ema = exponential_moving_average
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def weighted_moving_average(
    series: List[float],
    weights: List[float]
) -> List[float]:
    """
    Weighted Moving Average (WMA).

    Alias: wma()

    Args:
        series: Time series data
        weights: One weight per window position; normalized internally
            so they need not sum to 1

    Returns:
        list: Weighted averages; length is len(series) - len(weights) + 1

    Raises:
        ValueError: If weights are longer than the series, or sum to zero.

    Examples:
        >>> from ilovetools.ml import wma  # Short alias
        >>> len(wma([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [0.5, 0.3, 0.2]))
        8

    Notes:
        - Custom weights give control over each position's importance
        - More flexible than SMA
    """
    window = len(weights)
    if window > len(series):
        raise ValueError("Weights length must be <= series length")

    # Normalize so the weights sum to exactly 1.
    total = sum(weights)
    if total == 0:
        raise ValueError("Weights must sum to non-zero")
    normalized = [w / total for w in weights]

    averages = []
    for start in range(len(series) - window + 1):
        segment = series[start:start + window]
        averages.append(sum(v * w for v, w in zip(segment, normalized)))

    return averages


# Create alias
wma = weighted_moving_average
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def seasonal_decompose(
    series: List[float],
    period: int,
    model: str = 'additive'
) -> Dict[str, List[float]]:
    """
    Seasonal decomposition of time series.

    Alias: decompose()

    Args:
        series: Time series data
        period: Seasonal period
        model: 'additive' (Y = T + S + R) or 'multiplicative'
            (Y = T * S * R); any other value is treated as multiplicative

    Returns:
        dict: 'trend' (None at edges without a full window), 'seasonal',
            'residual', and 'original'

    Raises:
        ValueError: If period is not in [1, len(series)].

    Examples:
        >>> from ilovetools.ml import decompose  # Short alias
        >>> result = decompose([10, 12, 13, 12, 10, 12, 13, 12], period=4)
        >>> 'trend' in result and 'seasonal' in result
        True

    Notes:
        - Essential for forecasting
        - Trend uses a centered moving average of 2*(period//2)+1 points
    """
    if period <= 0 or period > len(series):
        raise ValueError("Period must be positive and <= series length")

    # Trend via centered moving average; edges lacking a full window are None.
    trend = []
    half_window = period // 2

    for i in range(len(series)):
        if i < half_window or i >= len(series) - half_window:
            trend.append(None)
        else:
            start = i - half_window
            end = i + half_window + 1
            # Divide by the actual number of points in the window
            # (2*half_window + 1). For odd periods this equals `period`;
            # dividing by `period` overstated the trend for even periods.
            trend.append(sum(series[start:end]) / (end - start))

    # Detrend (where a trend value exists) to isolate the seasonal signal.
    if model == 'additive':
        detrended = [series[i] - trend[i] if trend[i] is not None else None
                     for i in range(len(series))]
    else:  # multiplicative
        detrended = [series[i] / trend[i] if trend[i] is not None and trend[i] != 0 else None
                     for i in range(len(series))]

    # Average the detrended values at each position within the period.
    seasonal_avg = [0.0] * period
    seasonal_count = [0] * period

    for i, val in enumerate(detrended):
        if val is not None:
            seasonal_avg[i % period] += val
            seasonal_count[i % period] += 1

    seasonal_avg = [seasonal_avg[i] / seasonal_count[i] if seasonal_count[i] > 0 else 0
                    for i in range(period)]

    # Normalize: additive seasonal sums to 0, multiplicative averages to 1.
    if model == 'additive':
        seasonal_mean = sum(seasonal_avg) / period
        seasonal_avg = [s - seasonal_mean for s in seasonal_avg]
    else:
        seasonal_mean = sum(seasonal_avg) / period
        if seasonal_mean != 0:
            seasonal_avg = [s / seasonal_mean for s in seasonal_avg]

    # Tile the one-period pattern across the whole series.
    seasonal = [seasonal_avg[i % period] for i in range(len(series))]

    # Residual is whatever the trend and seasonal parts do not explain.
    if model == 'additive':
        residual = [series[i] - (trend[i] if trend[i] is not None else 0) - seasonal[i]
                    for i in range(len(series))]
    else:
        residual = [series[i] / ((trend[i] if trend[i] is not None else 1) * seasonal[i])
                    if seasonal[i] != 0 else 0
                    for i in range(len(series))]

    return {
        'trend': trend,
        'seasonal': seasonal,
        'residual': residual,
        'original': series,
    }


# Create alias
decompose = seasonal_decompose
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def difference_series(
    series: List[float],
    lag: int = 1
) -> List[float]:
    """
    Difference time series for stationarity.

    Alias: diff()

    Args:
        series: Time series data
        lag: Lag for differencing

    Returns:
        list: series[i] - series[i - lag]; length is len(series) - lag

    Raises:
        ValueError: If lag is not in [1, len(series) - 1].

    Examples:
        >>> from ilovetools.ml import diff  # Short alias
        >>> diff([1, 3, 6, 10, 15], lag=1)
        [2, 3, 4, 5]

    Notes:
        - Removes trend; required for ARIMA
        - Can be applied multiple times
    """
    if not 0 < lag < len(series):
        raise ValueError("Lag must be positive and < series length")

    # Pair each value with the one `lag` positions later and subtract.
    return [later - earlier for earlier, later in zip(series, series[lag:])]


# Create alias
diff = difference_series
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def autocorrelation(
    series: List[float],
    max_lag: int = 10
) -> List[float]:
    """
    Calculate autocorrelation function (ACF).

    Alias: acf()

    Args:
        series: Time series data
        max_lag: Maximum lag to calculate

    Returns:
        list: ACF values for lags 0..max_lag (lag 0 is always 1.0;
            lags >= len(series) are 0.0)

    Examples:
        >>> from ilovetools.ml import acf  # Short alias
        >>> acf([1, 2, 3, 4, 5, 4, 3, 2, 1], max_lag=3)[0]
        1.0

    Notes:
        - Correlation of the series with its own past
        - Used to pick lag order for ARIMA models
    """
    n = len(series)
    mean = sum(series) / n

    # Deviations from the mean, computed once.
    centered = [x - mean for x in series]

    # Lag-0 autocovariance (the variance).
    c0 = sum(d * d for d in centered) / n

    if c0 == 0:
        # Constant series: only the trivial lag-0 correlation.
        return [1.0] + [0.0] * max_lag

    values = [1.0]  # ACF at lag 0 is always 1

    for k in range(1, max_lag + 1):
        if k >= n:
            values.append(0.0)
        else:
            cov = sum(a * b for a, b in zip(centered, centered[k:])) / n
            values.append(cov / c0)

    return values


# Create alias
acf = autocorrelation
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def partial_autocorrelation(
    series: List[float],
    max_lag: int = 10
) -> List[float]:
    """
    Calculate partial autocorrelation function (PACF).

    Alias: pacf()

    Args:
        series: Time series data
        max_lag: Maximum lag to calculate

    Returns:
        list: PACF values for lags 0..max_lag (lag 0 is always 1.0)

    Examples:
        >>> from ilovetools.ml import pacf  # Short alias
        >>> len(pacf([1, 2, 3, 4, 5, 4, 3, 2, 1], max_lag=3))
        4

    Notes:
        - Direct correlation with indirect effects removed
        - Used to pick the AR order; complements ACF
        - NOTE: a simplified recursion, not the full Durbin-Levinson
          algorithm, so values are approximate
    """
    acf_values = autocorrelation(series, max_lag)
    pacf_values = [1.0]  # PACF at lag 0 is always 1

    if max_lag == 0:
        return pacf_values

    for k in range(1, max_lag + 1):
        # Start from the raw correlation at lag k, then strip the
        # contribution already explained by shorter lags.
        direct = acf_values[k]
        for j in range(1, k):
            direct -= pacf_values[j] * acf_values[k - j]
        pacf_values.append(direct)

    return pacf_values


# Create alias
pacf = partial_autocorrelation
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def detect_trend(
    series: List[float],
    window: int = 5
) -> Dict[str, Any]:
    """
    Detect trend in time series.

    Alias: trend()

    Args:
        series: Time series data
        window: Window for the smoothing moving average (clamped to the
            series length)

    Returns:
        dict: 'direction' ('upward', 'downward', or 'flat'), 'slope' of a
            least-squares line through the smoothed values, and 'strength'
            (R-squared of that fit, clipped to [0, 1])

    Examples:
        >>> from ilovetools.ml import trend  # Short alias
        >>> trend([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], window=3)['direction']
        'upward'

    Notes:
        - Smooths first so noise does not dominate the fit
        - |slope| < 0.01 is reported as 'flat'
    """
    window = min(window, len(series))

    smoothed = moving_average(series, window)

    # Too few smoothed points to fit a line.
    if len(smoothed) < 2:
        return {
            'direction': 'flat',
            'slope': 0.0,
            'strength': 0.0,
        }

    # Least-squares slope of smoothed values against their index.
    n = len(smoothed)
    x_mean = (n - 1) / 2
    y_mean = sum(smoothed) / n

    sxy = sum((i - x_mean) * (smoothed[i] - y_mean) for i in range(n))
    sxx = sum((i - x_mean) ** 2 for i in range(n))

    slope = sxy / sxx if sxx != 0 else 0

    if abs(slope) < 0.01:
        direction = 'flat'
    elif slope > 0:
        direction = 'upward'
    else:
        direction = 'downward'

    # Strength = R-squared of the linear fit.
    fitted = [y_mean + slope * (i - x_mean) for i in range(n)]
    ss_res = sum((smoothed[i] - fitted[i]) ** 2 for i in range(n))
    ss_tot = sum((smoothed[i] - y_mean) ** 2 for i in range(n))

    strength = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0

    return {
        'direction': direction,
        'slope': slope,
        'strength': max(0, min(1, strength)),
    }


# Create alias
trend = detect_trend
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def detect_seasonality(
    series: List[float],
    max_period: int = 12
) -> Dict[str, Any]:
    """
    Detect seasonality in time series.

    Alias: seasonality()

    Args:
        series: Time series data
        max_period: Maximum period to check (clamped to len(series) // 2)

    Returns:
        dict: 'has_seasonality' (bool), 'period' (int or None), and
            'strength' (|ACF| at the detected period, 0.0 if none)

    Examples:
        >>> from ilovetools.ml import seasonality  # Short alias
        >>> seasonality([1, 2, 1, 2, 1, 2, 1, 2, 1, 2], max_period=5)['has_seasonality']
        True

    Notes:
        - Picks the period with the strongest |ACF| above 0.3
        - Ties keep the shorter period (only strictly larger scores win)
    """
    max_period = min(max_period, len(series) // 2)

    # Not enough data to test any repeating pattern.
    if max_period < 2:
        return {
            'has_seasonality': False,
            'period': None,
            'strength': 0.0,
        }

    correlations = autocorrelation(series, max_period)

    # Scan candidate periods for the strongest ACF peak above threshold.
    best_period = None
    best_strength = 0.0

    for candidate in range(2, max_period + 1):
        score = abs(correlations[candidate])
        if score > best_strength and score > 0.3:
            best_strength = score
            best_period = candidate

    return {
        'has_seasonality': best_period is not None,
        'period': best_period,
        'strength': best_strength,
    }


# Create alias
seasonality = detect_seasonality
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
def remove_trend(
    series: List[float],
    method: str = 'difference'
) -> List[float]:
    """
    Remove trend from time series.

    Alias: detrend()

    Args:
        series: Time series data
        method: 'difference' (first difference, output one element shorter)
            or 'linear' (subtract a least-squares line, same length)

    Returns:
        list: Detrended series

    Raises:
        ValueError: If method is not 'difference' or 'linear'.

    Examples:
        >>> from ilovetools.ml import detrend  # Short alias
        >>> detrend([1, 2, 3, 4, 5], method='linear')
        [0.0, 0.0, 0.0, 0.0, 0.0]

    Notes:
        - Helps achieve stationarity before modeling
    """
    if method == 'difference':
        # First difference removes a (locally) linear trend.
        return difference_series(series, lag=1)

    if method == 'linear':
        # Fit y = slope * i + intercept by least squares, subtract the fit.
        n = len(series)
        x_mean = (n - 1) / 2
        y_mean = sum(series) / n

        sxy = sum((i - x_mean) * (series[i] - y_mean) for i in range(n))
        sxx = sum((i - x_mean) ** 2 for i in range(n))

        slope = sxy / sxx if sxx != 0 else 0
        intercept = y_mean - slope * x_mean

        return [series[i] - (slope * i + intercept) for i in range(n)]

    raise ValueError("Method must be 'difference' or 'linear'")


# Create alias
detrend = remove_trend
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
def remove_seasonality(
    series: List[float],
    period: int
) -> List[float]:
    """
    Remove seasonality from time series.

    Alias: deseasonalize()

    Args:
        series: Time series data
        period: Seasonal period

    Returns:
        list: Deseasonalized series, same length as the input

    Examples:
        >>> from ilovetools.ml import deseasonalize  # Short alias
        >>> len(deseasonalize([10, 12, 13, 12, 10, 12, 13, 12], period=4))
        8

    Notes:
        - Uses additive decomposition to estimate the repeating component,
          then subtracts it pointwise
    """
    components = seasonal_decompose(series, period, model='additive')

    return [
        value - seasonal_part
        for value, seasonal_part in zip(series, components['seasonal'])
    ]


# Create alias
deseasonalize = remove_seasonality
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def rolling_statistics(
    series: List[float],
    window: int
) -> Dict[str, List[float]]:
    """
    Calculate rolling statistics.

    Alias: rolling_stats()

    Args:
        series: Time series data
        window: Window size

    Returns:
        dict: 'mean', 'std' (population), 'min', 'max' — each a list of
            length len(series) - window + 1

    Raises:
        ValueError: If window is not in [1, len(series)].

    Examples:
        >>> from ilovetools.ml import rolling_stats  # Short alias
        >>> sorted(rolling_stats([1, 2, 3, 4], window=2))
        ['max', 'mean', 'min', 'std']

    Notes:
        - Useful for feature engineering and anomaly detection
    """
    if not 0 < window <= len(series):
        raise ValueError("Window must be positive and <= series length")

    stats = {'mean': [], 'std': [], 'min': [], 'max': []}

    for start in range(len(series) - window + 1):
        chunk = series[start:start + window]

        avg = sum(chunk) / window
        # Population standard deviation over the window.
        spread = math.sqrt(sum((v - avg) ** 2 for v in chunk) / window)

        stats['mean'].append(avg)
        stats['std'].append(spread)
        stats['min'].append(min(chunk))
        stats['max'].append(max(chunk))

    return stats


# Create alias
rolling_stats = rolling_statistics
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def lag_features(
    series: List[float],
    lags: List[int]
) -> List[List[Optional[float]]]:
    """
    Create lag features for time series.

    Alias: lag()

    Args:
        series: Time series data
        lags: List of lag values (e.g. [1, 2, 7])

    Returns:
        list: One row per observation; row[j] is series[i - lags[j]], or
            None when the lag reaches before the start of the series.
            An empty `lags` list yields empty rows.

    Examples:
        >>> from ilovetools.ml import lag  # Short alias
        >>> lag([1, 2, 3], lags=[1])
        [[None], [1], [2]]

    Notes:
        - Past values become model features (t-1, t-2, t-7 are common)
        - Leading None entries must be handled downstream
    """
    # NOTE: the previous version computed max(lags) into an unused local,
    # which also raised spuriously for an empty `lags` list.
    features = []
    for i in range(len(series)):
        # `k` (not `lag`) avoids shadowing the module-level alias.
        row = [series[i - k] if i >= k else None for k in lags]
        features.append(row)

    return features


# Create alias
lag = lag_features
|
|
851
|
+
|
|
852
|
+
|
|
853
|
+
def time_series_split_cv(
    series: List[float],
    n_splits: int = 5,
    test_size: Optional[int] = None
) -> List[Dict[str, List[int]]]:
    """
    Time series cross-validation splits.

    Alias: ts_cv()

    Args:
        series: Time series data
        n_splits: Number of splits
        test_size: Size of each test set; defaults to
            len(series) // (n_splits + 1)

    Returns:
        list: One dict per split with 'train' (all indices before the test
            window, expanding) and 'test' (the next test_size indices)

    Raises:
        ValueError: If the resolved test_size is non-positive or the test
            windows would consume the whole series.

    Examples:
        >>> from ilovetools.ml import ts_cv  # Short alias
        >>> len(ts_cv(list(range(20)), n_splits=3))
        3

    Notes:
        - Expanding window; training data always precedes test data, so
          there is no leakage from the future
    """
    n = len(series)

    if test_size is None:
        test_size = n // (n_splits + 1)

    if test_size <= 0 or test_size * n_splits >= n:
        raise ValueError("Invalid test_size for given n_splits")

    folds = []
    for split in range(n_splits):
        # Test windows are the last n_splits * test_size points, in order.
        boundary = n - (n_splits - split) * test_size
        folds.append({
            'train': list(range(boundary)),
            'test': list(range(boundary, boundary + test_size)),
        })

    return folds


# Create alias
ts_cv = time_series_split_cv
|
|
916
|
+
|
|
917
|
+
|
|
918
|
+
def forecast_accuracy(
    actual: List[float],
    predicted: List[float]
) -> Dict[str, float]:
    """
    Calculate forecast accuracy metrics.

    Alias: forecast_acc()

    Args:
        actual: Actual values
        predicted: Predicted values

    Returns:
        dict: 'mae' (Mean Absolute Error), 'rmse' (Root Mean Squared
            Error), and 'mape' (Mean Absolute Percentage Error, in
            percent, computed only over points where actual != 0;
            0 if no such points). Lower is better for all three.

    Raises:
        ValueError: If lengths differ or the inputs are empty.

    Examples:
        >>> from ilovetools.ml import forecast_acc  # Short alias
        >>> result = forecast_acc([10, 20, 30], [12, 19, 31])
        >>> 'mae' in result and 'rmse' in result
        True
    """
    if len(actual) != len(predicted):
        raise ValueError("Actual and predicted must have same length")
    # Guard empty input: the divisions below would raise ZeroDivisionError.
    if not actual:
        raise ValueError("Actual and predicted must be non-empty")

    n = len(actual)
    errors = [actual[i] - predicted[i] for i in range(n)]

    # Mean Absolute Error
    mae = sum(abs(e) for e in errors) / n

    # Root Mean Squared Error
    rmse = math.sqrt(sum(e * e for e in errors) / n)

    # MAPE: skip zero actuals to avoid division by zero.
    ratios = [abs(e / a) for e, a in zip(errors, actual) if a != 0]
    mape = (sum(ratios) / len(ratios) * 100) if ratios else 0

    return {
        'mae': mae,
        'rmse': rmse,
        'mape': mape,
    }


# Create alias
forecast_acc = forecast_accuracy
|