anomaly-pipeline 0.1.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,546 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import plotly.graph_objects as go
4
+ import plotly.express as px
5
+
6
+
7
def initialize_fig(group, group_columns, variable, date_column, anomaly_detection_model):
    """Build the base figure shared by the anomaly plots.

    The figure contains one line trace of ``group[variable]`` against
    ``group[date_column]`` and a centred title made of the first row's
    ``group_columns`` values (upper-cased) followed by
    ``anomaly_detection_model``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to plot; only the first row is used for the title.
    group_columns : list
        Columns whose first-row values form the title prefix.
    variable : str
        Column plotted on the y-axis; also used for the trace and axis label.
    date_column : str
        Column plotted on the x-axis.
    anomaly_detection_model : str
        Text appended to the title.

    Returns
    -------
    plotly.graph_objects.Figure
        The initialised figure.
    """
    # Group keys may be non-string (e.g. numeric ids); coerce them so the
    # join cannot raise TypeError.
    group_label = " -- ".join(str(value) for value in group[group_columns].values[0]).upper()
    plot_title = group_label + " -- " + anomaly_detection_model

    # All-caps names are shown unchanged, anything else is title-cased.
    series_label = variable if variable == variable.upper() else variable.title()

    # Pad the y-axis outwards from the data. The factor is flipped for
    # negative extremes, otherwise the "padding" would move inwards and
    # clip the series.
    y_min = group[variable].min()
    y_max = group[variable].max()
    y_lower = y_min * 0.9 if y_min >= 0 else y_min * 1.1
    y_upper = y_max * 1.06 if y_max >= 0 else y_max * 0.94

    fig = go.Figure()

    # Actuals
    fig.add_trace(go.Scatter(
        x=group[date_column],
        y=group[variable],
        mode='lines',
        line=dict(color='seagreen', width=1.5),
        name=series_label,
    ))

    fig.update_layout(
        title=dict(
            text=plot_title,
            y=0.96,
            x=0.5,
            xanchor='center',
            yanchor='top',
            font=dict(size=18, color='black', weight='bold'),
        ),
        height=350,
        width=1200,
        margin=dict(l=50, r=50, t=40, b=30),
        plot_bgcolor='snow',
        paper_bgcolor='whitesmoke',
        xaxis=dict(
            range=[group[date_column].min(), group[date_column].max()],
            showline=True,
            linewidth=0.5,
            linecolor='orange',
            zeroline=False,
            gridcolor='rgba(255, 165, 0, 0.1)',
            mirror=True
        ),
        yaxis=dict(
            range=[y_lower, y_upper],
            showline=True,
            linewidth=0.5,
            linecolor='orange',
            zeroline=False,
            gridcolor='rgba(255, 165, 0, 0.1)',
            mirror=True
        ),
        yaxis_title=dict(
            text=series_label,
            font=dict(size=16, weight='bold', color='black')
        ),
        # Legend sits just outside the right edge of the plot area.
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02,
        )
    )

    return fig
68
+
69
+
70
def add_anomalies(fig, group, date_column, variable):
    """Overlay the rows flagged in ``is_Anomaly`` as pink cross markers.

    Parameters
    ----------
    fig : plotly figure the trace is added to
    group : pandas.DataFrame containing ``is_Anomaly``, ``date_column``
        and ``variable``
    date_column : str, column used for the x values
    variable : str, column used for the y values

    Returns
    -------
    The same figure, with one extra 'Anomalies' trace (hover disabled).
    """
    # Select the flagged rows once and reuse them for both axes.
    flagged_rows = group[group['is_Anomaly'] == True]
    marker_style = dict(color='pink', symbol='cross', line=dict(width=1), size=9)

    anomaly_trace = go.Scatter(
        x=flagged_rows[date_column],
        y=flagged_rows[variable],
        mode='markers',
        marker=marker_style,
        name='Anomalies',
        hoverinfo='skip',
    )
    fig.add_trace(anomaly_trace)
    return fig
81
+
82
+
83
def add_model_anomalies(fig, group, date_column, variable, model):
    """Overlay one model's flagged rows as circle markers with hover details.

    Rows are selected through the ``is_{model}_anomaly`` column, and the
    ``{model}_anomaly`` column is attached as hover data.

    Parameters
    ----------
    fig : plotly figure the trace is added to
    group : pandas.DataFrame holding the model's flag and category columns
    date_column : str, column used for the x values
    variable : str, column used for the y values and the hover label
    model : str, model prefix used to build the column names

    Returns
    -------
    The same figure, with one extra '{model} Anomalies' trace.
    """
    flagged_rows = group[group[f'is_{model}_anomaly'] == True]

    # All-caps names are shown unchanged, anything else is title-cased.
    value_label = variable if variable == variable.upper() else variable.title()
    hover_lines = [
        'Date: %{x|%Y-%m-%d}<br>',
        f'{value_label}: %{{y:~.2s}}<br>',
        f'{model} Category: %{{customdata[0]}}<extra></extra>',
    ]

    model_trace = go.Scatter(
        x=flagged_rows[date_column],
        y=flagged_rows[variable],
        mode='markers',
        marker=dict(color='palevioletred', symbol='circle', line=dict(width=1), size=9),
        name=f'{model} Anomalies',
        customdata=flagged_rows[[f'{model}_anomaly']],
        hovertemplate=''.join(hover_lines),
    )
    fig.add_trace(model_trace)
    return fig
98
+
99
+
100
def add_anomaly_region(fig, region, group, variable, date_column, threshold_column, threshold_label):
    """Shade an anomaly region and draw its threshold line.

    The shaded rectangle spans the full plot width. For ``'upper'`` it runs
    from the first value of ``threshold_column`` up to 1.06 times the
    maximum of ``variable``; for ``'lower'`` it runs from 0 up to the first
    value of ``threshold_column``.

    Parameters
    ----------
    fig : plotly figure to draw on
    region : {'upper', 'lower'}
    group : pandas.DataFrame holding the data and threshold columns
    variable : str, column with the plotted values
    date_column : str, column used for the x values of the threshold line
    threshold_column : str, column holding the threshold values
    threshold_label : str, trace name of the threshold line (not shown in
        the legend)

    Returns
    -------
    The same figure, with the shape and the threshold trace added.

    Raises
    ------
    ValueError
        If ``region`` is neither ``'upper'`` nor ``'lower'``.
    """
    # Validate first: an unknown region previously left y0/y1 unbound and
    # failed later with an unrelated error.
    if region not in ('upper', 'lower'):
        raise ValueError(f"region must be 'upper' or 'lower', got {region!r}")

    # Only the first threshold value positions the rectangle.
    first_threshold = group[threshold_column].values[0]
    if region == 'upper':
        y0 = first_threshold
        y1 = group[variable].max()*1.06
    else:
        y0 = 0
        y1 = first_threshold

    # Shading
    fig.add_shape(
        type="rect",
        x0=0, x1=1, xref="paper",
        y0=y0, y1=y1,
        yref="y",
        fillcolor="rgba(255, 0, 0, 0.055)",
        line=dict(width=0),
        layer="below"
    )

    # Threshold line (used for both the upper and the lower region)
    fig.add_trace(go.Scatter(
        x=group[date_column],
        y=group[threshold_column],
        mode='lines',
        line=dict(color='orangered', width=1, dash='dashdot'),
        name=threshold_label,
        showlegend=False
    ))

    return fig
131
+
132
+
133
def add_eval_period_highlight(fig, group, date_column, variable, eval_period):
    """Highlight the last ``eval_period`` rows with a wide translucent line.

    Parameters
    ----------
    fig : plotly figure the trace is added to
    group : pandas.DataFrame holding ``date_column`` and ``variable``
    date_column : str, column used for the x values
    variable : str, column used for the y values
    eval_period : int, number of trailing rows to highlight

    Returns
    -------
    The same figure. It is returned unchanged when ``eval_period`` is
    smaller than 1.
    """
    # A slice of [-0:] selects every row and a negative count selects from
    # the front, so anything below 1 draws nothing.
    if eval_period < 1:
        return fig

    fig.add_trace(go.Scatter(
        x=group[date_column].iloc[-eval_period:],
        y=group[variable].iloc[-eval_period:],
        mode='lines',
        line=dict(
            color='rgba(0, 255, 0, 0.25)',  # 'lime' with 0.25 alpha
            width=10
        ),
        name='Evaluation Period',
        hoverinfo='skip',
    ))
    return fig
146
+
147
+
148
def anomaly_eval_plot(group, group_columns, variable, date_column, eval_period=12, show_anomaly_scores_on_main_plot=False):
    """Show the main plot that combines every model's anomaly vote.

    Vote columns are those named ``is_*_anomaly`` (excluding
    ``is_anomaly``). Per row the function derives:

    * ``Anomaly_Votes``  - row sum of the vote columns
    * ``Vote_Cnt``       - row sum after replacing ``False`` with ``True``
    * ``is_Anomaly``     - ``True`` when votes / count is at least 0.5
    * ``Anomaly_Score``  - ``2 * (votes / count - 0.5)``
    * ``Anomaly_Score_Display`` - 100 * score rounded away from zero, or 1
      when the score is neither negative nor positive

    The figure shows the series, its mean and median, the anomalies (red),
    the rows with at least one vote that are not anomalies (orange) and,
    optionally, the score as bars on a secondary axis.

    Parameters
    ----------
    group : pandas.DataFrame for one group; it is copied, not modified
    group_columns : list, columns identifying the group (used for the title)
    variable : str, column with the plotted values
    date_column : str, column with the x values
    eval_period : int, accepted but not used by this function
    show_anomaly_scores_on_main_plot : bool, add the score bars when True

    Returns
    -------
    None. The figure is shown; any exception is caught and reported with
    ``print``.
    """
    try:
        group = group.copy()

        anomaly_cols = [
            col for col in group.columns.to_list()
            if col.startswith('is_') and col.endswith('_anomaly') and col != 'is_anomaly'
        ]

        def voting_models(row):
            # Sorted names of the models whose flag is set on this row;
            # missing votes are skipped.
            return sorted(
                col.removeprefix('is_').removesuffix('_anomaly')
                for col in anomaly_cols
                if pd.notna(row[col]) and row[col] == True
            )

        group['Anomaly Vote Models'] = group.apply(voting_models, axis=1)
        group['Anomaly Vote Models'] = group['Anomaly Vote Models'].apply(', '.join)
        group['Anomaly_Votes'] = group[anomaly_cols].sum(axis=1).astype(int)
        group['Vote_Cnt'] = group[anomaly_cols].replace(False, True).sum(axis=1).astype(int)
        group['Anomaly_Votes_Display'] = (
            group['Anomaly_Votes'].astype(str) + ' out of ' + group['Vote_Cnt'].astype(str)
        )

        vote_share = group['Anomaly_Votes']/group['Vote_Cnt']
        group['is_Anomaly'] = np.where(vote_share >= 0.5, True, False)
        group['Anomaly_Score'] = 2 * (vote_share - 0.5).astype(float)

        score = group['Anomaly_Score']
        group['Anomaly_Score_Display'] = np.select(
            [score < 0, score > 0],
            [np.floor(100*score), np.ceil(100*score)],
            default=1,
        ).astype(float)

        group['Mean'] = group[variable].mean()
        group['Median'] = group[variable].median()

        fig = initialize_fig(group, group_columns, variable, date_column, "Anomalies")

        # Flat reference lines: mean first, then median.
        reference_lines = (
            ('Mean', 'maroon', 'dash'),
            ('Median', 'darkblue', 'dot'),
        )
        for column, colour, dash in reference_lines:
            fig.add_trace(go.Scatter(
                x=group[date_column],
                y=group[column],
                mode='lines',
                line=dict(color=colour, width=0.7, dash=dash),
                name=column,
                showlegend=True,
                hoverinfo='skip',
            ))

        value_label = variable if variable == variable.upper() else variable.title()
        vote_hover = ''.join([
            'Date: %{x|%Y-%m-%d}<br>',
            f'{value_label}: %{{y:,d}}<br>',
            'Anomaly Votes: %{customdata[0]}<br>',
            'Anomaly Vote Models: %{customdata[1]}<br>',
            'Anomaly Score: %{customdata[2]}<extra></extra><br>',
        ])
        hover_columns = ['Anomaly_Votes_Display', 'Anomaly Vote Models', 'Anomaly_Score_Display']

        def vote_markers(rows, colour, name):
            # Marker size grows with the anomaly score of each row.
            return go.Scatter(
                x=rows[date_column],
                y=rows[variable],
                mode='markers',
                marker=dict(
                    color=colour,
                    symbol='circle',
                    line=dict(width=1),
                    size=5*(rows['Anomaly_Score'] + 2),
                ),
                name=name,
                customdata=rows[hover_columns],
                hovertemplate=vote_hover,
            )

        # Anomalies
        anomalous_rows = group[group['is_Anomaly'] == True]
        fig.add_trace(vote_markers(anomalous_rows, 'red', 'Anomalies'))

        # Near anomalies: at least one vote, but below the anomaly threshold.
        near_rows = group[(group['is_Anomaly'] == False) & (group['Anomaly_Votes'] >= 1)]
        fig.add_trace(vote_markers(near_rows, 'orange', 'Not Quite Anomalies'))

        # Anomaly scores on a secondary axis.
        # NOTE(review): the layout update is assumed to belong to this
        # branch together with the bar trace - confirm against the
        # original indentation.
        if show_anomaly_scores_on_main_plot:
            bar_colours = np.where(
                group['Anomaly_Score_Display'] > 0,
                'rgba(255, 0, 0, 0.35)',
                'rgba(128, 128, 128, 0.15)',
            )
            fig.add_trace(go.Bar(
                x=group[date_column],
                y=group['Anomaly_Score_Display'],
                name='Anomaly Score',
                marker=dict(color=bar_colours),
                yaxis='y2',
                # Keep the hover box limited to the value and trace name.
                hoverinfo='y+name',
                showlegend=True
            ))

            fig.update_layout(
                yaxis2=dict(
                    title='Anomaly Score',
                    overlaying='y',
                    side='right',
                    range=[-105, 105],
                    showgrid=False,
                ),
                margin=dict(l=50, r=200, t=50, b=50),
                legend=dict(
                    orientation="v",
                    yanchor="top",
                    y=1,
                    xanchor="left",
                    x=1.07,
                ))

        fig.show()
        print("\n")

    except Exception as error:
        print(f"Anomaly Plot Failed: {error}")
275
+
276
+
277
def anomaly_percentile_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the percentile-model plot for one group.

    Draws the base figure, the lower and upper anomaly regions taken from
    ``Percentile_low`` / ``Percentile_high``, the rows flagged by the
    Percentile model and, when ``final_anomalies`` is true, the final
    anomaly markers. ``eval_period`` is accepted but not used here.

    Returns
    -------
    None. Any exception is caught and reported with ``print``.
    """
    try:
        group = group.copy()
        fig = initialize_fig(group, group_columns, variable, date_column, "Percentile Anomaly Detection")

        # Lower region first, then upper; each call adds shading plus its
        # threshold line.
        regions = (
            ('lower', 'Percentile_low', 'Percentile Low'),
            ('upper', 'Percentile_high', 'Percentile High'),
        )
        for region, threshold_column, threshold_label in regions:
            fig = add_anomaly_region(fig, region, group, variable, date_column, threshold_column, threshold_label)

        # Rows flagged by the Percentile model
        fig = add_model_anomalies(fig, group, date_column, variable, 'Percentile')

        # Final anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, group, date_column, variable)

        fig.show()
        print("\n")
    except Exception as error:
        print(f"Percentile Anomaly Plot Failed: {error}")
295
+
296
+
297
def anomaly_sd_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the standard-deviation-model plot for one group.

    Draws the base figure, the lower and upper anomaly regions taken from
    ``SD2_low`` / ``SD2_high``, the rows flagged by the SD model and, when
    ``final_anomalies`` is true, the final anomaly markers.
    ``eval_period`` is accepted but not used here.

    Returns
    -------
    None. Any exception is caught and reported with ``print``.
    """
    try:
        group = group.copy()
        fig = initialize_fig(group, group_columns, variable, date_column, "SD Anomaly Detection")

        # Lower region first, then upper; each call adds shading plus its
        # threshold line.
        regions = (
            ('lower', 'SD2_low', 'SD Low'),
            ('upper', 'SD2_high', 'SD High'),
        )
        for region, threshold_column, threshold_label in regions:
            fig = add_anomaly_region(fig, region, group, variable, date_column, threshold_column, threshold_label)

        # Rows flagged by the SD model
        fig = add_model_anomalies(fig, group, date_column, variable, 'SD')

        # Final anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, group, date_column, variable)

        fig.show()
        print("\n")
    except Exception as error:
        print(f"SD Anomaly Plot Failed: {error}")
315
+
316
+
317
def anomaly_mad_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the MAD-model plot for one group.

    Draws the base figure, the lower and upper anomaly regions taken from
    ``MAD_low`` / ``MAD_high``, the rows flagged by the MAD model and, when
    ``final_anomalies`` is true, the final anomaly markers.
    ``eval_period`` is accepted but not used here.

    Returns
    -------
    None. Any exception is caught and reported with ``print``.
    """
    try:
        group = group.copy()
        fig = initialize_fig(group, group_columns, variable, date_column, "MAD Anomaly Detection")

        # Lower region first, then upper; each call adds shading plus its
        # threshold line.
        regions = (
            ('lower', 'MAD_low', 'MAD Low'),
            ('upper', 'MAD_high', 'MAD High'),
        )
        for region, threshold_column, threshold_label in regions:
            fig = add_anomaly_region(fig, region, group, variable, date_column, threshold_column, threshold_label)

        # Rows flagged by the MAD model
        fig = add_model_anomalies(fig, group, date_column, variable, 'MAD')

        # Final anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, group, date_column, variable)

        fig.show()
        print("\n")
    except Exception as error:
        print(f"MAD Anomaly Plot Failed: {error}")
335
+
336
+
337
def anomaly_iqr_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the IQR-model plot for one group.

    Draws the base figure, the lower and upper anomaly regions taken from
    ``IQR_low`` / ``IQR_high``, the rows flagged by the IQR model and, when
    ``final_anomalies`` is true, the final anomaly markers.
    ``eval_period`` is accepted but not used here.

    Returns
    -------
    None. Any exception is caught and reported with ``print``.
    """
    try:
        group = group.copy()
        fig = initialize_fig(group, group_columns, variable, date_column, "IQR Anomaly Detection")

        # Lower region first, then upper; each call adds shading plus its
        # threshold line.
        regions = (
            ('lower', 'IQR_low', 'IQR Low'),
            ('upper', 'IQR_high', 'IQR High'),
        )
        for region, threshold_column, threshold_label in regions:
            fig = add_anomaly_region(fig, region, group, variable, date_column, threshold_column, threshold_label)

        # Rows flagged by the IQR model
        fig = add_model_anomalies(fig, group, date_column, variable, 'IQR')

        # Final anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, group, date_column, variable)

        fig.show()
        print("\n")
    except Exception as error:
        print(f"IQR Anomaly Plot Failed: {error}")
355
+
356
+
357
def anomaly_ewma_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the EWMA-model plot for one group.

    On top of the base figure this adds, in order: the ``EWMA_forecast``
    line, the ``EWMA_low`` line with shading down to 0, the ``EWMA_high``
    line with shading up to 1.06 times the maximum of ``variable``, the
    rows flagged in ``is_EWMA_anomaly`` and, when ``final_anomalies`` is
    true, the final anomaly markers. Shading only covers rows where the
    corresponding limit is not missing. ``eval_period`` is accepted but
    not used here.

    Returns
    -------
    None. Any exception is caught and reported with ``print``.
    """
    try:
        group = group.copy()
        fig = initialize_fig(group, group_columns, variable, date_column, "EWMA Anomaly Detection")

        def limit_line(column, name):
            # Dash-dotted control-limit line, hidden from the legend.
            return go.Scatter(
                x=group[date_column],
                y=group[column],
                mode='lines',
                line=dict(color='orangered', width=1, dash='dashdot'),
                name=name,
                showlegend=False,
            )

        def limit_shading(column, level):
            # Invisible flat line at `level`; "tonexty" fills the gap to the
            # trace added immediately before it.
            covered = group[group[column].notna()]
            return go.Scatter(
                x=covered[date_column],
                y=[level] * len(covered),
                mode="lines",
                line=dict(width=0),
                hoverinfo="skip",
                showlegend=False,
                fill="tonexty",
                fillcolor="rgba(255, 0, 0, 0.07)",
            )

        # EWMA forecast
        fig.add_trace(go.Scatter(
            x=group[date_column],
            y=group['EWMA_forecast'],
            mode='lines',
            line=dict(color='slateblue', width=0.5, dash='dashdot'),
            name='EWMA Forecast',
            showlegend=True
        ))

        # Lower limit and its shading
        fig.add_trace(limit_line('EWMA_low', 'EWMA Low'))
        fig.add_trace(limit_shading('EWMA_low', 0))

        # Upper limit and its shading
        fig.add_trace(limit_line('EWMA_high', 'EWMA High'))
        fig.add_trace(limit_shading('EWMA_high', group[variable].max() * 1.06))

        # EWMA anomalies
        flagged_rows = group[group['is_EWMA_anomaly'] == True]
        fig.add_trace(go.Scatter(
            x=flagged_rows[date_column],
            y=flagged_rows[variable],
            mode='markers',
            marker=dict(color='palevioletred', symbol='circle', line=dict(width=1), size=9),
            name='EWMA Anomalies',
        ))

        # Final anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, group, date_column, variable)

        fig.show()
        print("\n")
    except Exception as error:
        print(f"EWMA Anomaly Plot Failed: {error}")
426
+
427
+
428
def anomaly_fb_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the FB Prophet-model plot for one group.

    On top of the base figure this adds, in order: the ``FB_forecast``
    line, the ``FB_low`` line with shading down to 0, the ``FB_high`` line
    with shading up to 1.06 times the maximum of ``variable``, the rows
    flagged in ``is_FB_anomaly`` and, when ``final_anomalies`` is true, the
    final anomaly markers. Shading only covers rows where the corresponding
    limit is not missing. ``eval_period`` is accepted but not used here.

    Returns
    -------
    None. Any exception is caught and reported with ``print``, prefixed
    with the name of this plot.
    """
    try:
        group = group.copy()
        fig = initialize_fig(group, group_columns, variable, date_column, "FB Prophet Anomaly Detection")

        def limit_line(column, name):
            # Dash-dotted limit line, hidden from the legend.
            return go.Scatter(
                x=group[date_column],
                y=group[column],
                mode='lines',
                line=dict(color='orangered', width=1, dash='dashdot'),
                name=name,
                showlegend=False,
            )

        def limit_shading(column, level):
            # Invisible flat line at `level`; "tonexty" fills the gap to the
            # trace added immediately before it.
            covered = group[group[column].notna()]
            return go.Scatter(
                x=covered[date_column],
                y=[level] * len(covered),
                mode="lines",
                line=dict(width=0),
                hoverinfo="skip",
                showlegend=False,
                fill="tonexty",
                fillcolor="rgba(255, 0, 0, 0.07)",
            )

        # FB forecast
        fig.add_trace(go.Scatter(
            x=group[date_column],
            y=group['FB_forecast'],
            mode='lines',
            line=dict(color='slateblue', width=0.5, dash='dashdot'),
            name='FB Forecast',
            showlegend=True
        ))

        # Lower limit and its shading
        fig.add_trace(limit_line('FB_low', 'FB Low'))
        fig.add_trace(limit_shading('FB_low', 0))

        # Upper limit and its shading
        fig.add_trace(limit_line('FB_high', 'FB High'))
        fig.add_trace(limit_shading('FB_high', group[variable].max() * 1.06))

        # FB anomalies
        flagged_rows = group[group['is_FB_anomaly'] == True]
        fig.add_trace(go.Scatter(
            x=flagged_rows[date_column],
            y=flagged_rows[variable],
            mode='markers',
            marker=dict(color='palevioletred', symbol='circle', line=dict(width=1), size=9),
            name='FB Anomalies',
        ))

        # Final anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, group, date_column, variable)

        fig.show()
        print("\n")
    except Exception as error:
        # The message previously named the EWMA plot, which pointed
        # debugging at the wrong model.
        print(f"FB Prophet Anomaly Plot Failed: {error}")
497
+
498
+
499
def anomaly_dbscan_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the DBSCAN-model plot for one group.

    Adds the evaluation-period highlight when ``eval_period`` is at least
    1, the rows flagged in ``is_DBSCAN_anomaly`` and, when
    ``final_anomalies`` is true, the final anomaly markers.

    Returns
    -------
    None. Any exception is caught and reported with ``print``.
    """
    try:
        group = group.copy()
        fig = initialize_fig(group, group_columns, variable, date_column, "DBSCAN Anomaly Detection")

        # Evaluation period
        if eval_period >= 1:
            fig = add_eval_period_highlight(fig, group, date_column, variable, eval_period)

        # DBSCAN anomalies
        flagged_rows = group[group['is_DBSCAN_anomaly'] == True]
        marker_style = dict(color='mediumorchid', symbol='circle', line=dict(width=1), size=7)
        fig.add_trace(go.Scatter(
            x=flagged_rows[date_column],
            y=flagged_rows[variable],
            mode='markers',
            marker=marker_style,
            name='DBSCAN Anomalies',
        ))

        # Final anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, group, date_column, variable)

        fig.show()
        print("\n")
    except Exception as error:
        print(f"DBSCAN Anomaly Plot Failed: {error}")
522
+
523
+
524
def anomaly_isolation_forest_timeseries_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the Isolation Forest (time series) plot for one group.

    Adds the evaluation-period highlight when ``eval_period`` is at least
    1, the rows flagged in ``is_IsolationForest_anomaly_timeseries`` and,
    when ``final_anomalies`` is true, the final anomaly markers.

    Returns
    -------
    None. Any exception is caught and reported with ``print``.
    """
    try:
        group = group.copy()
        fig = initialize_fig(group, group_columns, variable, date_column, "Isolation Forest Anomaly Detection")

        # Evaluation period
        if eval_period >= 1:
            fig = add_eval_period_highlight(fig, group, date_column, variable, eval_period)

        # Isolation Forest anomalies
        flagged_rows = group[group['is_IsolationForest_anomaly_timeseries'] == True]
        marker_style = dict(color='mediumorchid', symbol='circle', line=dict(width=1), size=7)
        fig.add_trace(go.Scatter(
            x=flagged_rows[date_column],
            y=flagged_rows[variable],
            mode='markers',
            marker=marker_style,
            name='Isolation Forest Anomalies',
        ))

        # Final anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, group, date_column, variable)

        fig.show()
        print("\n")
    except Exception as error:
        print(f"Isolation Forest Time Series Anomaly Plot Failed: {error}")
@@ -0,0 +1,119 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import statistics
4
+
5
+ # # EWMA functions
6
+
7
def ewma_forecast(train, alpha):
    """Return the last EWMA value of the training data.

    Parameters
    ----------
    train : pandas Series of training values
    alpha : float, smoothing factor passed to ``ewm``

    Returns
    -------
    The final element of the exponentially weighted mean, computed with
    ``adjust=False``.
    """
    return train.ewm(alpha=alpha, adjust=False).mean().iloc[-1]
11
+
12
+
13
+ """
14
+ def ew_std(series, alpha):
15
+
16
+ Compute exponentially weighted standard deviation.
17
+ Uses the same alpha as EWMA so recent points get more weight.
18
+
19
+ Parameters
20
+ ----------
21
+ series : pandas Series of actual values
22
+ alpha : float in (0,1)
23
+
24
+ Returns
25
+ -------
26
+ float : exponentially weighted std deviation
27
+ # mean
28
+
29
+ mean = series.mean()
30
+ #print(mean)
31
+
32
+ # Squared deviation from the mean
33
+ squared_diff = (series - mean) ** 2
34
+
35
+ #print(squared_diff)
36
+
37
+ # EWMA of squared deviation → variance
38
+ ewma_var = squared_diff.ewm(alpha=alpha, adjust=False).mean()
39
+ #print(ewma_var)
40
+ #print(ewma_var.iloc[-1])
41
+
42
+ # Std = sqrt(var)
43
+ return np.sqrt(ewma_var.iloc[-1]) """
44
+
45
+
46
def ewma_with_anomalies_rolling_group(group, group_columns, variable, date_column, alpha, sigma, eval_period):
    """
    Rolling (expanding window) EWMA anomaly detection for a SINGLE GROUP ONLY.
    Expects `group` to already be filtered to one group.

    The rows are sorted by `date_column`. Each of the last `eval_period`
    rows is tested against limits built from all rows before it:
    ``forecast +/- sigma * std``, where the forecast is the last EWMA value
    and the std is ``np.std`` of those earlier rows. The lower limit is
    floored at 0.

    Parameters
    ----------
    group : pandas DataFrame for one group
    group_columns : list of columns identifying the group
    variable : name of the value column
    date_column : name of the date column
    alpha : EWMA smoothing factor passed to ``ewm``
    sigma : number of standard deviations used for the limits
    eval_period : number of trailing rows to evaluate

    Returns
    -------
    pandas DataFrame with the group columns, `date_column`, "alpha",
    "sigma", "EWMA_forecast", "STD", "EWMA_high", "EWMA_low", "set"
    ("TRAIN" or "TEST") and "is_EWMA_anomaly". TRAIN rows carry the EWMA
    of the first training window, missing limits and a missing flag.

    Raises
    ------
    ValueError
        If `eval_period` is smaller than 1 or leaves no training rows.
    """
    group = group.sort_values(date_column).reset_index(drop=True)
    n = len(group)

    # Fail early with a clear message: too large a period used to end in an
    # opaque IndexError/KeyError, too small in "No objects to concatenate".
    if eval_period < 1 or eval_period >= n:
        raise ValueError(
            f"eval_period must be at least 1 and smaller than the number of rows ({n}), "
            f"got {eval_period}"
        )

    train_size = n - eval_period  # rolling split

    # Group key values, taken from the first row; group_columns can hold
    # several columns.
    key_dict = {col: group[col].iloc[0] for col in group_columns}

    results = []

    for i in range(train_size, n):

        # Expanding window: everything before row i.
        train = group.loc[:i-1, variable].astype(float)
        test_value = group.loc[i, variable]

        # EWMA is computed once per window; its last value is the forecast.
        ewma_train = train.ewm(alpha=alpha, adjust=False).mean()
        last_std = np.std(train)
        forecast = ewma_train.iloc[-1]

        upper_limit = forecast + sigma * last_std
        lower_limit = max(forecast - sigma * last_std, 0)

        anomaly = bool(test_value > upper_limit or test_value < lower_limit)

        # TRAIN part (added only once, for the first window)
        if i == train_size:
            train_part = pd.concat([
                group.loc[:i-1, group_columns].reset_index(drop=True),
                pd.DataFrame({
                    date_column: group.loc[:i-1, date_column].values,
                    "alpha": alpha,
                    "sigma": sigma,
                    "EWMA_forecast": ewma_train.values,
                    "STD": last_std,
                    "EWMA_high": np.nan,
                    "EWMA_low": np.nan,
                    "set": "TRAIN",
                    "is_EWMA_anomaly": pd.NA,
                })
            ], axis=1)

            results.append(train_part)

        # TEST row
        test_part = pd.DataFrame({
            **{col: [key_dict[col]] for col in key_dict},
            date_column: [pd.to_datetime(group.loc[i, date_column])],
            "alpha": [alpha],
            "sigma": [sigma],
            "EWMA_forecast": [forecast],
            "STD": [last_std],
            "EWMA_high": [upper_limit],
            "EWMA_low": [lower_limit],
            "set": ["TEST"],
            "is_EWMA_anomaly": [anomaly],
        })

        results.append(test_part)

    final_output = pd.concat(results, ignore_index=True)
    # Type safety: ensure the date column is always datetime before returning
    final_output[date_column] = pd.to_datetime(final_output[date_column])
    return final_output
119
+