anomaly-pipeline 0.1.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anomaly_pipeline/__init__.py +2 -0
- anomaly_pipeline/helpers/DB_scan.py +188 -0
- anomaly_pipeline/helpers/IQR.py +71 -0
- anomaly_pipeline/helpers/MAD.py +88 -0
- anomaly_pipeline/helpers/Preprocessing.py +116 -0
- anomaly_pipeline/helpers/STD.py +70 -0
- anomaly_pipeline/helpers/__init__.py +1 -0
- anomaly_pipeline/helpers/baseline.py +112 -0
- anomaly_pipeline/helpers/cluster_functions.py +289 -0
- anomaly_pipeline/helpers/evaluation_info.py +121 -0
- anomaly_pipeline/helpers/evaluation_plots.py +546 -0
- anomaly_pipeline/helpers/ewma.py +119 -0
- anomaly_pipeline/helpers/fb_prophet.py +94 -0
- anomaly_pipeline/helpers/help_info.py +683 -0
- anomaly_pipeline/helpers/iso_forest_general.py +50 -0
- anomaly_pipeline/helpers/iso_forest_timeseries.py +123 -0
- anomaly_pipeline/helpers/percentile.py +65 -0
- anomaly_pipeline/main.py +63 -0
- anomaly_pipeline/pipeline.py +253 -0
- anomaly_pipeline-0.1.27.dist-info/METADATA +15 -0
- anomaly_pipeline-0.1.27.dist-info/RECORD +24 -0
- anomaly_pipeline-0.1.27.dist-info/WHEEL +5 -0
- anomaly_pipeline-0.1.27.dist-info/entry_points.txt +2 -0
- anomaly_pipeline-0.1.27.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,546 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import plotly.graph_objects as go
|
|
4
|
+
import plotly.express as px
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def initialize_fig(group, group_columns, variable, date_column, anomaly_detection_model):
    """Build the base figure (actuals line plus shared layout) for one group.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to plot; must contain ``group_columns``, ``variable`` and
        ``date_column``.
    group_columns : list of str or str
        Column(s) whose first-row values are joined with " -- " to form the
        plot title.
    variable : str
        Column drawn on the y-axis. Its name (title-cased unless it is already
        all upper-case) is used as the trace name and the y-axis title.
    date_column : str
        Column drawn on the x-axis.
    anomaly_detection_model : str
        Text appended to the title after the group key.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure holding the actuals trace and the common layout.
    """
    # A bare string would select a single column and the join below would then
    # split its value into individual characters.
    if isinstance(group_columns, str):
        group_columns = [group_columns]

    # Cast every key part to str so numeric / date-like group keys do not make
    # str.join raise TypeError.
    key_parts = [str(part) for part in group[group_columns].values[0]]
    plot_title = " -- ".join(key_parts).upper() + " -- " + anomaly_detection_model

    display_name = variable if variable == variable.upper() else variable.title()

    # Pad the y-range away from the data. The plain factors (0.9 / 1.06) move a
    # negative bound *into* the data, so the factor is flipped for negatives.
    y_min = group[variable].min()
    y_max = group[variable].max()
    y_lower = y_min * 1.1 if y_min < 0 else y_min * 0.9
    y_upper = y_max * 0.94 if y_max < 0 else y_max * 1.06

    fig = go.Figure()

    # Actuals
    fig.add_trace(go.Scatter(
        x=group[date_column],
        y=group[variable],
        mode='lines',
        line=dict(color='seagreen', width=1.5),
        name=display_name,
    ))

    fig.update_layout(
        title=dict(
            text=plot_title,
            y=0.96,
            x=0.5,
            xanchor='center',
            yanchor='top',
            font=dict(size=18, color='black', weight='bold'),
        ),
        height=350,
        width=1200,
        margin=dict(l=50, r=50, t=40, b=30),
        plot_bgcolor='snow',
        paper_bgcolor='whitesmoke',
        xaxis=dict(
            range=[group[date_column].min(), group[date_column].max()],
            showline=True,
            linewidth=0.5,
            linecolor='orange',
            zeroline=False,
            gridcolor='rgba(255, 165, 0, 0.1)',
            mirror=True
        ),
        yaxis=dict(
            range=[y_lower, y_upper],
            showline=True,
            linewidth=0.5,
            linecolor='orange',
            zeroline=False,
            gridcolor='rgba(255, 165, 0, 0.1)',
            mirror=True
        ),
        yaxis_title=dict(
            text=display_name,
            font=dict(size=16, weight='bold', color='black')
        ),
        # Legend sits just outside the right edge of the plotting area.
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02,
        )
    )

    return fig
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def add_anomalies(fig, group, date_column, variable):
    """Overlay the rows flagged in ``is_Anomaly`` as pink cross markers.

    Parameters
    ----------
    fig : plotly.graph_objects.Figure
        Figure to draw on (modified in place and returned).
    group : pandas.DataFrame
        Must contain ``is_Anomaly``, ``date_column`` and ``variable``.
    date_column, variable : str
        Columns used for the marker x and y positions.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    flagged_rows = group[group['is_Anomaly'] == True]
    cross_marker = dict(color='pink', symbol='cross', line=dict(width=1), size=9)

    # Hover is skipped so these markers do not compete with the model markers
    # drawn at the same positions.
    anomaly_trace = go.Scatter(
        x=flagged_rows[date_column],
        y=flagged_rows[variable],
        mode='markers',
        marker=cross_marker,
        name='Anomalies',
        hoverinfo='skip',
    )
    fig.add_trace(anomaly_trace)
    return fig
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def add_model_anomalies(fig, group, date_column, variable, model):
    """Overlay the rows flagged by one model, with a hover box per marker.

    Parameters
    ----------
    fig : plotly.graph_objects.Figure
        Figure to draw on (modified in place and returned).
    group : pandas.DataFrame
        Must contain ``is_{model}_anomaly`` (row flag) and ``{model}_anomaly``
        (value shown as the hover "Category"), plus ``date_column`` and
        ``variable``.
    date_column, variable : str
        Columns used for the marker x and y positions.
    model : str
        Model prefix used to build the two column names and the legend label.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Filter once; the same subset feeds x, y and customdata.
    flagged = group[group[f'is_{model}_anomaly'] == True]
    display_name = variable if variable == variable.upper() else variable.title()

    fig.add_trace(go.Scatter(
        x=flagged[date_column],
        y=flagged[variable],
        mode='markers',
        marker=dict(color='palevioletred', symbol='circle', line=dict(width=1), size=9),
        name=f'{model} Anomalies',
        customdata=flagged[[f'{model}_anomaly']],
        hovertemplate=(
            f'Date: %{{x|%Y-%m-%d}}<br>' +
            # d3-format puts the trim flag "~" AFTER the precision; the former
            # "~.2s" is not a valid specifier.
            f'{display_name}: %{{y:.2~s}}<br>' +
            f'{model}' + ' Category: %{customdata[0]}<extra></extra>'
        )
    ))
    return fig
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def add_anomaly_region(fig, region, group, variable, date_column, threshold_column, threshold_label):
    """Shade the region beyond a threshold and draw the threshold line.

    Parameters
    ----------
    fig : plotly.graph_objects.Figure
        Figure to draw on (modified in place and returned).
    region : {'upper', 'lower'}
        Which side of the threshold is shaded.
    group : pandas.DataFrame
        Must contain ``variable``, ``date_column`` and ``threshold_column``.
    variable, date_column : str
        Data columns; ``variable`` determines how far the shading extends.
    threshold_column : str
        Column holding the threshold. The shaded rectangle uses its FIRST
        value only, while the dashed line plots the whole column.
    threshold_label : str
        Trace name of the threshold line (the trace is hidden from the legend).

    Returns
    -------
    plotly.graph_objects.Figure

    Raises
    ------
    ValueError
        If ``region`` is neither ``'upper'`` nor ``'lower'`` (this previously
        surfaced as an UnboundLocalError on ``y0``).
    """
    threshold = group[threshold_column].values[0]

    if region == 'upper':
        data_max = group[variable].max()
        y0 = threshold
        # Extend past the largest value; flip the factor for a negative maximum
        # so the rectangle still reaches above the data.
        y1 = data_max * 0.94 if data_max < 0 else data_max * 1.06
    elif region == 'lower':
        data_min = group[variable].min()
        # Non-negative data keeps the original floor of 0; negative data needs
        # a floor below the smallest value or the rectangle would be inverted.
        y0 = data_min * 1.1 if data_min < 0 else 0
        y1 = threshold
    else:
        raise ValueError(f"region must be 'upper' or 'lower', got {region!r}")

    # Shading: spans the full plot width (paper coordinates on x).
    fig.add_shape(
        type="rect",
        x0=0, x1=1, xref="paper",
        y0=y0, y1=y1,
        yref="y",
        fillcolor="rgba(255, 0, 0, 0.055)",
        line=dict(width=0),
        layer="below"
    )

    # Threshold line (used for both the upper and the lower bound)
    fig.add_trace(go.Scatter(
        x=group[date_column],
        y=group[threshold_column],
        mode='lines',
        line=dict(color='orangered', width=1, dash='dashdot'),
        name=threshold_label,
        showlegend=False
    ))

    return fig
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def add_eval_period_highlight(fig, group, date_column, variable, eval_period):
    """Highlight the last ``eval_period`` rows with a wide translucent line.

    Parameters
    ----------
    fig : plotly.graph_objects.Figure
        Figure to draw on (modified in place and returned).
    group : pandas.DataFrame
        Must contain ``date_column`` and ``variable``; the highlighted rows
        are the last ones in the frame's current row order.
    date_column, variable : str
        Columns used for the x and y values of the highlight.
    eval_period : int
        Number of trailing rows to highlight. Values below 1 add nothing.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # A slice of [-0:] would select the WHOLE series, so bail out explicitly.
    if eval_period < 1:
        return fig

    # .iloc makes the "last N rows" intent independent of the index labels.
    tail_dates = group[date_column].iloc[-eval_period:]
    tail_values = group[variable].iloc[-eval_period:]

    fig.add_trace(go.Scatter(
        x=tail_dates,
        y=tail_values,
        mode='lines',
        line=dict(
            color='rgba(0, 255, 0, 0.25)',  # 'lime' with 0.25 alpha
            width=10
        ),
        name='Evaluation Period',  # legend label; was misspelled "Evalution"
        hoverinfo='skip',
    ))
    return fig
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def anomaly_eval_plot(group, group_columns, variable, date_column, eval_period=12, show_anomaly_scores_on_main_plot=False):
    """Show the combined (model-vote) anomaly plot for one group.

    Every column named ``is_<model>_anomaly`` is treated as one model's vote.
    From those votes the function derives, on a copy of ``group``:

    * ``Anomaly_Votes`` / ``Vote_Cnt`` - flags raised vs. non-missing votes,
    * ``is_Anomaly`` - True when at least half of the votes are anomalies,
    * ``Anomaly_Score`` - ``2 * (votes / count - 0.5)``,
    * ``Anomaly_Score_Display`` - the score times 100, rounded away from zero
      (a score that is not strictly positive or negative is shown as 1).

    The figure shows actuals, mean, median, anomalies (red) and rows with at
    least one vote that are not anomalies (orange). When
    ``show_anomaly_scores_on_main_plot`` is true the display score is added as
    bars on a secondary axis. ``eval_period`` is accepted but not used here.

    Nothing is returned; the figure is shown. Any exception is caught and
    reported with ``print``.
    """
    # IS ANOMALY plot - this is the main plot
    try:
        frame = group.copy()

        vote_cols = [
            name for name in frame.columns.to_list()
            if name.startswith('is_') and name.endswith('_anomaly') and name != 'is_anomaly'
        ]

        def voting_models(row):
            # Sorted names of the models that flagged this row.
            return sorted([
                name.removeprefix('is_').removesuffix('_anomaly')
                for name in vote_cols
                if pd.notna(row[name]) and row[name] == True
            ])

        frame['Anomaly Vote Models'] = frame.apply(voting_models, axis=1)
        frame['Anomaly Vote Models'] = frame['Anomaly Vote Models'].apply(lambda names: ', '.join(names))

        frame['Anomaly_Votes'] = frame[vote_cols].sum(axis=1).astype(int)
        # Turning every False into True makes the row sum count non-missing votes.
        frame['Vote_Cnt'] = frame[vote_cols].replace(False, True).sum(axis=1).astype(int)
        frame['Anomaly_Votes_Display'] = frame['Anomaly_Votes'].astype(str) + ' out of ' + frame['Vote_Cnt'].astype(str)

        vote_share = frame['Anomaly_Votes'] / frame['Vote_Cnt']
        frame['is_Anomaly'] = np.where(vote_share >= 0.5, True, False)
        frame['Anomaly_Score'] = 2 * (vote_share - 0.5).astype(float)

        scaled_score = 100 * frame['Anomaly_Score']
        positive_or_default = np.where(frame['Anomaly_Score'] > 0, np.ceil(scaled_score), 1)
        frame['Anomaly_Score_Display'] = np.where(
            frame['Anomaly_Score'] < 0, np.floor(scaled_score), positive_or_default
        ).astype(float)

        frame['Mean'] = frame[variable].mean()
        frame['Median'] = frame[variable].median()

        fig = initialize_fig(frame, group_columns, variable, date_column, "Anomalies")

        # Mean and Median reference lines
        for stat_column, colour, dash_style in (('Mean', 'maroon', 'dash'), ('Median', 'darkblue', 'dot')):
            fig.add_trace(go.Scatter(
                x=frame[date_column],
                y=frame[stat_column],
                mode='lines',
                line=dict(color=colour, width=0.7, dash=dash_style),
                name=stat_column,
                showlegend=True,
                hoverinfo='skip',
            ))

        display_name = variable if variable == variable.upper() else variable.title()
        hover_columns = ['Anomaly_Votes_Display', 'Anomaly Vote Models', 'Anomaly_Score_Display']
        hover_text = (
            f'Date: %{{x|%Y-%m-%d}}<br>' +
            f'{display_name}: %{{y:,d}}<br>' +
            'Anomaly Votes: %{customdata[0]}<br>' +
            'Anomaly Vote Models: %{customdata[1]}<br>' +
            'Anomaly Score: %{customdata[2]}<extra></extra><br>'
        )

        # Anomalies first, then rows with votes that fell short of the majority.
        anomalous = frame[frame['is_Anomaly'] == True]
        near_miss = frame[(frame['is_Anomaly'] == False) & (frame['Anomaly_Votes'] >= 1)]
        for subset, colour, legend_name in (
            (anomalous, 'red', 'Anomalies'),
            (near_miss, 'orange', 'Not Quite Anomalies'),
        ):
            fig.add_trace(go.Scatter(
                x=subset[date_column],
                y=subset[variable],
                mode='markers',
                marker=dict(
                    color=colour,
                    symbol='circle',
                    line=dict(width=1),
                    # Marker size grows with the score.
                    size=5 * (subset['Anomaly_Score'] + 2),
                ),
                name=legend_name,
                customdata=subset[hover_columns],
                hovertemplate=hover_text,
            ))

        # Add Anomaly Scores to Secondary Axis
        if show_anomaly_scores_on_main_plot:
            bar_colours = np.where(
                frame['Anomaly_Score_Display'] > 0,
                'rgba(255, 0, 0, 0.35)',
                'rgba(128, 128, 128, 0.15)',
            )
            fig.add_trace(go.Bar(
                x=frame[date_column],
                y=frame['Anomaly_Score_Display'],
                name='Anomaly Score',
                marker=dict(color=bar_colours),
                yaxis='y2',
                # Ensure it doesn't clutter the main hover box
                hoverinfo='y+name',
                showlegend=True
            ))

            fig.update_layout(
                yaxis2=dict(
                    title='Anomaly Score',
                    overlaying='y',
                    side='right',
                    range=[-105, 105],
                    showgrid=False,
                ),
                margin=dict(l=50, r=200, t=50, b=50),
                legend=dict(
                    orientation="v",
                    yanchor="top",
                    y=1,
                    xanchor="left",
                    x=1.07,
                ))

        fig.show()
        print("\n")

    except Exception as e:
        print(f"Anomaly Plot Failed: {e}")
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def anomaly_percentile_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the Percentile model plot for one group.

    Draws the actuals, the shaded regions beyond ``Percentile_low`` and
    ``Percentile_high``, the Percentile model's own anomalies and, when
    ``final_anomalies`` is true, the ``is_Anomaly`` markers.
    ``eval_period`` is accepted but not used here.

    Nothing is returned; any exception is caught and reported with ``print``.
    """
    try:
        frame = group.copy()
        fig = initialize_fig(frame, group_columns, variable, date_column, "Percentile Anomaly Detection")

        # Lower band first, then upper band: shading plus threshold line each.
        bands = (
            ('lower', 'Percentile_low', 'Percentile Low'),
            ('upper', 'Percentile_high', 'Percentile High'),
        )
        for region, threshold_column, label in bands:
            fig = add_anomaly_region(fig, region, frame, variable, date_column, threshold_column, label)

        # Percentile Anomalies
        fig = add_model_anomalies(fig, frame, date_column, variable, 'Percentile')

        # Final (voted) anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, frame, date_column, variable)

        fig.show()
        print("\n")
    except Exception as e:
        print(f"Percentile Anomaly Plot Failed: {e}")
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def anomaly_sd_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the SD model plot for one group.

    Draws the actuals, the shaded regions beyond ``SD2_low`` and ``SD2_high``,
    the SD model's own anomalies and, when ``final_anomalies`` is true, the
    ``is_Anomaly`` markers. ``eval_period`` is accepted but not used here.

    Nothing is returned; any exception is caught and reported with ``print``.
    """
    try:
        frame = group.copy()
        fig = initialize_fig(frame, group_columns, variable, date_column, "SD Anomaly Detection")

        # Lower band first, then upper band: shading plus threshold line each.
        bands = (
            ('lower', 'SD2_low', 'SD Low'),
            ('upper', 'SD2_high', 'SD High'),
        )
        for region, threshold_column, label in bands:
            fig = add_anomaly_region(fig, region, frame, variable, date_column, threshold_column, label)

        # SD Anomalies
        fig = add_model_anomalies(fig, frame, date_column, variable, 'SD')

        # Final (voted) anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, frame, date_column, variable)

        fig.show()
        print("\n")
    except Exception as e:
        print(f"SD Anomaly Plot Failed: {e}")
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def anomaly_mad_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the MAD model plot for one group.

    Draws the actuals, the shaded regions beyond ``MAD_low`` and ``MAD_high``,
    the MAD model's own anomalies and, when ``final_anomalies`` is true, the
    ``is_Anomaly`` markers. ``eval_period`` is accepted but not used here.

    Nothing is returned; any exception is caught and reported with ``print``.
    """
    try:
        frame = group.copy()
        fig = initialize_fig(frame, group_columns, variable, date_column, "MAD Anomaly Detection")

        # Lower band first, then upper band: shading plus threshold line each.
        bands = (
            ('lower', 'MAD_low', 'MAD Low'),
            ('upper', 'MAD_high', 'MAD High'),
        )
        for region, threshold_column, label in bands:
            fig = add_anomaly_region(fig, region, frame, variable, date_column, threshold_column, label)

        # MAD Anomalies
        fig = add_model_anomalies(fig, frame, date_column, variable, 'MAD')

        # Final (voted) anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, frame, date_column, variable)

        fig.show()
        print("\n")
    except Exception as e:
        print(f"MAD Anomaly Plot Failed: {e}")
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def anomaly_iqr_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the IQR model plot for one group.

    Draws the actuals, the shaded regions beyond ``IQR_low`` and ``IQR_high``,
    the IQR model's own anomalies and, when ``final_anomalies`` is true, the
    ``is_Anomaly`` markers. ``eval_period`` is accepted but not used here.

    Nothing is returned; any exception is caught and reported with ``print``.
    """
    try:
        frame = group.copy()
        fig = initialize_fig(frame, group_columns, variable, date_column, "IQR Anomaly Detection")

        # Lower band first, then upper band: shading plus threshold line each.
        bands = (
            ('lower', 'IQR_low', 'IQR Low'),
            ('upper', 'IQR_high', 'IQR High'),
        )
        for region, threshold_column, label in bands:
            fig = add_anomaly_region(fig, region, frame, variable, date_column, threshold_column, label)

        # IQR Anomalies
        fig = add_model_anomalies(fig, frame, date_column, variable, 'IQR')

        # Final (voted) anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, frame, date_column, variable)

        fig.show()
        print("\n")
    except Exception as e:
        print(f"IQR Anomaly Plot Failed: {e}")
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def anomaly_ewma_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the EWMA model plot for one group.

    Draws the actuals, ``EWMA_forecast``, the ``EWMA_low`` / ``EWMA_high``
    limits with shading beyond them, the rows flagged in ``is_EWMA_anomaly``
    and, when ``final_anomalies`` is true, the ``is_Anomaly`` markers.
    ``eval_period`` is accepted but not used here.

    Nothing is returned; any exception is caught and reported with ``print``.
    """
    try:
        frame = group.copy()
        fig = initialize_fig(frame, group_columns, variable, date_column, "EWMA Anomaly Detection")

        limit_line = dict(color='orangered', width=1, dash='dashdot')
        shade_colour = "rgba(255, 0, 0, 0.07)"

        # EWMA Forecast
        fig.add_trace(go.Scatter(
            x=frame[date_column],
            y=frame['EWMA_forecast'],
            mode='lines',
            line=dict(color='slateblue', width=0.5, dash='dashdot'),
            name='EWMA Forecast',
            showlegend=True
        ))

        # EWMA low line
        fig.add_trace(go.Scatter(
            x=frame[date_column],
            y=frame['EWMA_low'],
            mode='lines',
            line=limit_line,
            name='EWMA Low',
            showlegend=False
        ))

        # Lower shading: an invisible line at 0 filled up to the trace added
        # just before it (the low limit), so trace order matters here.
        rows_with_low = frame[frame['EWMA_low'].notna()]
        fig.add_trace(go.Scatter(
            x=rows_with_low[date_column],
            y=[0] * len(rows_with_low),
            mode="lines",
            line=dict(width=0),
            hoverinfo="skip",
            showlegend=False,
            fill="tonexty",
            fillcolor=shade_colour
        ))

        # EWMA high line
        fig.add_trace(go.Scatter(
            x=frame[date_column],
            y=frame['EWMA_high'],
            mode='lines',
            line=limit_line,
            name='EWMA High',
            showlegend=False,
        ))

        # Upper shading: an invisible line at the top of the y-range filled
        # down to the trace added just before it (the high limit).
        rows_with_high = frame[frame['EWMA_high'].notna()]
        ceiling = frame[variable].max() * 1.06
        fig.add_trace(go.Scatter(
            x=rows_with_high[date_column],
            y=[ceiling] * len(rows_with_high),
            mode="lines",
            line=dict(width=0),
            hoverinfo="skip",
            showlegend=False,
            fill="tonexty",
            fillcolor=shade_colour
        ))

        # EWMA Anomalies
        ewma_flagged = frame[frame['is_EWMA_anomaly'] == True]
        fig.add_trace(go.Scatter(
            x=ewma_flagged[date_column],
            y=ewma_flagged[variable],
            mode='markers',
            marker=dict(color='palevioletred', symbol='circle', line=dict(width=1), size=9),
            name='EWMA Anomalies',
        ))

        # Final (voted) anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, frame, date_column, variable)

        fig.show()
        print("\n")
    except Exception as e:
        print(f"EWMA Anomaly Plot Failed: {e}")
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def anomaly_fb_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the FB Prophet model plot for one group.

    Draws the actuals, ``FB_forecast``, the ``FB_low`` / ``FB_high`` limits
    with shading beyond them, the rows flagged in ``is_FB_anomaly`` and, when
    ``final_anomalies`` is true, the ``is_Anomaly`` markers.
    ``eval_period`` is accepted but not used here.

    Nothing is returned; any exception is caught and reported with ``print``.
    """
    try:
        group = group.copy()
        fig = initialize_fig(group, group_columns, variable, date_column, "FB Prophet Anomaly Detection")

        # FB Forecast
        fig.add_trace(go.Scatter(
            x=group[date_column],
            y=group['FB_forecast'],
            mode='lines',
            line=dict(color='slateblue', width=0.5, dash='dashdot'),
            name='FB Forecast',
            showlegend=True
        ))

        # Lower FB line
        fig.add_trace(go.Scatter(
            x=group[date_column],
            y=group['FB_low'],
            mode='lines',
            line=dict(color='orangered', width=1, dash='dashdot'),
            name='FB Low',
            showlegend=False
        ))

        # Lower shading: an invisible line at 0 filled up to the trace added
        # just before it (the low limit), so trace order matters here.
        has_low = group[group['FB_low'].notna()]
        fig.add_trace(go.Scatter(
            x=has_low[date_column],
            y=[0] * len(has_low),
            mode="lines",
            line=dict(width=0),
            hoverinfo="skip",
            showlegend=False,
            fill="tonexty",
            fillcolor="rgba(255, 0, 0, 0.07)"
        ))

        # Upper FB line
        fig.add_trace(go.Scatter(
            x=group[date_column],
            y=group['FB_high'],
            mode='lines',
            line=dict(color='orangered', width=1, dash='dashdot'),
            name='FB High',
            showlegend=False,
        ))

        # Upper shading: an invisible line at the top of the y-range filled
        # down to the trace added just before it (the high limit).
        has_high = group[group['FB_high'].notna()]
        fig.add_trace(go.Scatter(
            x=has_high[date_column],
            y=[group[variable].max() * 1.06] * len(has_high),
            mode="lines",
            line=dict(width=0),
            hoverinfo="skip",
            showlegend=False,
            fill="tonexty",
            fillcolor="rgba(255, 0, 0, 0.07)"
        ))

        # FB Anomalies
        fb_flagged = group[group['is_FB_anomaly'] == True]
        fig.add_trace(go.Scatter(
            x=fb_flagged[date_column],
            y=fb_flagged[variable],
            mode='markers',
            marker=dict(color='palevioletred', symbol='circle', line=dict(width=1), size=9),
            name='FB Anomalies',
        ))

        # Final (voted) anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, group, date_column, variable)

        fig.show()
        print("\n")
    except Exception as e:
        # The message used to say "EWMA" (copy-paste from the EWMA plot), which
        # pointed at the wrong model when this plot failed.
        print(f"FB Prophet Anomaly Plot Failed: {e}")
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def anomaly_dbscan_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the DBSCAN model plot for one group.

    Draws the actuals, a highlight over the last ``eval_period`` rows (when
    ``eval_period`` is at least 1), the rows flagged in ``is_DBSCAN_anomaly``
    and, when ``final_anomalies`` is true, the ``is_Anomaly`` markers.

    Nothing is returned; any exception is caught and reported with ``print``.
    """
    try:
        frame = group.copy()
        fig = initialize_fig(frame, group_columns, variable, date_column, "DBSCAN Anomaly Detection")

        # Evaluation Period
        if eval_period >= 1:
            fig = add_eval_period_highlight(fig, frame, date_column, variable, eval_period)

        # DBSCAN Anomalies
        dbscan_flagged = frame[frame['is_DBSCAN_anomaly'] == True]
        marker_style = dict(color='mediumorchid', symbol='circle', line=dict(width=1), size=7)
        fig.add_trace(go.Scatter(
            x=dbscan_flagged[date_column],
            y=dbscan_flagged[variable],
            mode='markers',
            marker=marker_style,
            name='DBSCAN Anomalies',
        ))

        # Final (voted) anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, frame, date_column, variable)

        fig.show()
        print("\n")
    except Exception as e:
        print(f"DBSCAN Anomaly Plot Failed: {e}")
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def anomaly_isolation_forest_timeseries_plot(group, group_columns, variable, date_column, final_anomalies=True, eval_period=12):
    """Show the Isolation Forest (time series) model plot for one group.

    Draws the actuals, a highlight over the last ``eval_period`` rows (when
    ``eval_period`` is at least 1), the rows flagged in
    ``is_IsolationForest_anomaly_timeseries`` and, when ``final_anomalies`` is
    true, the ``is_Anomaly`` markers.

    Nothing is returned; any exception is caught and reported with ``print``.
    """
    try:
        frame = group.copy()
        fig = initialize_fig(frame, group_columns, variable, date_column, "Isolation Forest Anomaly Detection")

        # Evaluation Period
        if eval_period >= 1:
            fig = add_eval_period_highlight(fig, frame, date_column, variable, eval_period)

        # Isolation Forest Anomalies
        forest_flagged = frame[frame['is_IsolationForest_anomaly_timeseries'] == True]
        marker_style = dict(color='mediumorchid', symbol='circle', line=dict(width=1), size=7)
        fig.add_trace(go.Scatter(
            x=forest_flagged[date_column],
            y=forest_flagged[variable],
            mode='markers',
            marker=marker_style,
            name='Isolation Forest Anomalies',
        ))

        # Final (voted) anomaly markers
        if final_anomalies:
            fig = add_anomalies(fig, frame, date_column, variable)

        fig.show()
        print("\n")
    except Exception as e:
        print(f"Isolation Forest Time Series Anomaly Plot Failed: {e}")
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import statistics
|
|
4
|
+
|
|
5
|
+
# # EWMA functions
|
|
6
|
+
|
|
7
|
+
def ewma_forecast(train, alpha):
    """Return the final value of the EWMA computed over ``train``.

    The average is the recursive form (``adjust=False``) with smoothing
    factor ``alpha``; its last element is used as the one-step forecast.
    """
    return train.ewm(alpha=alpha, adjust=False).mean().iloc[-1]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
"""
|
|
14
|
+
def ew_std(series, alpha):
|
|
15
|
+
|
|
16
|
+
Compute exponentially weighted standard deviation.
|
|
17
|
+
Uses the same alpha as EWMA so recent points get more weight.
|
|
18
|
+
|
|
19
|
+
Parameters
|
|
20
|
+
----------
|
|
21
|
+
series : pandas Series of actual values
|
|
22
|
+
alpha : float in (0,1)
|
|
23
|
+
|
|
24
|
+
Returns
|
|
25
|
+
-------
|
|
26
|
+
float : exponentially weighted std deviation
|
|
27
|
+
# mean
|
|
28
|
+
|
|
29
|
+
mean = series.mean()
|
|
30
|
+
#print(mean)
|
|
31
|
+
|
|
32
|
+
# Squared deviation from the mean
|
|
33
|
+
squared_diff = (series - mean) ** 2
|
|
34
|
+
|
|
35
|
+
#print(squared_diff)
|
|
36
|
+
|
|
37
|
+
# EWMA of squared deviation → variance
|
|
38
|
+
ewma_var = squared_diff.ewm(alpha=alpha, adjust=False).mean()
|
|
39
|
+
#print(ewma_var)
|
|
40
|
+
#print(ewma_var.iloc[-1])
|
|
41
|
+
|
|
42
|
+
# Std = sqrt(var)
|
|
43
|
+
return np.sqrt(ewma_var.iloc[-1]) """
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def ewma_with_anomalies_rolling_group(group, group_columns, variable, date_column, alpha, sigma, eval_period):
    """
    Rolling (expanding window) EWMA anomaly detection for a SINGLE GROUP ONLY.
    Expects `group` to already be filtered to one group.

    The rows are sorted by ``date_column``. The last ``eval_period`` rows are
    evaluated one at a time: each is compared with limits built from ALL rows
    before it (forecast = last EWMA value, band = ``sigma`` times the
    population standard deviation of those rows).

    Parameters
    ----------
    group : pandas.DataFrame
        Data for one group; must contain ``group_columns``, ``variable`` and
        ``date_column``.
    group_columns : list of str
        Key columns copied into the output.
    variable : str
        Numeric column being monitored.
    date_column : str
        Column used for ordering; returned as datetime.
    alpha : float
        EWMA smoothing factor (passed to ``Series.ewm(alpha=..., adjust=False)``).
    sigma : float
        Width of the band in standard deviations.
    eval_period : int
        Number of trailing rows to evaluate; must satisfy
        ``1 <= eval_period < len(group)``.

    Returns
    -------
    pandas.DataFrame
        One row per input row with the group columns, ``date_column``,
        ``alpha``, ``sigma``, ``EWMA_forecast``, ``STD``, ``EWMA_high``,
        ``EWMA_low``, ``set`` ("TRAIN"/"TEST") and ``is_EWMA_anomaly``.
        TRAIN rows carry NaN limits and a missing anomaly flag. The
        ``variable`` column itself is not part of the output.

    Raises
    ------
    ValueError
        If ``eval_period`` leaves no training rows or no rows to evaluate.
    """
    group = group.sort_values(date_column).reset_index(drop=True)
    n = len(group)

    # Without this check an oversized eval_period failed later with an opaque
    # IndexError, and a non-positive one with "No objects to concatenate".
    if not 1 <= eval_period < n:
        raise ValueError(
            f"eval_period must be between 1 and {n - 1} for a group of {n} rows, got {eval_period}"
        )

    train_size = n - eval_period  # rolling split

    # Build group key dictionary
    # group_columns can be list of multiple cols
    key_dict = {col: group[col].iloc[0] for col in group_columns}

    results = []

    for i in range(train_size, n):

        # .loc slicing is label based and inclusive, hence i-1 on the reset index.
        train = group.loc[:i - 1, variable].astype(float)
        test_value = group.loc[i, variable]

        # --- EWMA + STD ---
        # One EWMA pass serves both the TRAIN curve and the forecast (its last value).
        ewma_train = train.ewm(alpha=alpha, adjust=False).mean()
        forecast = ewma_train.iloc[-1]
        last_std = np.std(train)

        upper_limit = forecast + sigma * last_std
        # The lower limit is floored at 0, so the band never extends below zero.
        lower_limit = max(forecast - sigma * last_std, 0)

        anomaly = bool(test_value > upper_limit or test_value < lower_limit)

        # TRAIN part (added only once, from the first evaluation window)
        if i == train_size:
            train_part = pd.concat([
                group.loc[:i - 1, group_columns].reset_index(drop=True),
                pd.DataFrame({
                    date_column: group.loc[:i - 1, date_column].values,
                    "alpha": alpha,
                    "sigma": sigma,
                    "EWMA_forecast": ewma_train.values,
                    "STD": last_std,
                    "EWMA_high": np.nan,
                    "EWMA_low": np.nan,
                    "set": "TRAIN",
                    "is_EWMA_anomaly": pd.NA,
                })
            ], axis=1)

            results.append(train_part)

        # TEST row
        test_part = pd.DataFrame({
            **{col: [key_dict[col]] for col in key_dict},
            date_column: [pd.to_datetime(group.loc[i, date_column])],
            "alpha": [alpha],
            "sigma": [sigma],
            "EWMA_forecast": [forecast],
            "STD": [last_std],
            "EWMA_high": [upper_limit],
            "EWMA_low": [lower_limit],
            "set": ["TEST"],
            "is_EWMA_anomaly": [anomaly],
        })

        results.append(test_part)

    final_output = pd.concat(results, ignore_index=True)
    # Type Safety Check: Ensure the date column is always datetime before returning
    final_output[date_column] = pd.to_datetime(final_output[date_column])
    return final_output
|
|
119
|
+
|