anomaly-pipeline 0.1.27__py3-none-any.whl → 0.1.61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anomaly_pipeline/__init__.py +73 -1
- anomaly_pipeline/helpers/DB_scan.py +144 -10
- anomaly_pipeline/helpers/MAD.py +45 -0
- anomaly_pipeline/helpers/Preprocessing.py +274 -73
- anomaly_pipeline/helpers/STD.py +64 -0
- anomaly_pipeline/helpers/__init__.py +13 -1
- anomaly_pipeline/helpers/evaluation_info.py +25 -17
- anomaly_pipeline/helpers/evaluation_plots.py +636 -30
- anomaly_pipeline/helpers/ewma.py +105 -7
- anomaly_pipeline/helpers/fb_prophet.py +150 -2
- anomaly_pipeline/helpers/{help_info.py → help_anomaly.py} +194 -89
- anomaly_pipeline/helpers/iso_forest_general.py +5 -3
- anomaly_pipeline/helpers/iso_forest_timeseries.py +195 -23
- anomaly_pipeline/helpers/percentile.py +46 -3
- anomaly_pipeline/main.py +158 -39
- anomaly_pipeline/pipeline.py +106 -34
- anomaly_pipeline-0.1.61.dist-info/METADATA +275 -0
- anomaly_pipeline-0.1.61.dist-info/RECORD +24 -0
- anomaly_pipeline-0.1.27.dist-info/METADATA +0 -15
- anomaly_pipeline-0.1.27.dist-info/RECORD +0 -24
- {anomaly_pipeline-0.1.27.dist-info → anomaly_pipeline-0.1.61.dist-info}/WHEEL +0 -0
- {anomaly_pipeline-0.1.27.dist-info → anomaly_pipeline-0.1.61.dist-info}/entry_points.txt +0 -0
- {anomaly_pipeline-0.1.27.dist-info → anomaly_pipeline-0.1.61.dist-info}/top_level.txt +0 -0
|
@@ -6,7 +6,7 @@ import plotly.express as px
|
|
|
6
6
|
|
|
7
7
|
def initialize_fig(group, group_columns, variable, date_column, anomaly_detection_model):
|
|
8
8
|
|
|
9
|
-
plot_title = "
|
|
9
|
+
plot_title = " - ".join(list(group[group_columns].values[0])).upper() + " -- " + anomaly_detection_model
|
|
10
10
|
|
|
11
11
|
fig = go.Figure()
|
|
12
12
|
|
|
@@ -19,6 +19,21 @@ def initialize_fig(group, group_columns, variable, date_column, anomaly_detectio
|
|
|
19
19
|
name=variable if variable == variable.upper() else variable.title(),
|
|
20
20
|
))
|
|
21
21
|
|
|
22
|
+
# --- Calculate X-Axis Padding (One Period) ---
|
|
23
|
+
dates = group[date_column].sort_values()
|
|
24
|
+
min_date = dates.min()
|
|
25
|
+
max_date = dates.max()
|
|
26
|
+
|
|
27
|
+
if len(dates) > 1:
|
|
28
|
+
# Calculate the most common time difference to determine the period
|
|
29
|
+
period = dates.diff().mode().iloc[0]
|
|
30
|
+
else:
|
|
31
|
+
period = pd.Timedelta(days=1)
|
|
32
|
+
|
|
33
|
+
# Apply padding
|
|
34
|
+
range_min = min_date - period
|
|
35
|
+
range_max = max_date + period
|
|
36
|
+
|
|
22
37
|
fig.update_layout(
|
|
23
38
|
title=dict(
|
|
24
39
|
text=plot_title,
|
|
@@ -34,7 +49,7 @@ def initialize_fig(group, group_columns, variable, date_column, anomaly_detectio
|
|
|
34
49
|
plot_bgcolor='snow',
|
|
35
50
|
paper_bgcolor='whitesmoke',
|
|
36
51
|
xaxis=dict(
|
|
37
|
-
range=[
|
|
52
|
+
range=[range_min, range_max],
|
|
38
53
|
showline=True,
|
|
39
54
|
linewidth=0.5,
|
|
40
55
|
linecolor='orange',
|
|
@@ -52,7 +67,7 @@ def initialize_fig(group, group_columns, variable, date_column, anomaly_detectio
|
|
|
52
67
|
mirror=True
|
|
53
68
|
),
|
|
54
69
|
yaxis_title=dict(
|
|
55
|
-
text=variable if variable == variable.upper() else variable.title(),
|
|
70
|
+
text=variable.replace('_', ' ') if variable == variable.upper() else variable.title().replace('_', ' '),
|
|
56
71
|
font=dict(size=16, weight='bold', color='black')
|
|
57
72
|
),
|
|
58
73
|
legend=dict(
|
|
@@ -139,16 +154,45 @@ def add_eval_period_highlight(fig, group, date_column, variable, eval_period):
|
|
|
139
154
|
color='rgba(0, 255, 0, 0.25)', # 'lime' with 0.25 alpha
|
|
140
155
|
width=10
|
|
141
156
|
),
|
|
142
|
-
name='
|
|
157
|
+
name='Evaluation Period',
|
|
143
158
|
hoverinfo='skip',
|
|
144
159
|
))
|
|
145
160
|
return fig
|
|
146
161
|
|
|
147
162
|
|
|
148
|
-
def
|
|
163
|
+
def anomaly_overview_plot(group, group_columns, variable, date_column, eval_period, show_anomaly_scores_on_main_plot=False):
|
|
149
164
|
|
|
150
165
|
# IS ANOMALY Plot
|
|
151
166
|
# This is the main plot
|
|
167
|
+
"""
|
|
168
|
+
Generates an ensemble anomaly evaluation plot using Plotly.
|
|
169
|
+
|
|
170
|
+
This function aggregates multiple anomaly detection models (columns starting with 'is_'
|
|
171
|
+
and ending with '_anomaly') to create a consensus 'Anomaly Score'. It visualizes
|
|
172
|
+
actual values, mean, median, and highlights points where the ensemble of models
|
|
173
|
+
agrees there is an anomaly.
|
|
174
|
+
|
|
175
|
+
Args:
|
|
176
|
+
group (pd.DataFrame): The processed dataframe containing original data and
|
|
177
|
+
boolean anomaly flags from various models (e.g., 'is_FB_anomaly').
|
|
178
|
+
group_columns (list): List of column names used to identify the group
|
|
179
|
+
(e.g., ['Region', 'Product']).
|
|
180
|
+
variable (str): The name of the numeric column being analyzed.
|
|
181
|
+
date_column (str): The name of the datetime column.
|
|
182
|
+
eval_period (int, optional): The number of recent periods evaluated. Defaults to 12.
|
|
183
|
+
show_anomaly_scores_on_main_plot (bool, optional): If True, adds a secondary
|
|
184
|
+
Y-axis bar chart showing the normalized ensemble score (-100 to 100).
|
|
185
|
+
Defaults to False.
|
|
186
|
+
|
|
187
|
+
Logic:
|
|
188
|
+
- Voting: Counts all columns matching 'is_*_anomaly'.
|
|
189
|
+
- is_Anomaly: True if >= 50% of the active models flag the point.
|
|
190
|
+
- Anomaly Score: A normalized metric where 100 represents total consensus
|
|
191
|
+
among all models and negative values represent low-risk points.
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
None: Displays an interactive Plotly figure.
|
|
195
|
+
"""
|
|
152
196
|
try:
|
|
153
197
|
group = group.copy()
|
|
154
198
|
|
|
@@ -157,24 +201,20 @@ def anomaly_eval_plot(group, group_columns, variable, date_column, eval_period=1
|
|
|
157
201
|
if col.startswith('is_') and col.endswith('_anomaly') and col != 'is_anomaly':
|
|
158
202
|
anomaly_cols.append(col)
|
|
159
203
|
group['Anomaly Vote Models'] = group.apply(
|
|
160
|
-
lambda row:
|
|
204
|
+
lambda row: ([
|
|
205
|
+
'IF' if 'IsolationForest' in col else
|
|
206
|
+
'PCNTL' if 'Percentile' in col else
|
|
207
|
+
col.removeprefix('is_').removesuffix('_anomaly')
|
|
161
208
|
for col in anomaly_cols
|
|
162
|
-
if pd.notna(row[col]) and row[col] == True
|
|
209
|
+
if pd.notna(row[col]) and row[col] == True
|
|
210
|
+
]),
|
|
163
211
|
axis=1)
|
|
164
212
|
group['Anomaly Vote Models'] = group['Anomaly Vote Models'].apply(lambda x: ', '.join(x))
|
|
165
|
-
group['Anomaly_Votes'] = group[anomaly_cols].sum(axis=1).astype(int)
|
|
166
|
-
group['Vote_Cnt'] = group[anomaly_cols].replace(False, True).sum(axis=1).astype(int)
|
|
167
|
-
group['Anomaly_Votes_Display'] = group['Anomaly_Votes'].astype(str) + ' out of ' + group['Vote_Cnt'].astype(str)
|
|
168
|
-
group['is_Anomaly'] = np.where(group['Anomaly_Votes']/group['Vote_Cnt'] >= 0.5, True, False)
|
|
169
|
-
group['Anomaly_Score'] = 2 * (group['Anomaly_Votes']/group['Vote_Cnt'] - 0.5).astype(float)
|
|
170
|
-
group['Anomaly_Score_Display'] = np.where(group['Anomaly_Score'] < 0, np.floor(100*group['Anomaly_Score']),
|
|
171
|
-
np.where(group['Anomaly_Score'] > 0, np.ceil(100*group['Anomaly_Score']),
|
|
172
|
-
1)).astype(float)
|
|
173
213
|
group['Mean'] = group[variable].mean()
|
|
174
214
|
group['Median'] = group[variable].median()
|
|
175
215
|
|
|
176
|
-
fig = initialize_fig(group, group_columns, variable, date_column, "Anomalies")
|
|
177
|
-
|
|
216
|
+
fig = initialize_fig(group, group_columns, variable, date_column, "Anomalies Overview Plot")
|
|
217
|
+
|
|
178
218
|
# Mean
|
|
179
219
|
fig.add_trace(go.Scatter(
|
|
180
220
|
x=group[date_column],
|
|
@@ -202,7 +242,11 @@ def anomaly_eval_plot(group, group_columns, variable, date_column, eval_period=1
|
|
|
202
242
|
x=group[group['is_Anomaly'] == True][date_column],
|
|
203
243
|
y=group[group['is_Anomaly'] == True][variable],
|
|
204
244
|
mode='markers',
|
|
205
|
-
marker=dict(color='
|
|
245
|
+
marker=dict(color='crimson',
|
|
246
|
+
symbol='circle',
|
|
247
|
+
line=dict(width=1),
|
|
248
|
+
size=10*(group[group['is_Anomaly'] == True]['Anomaly_Votes']) ** (1/4)
|
|
249
|
+
),
|
|
206
250
|
name='Anomalies',
|
|
207
251
|
customdata=group[group['is_Anomaly'] == True][['Anomaly_Votes_Display', 'Anomaly Vote Models', 'Anomaly_Score_Display']],
|
|
208
252
|
hovertemplate=(
|
|
@@ -222,7 +266,8 @@ def anomaly_eval_plot(group, group_columns, variable, date_column, eval_period=1
|
|
|
222
266
|
marker=dict(color='orange',
|
|
223
267
|
symbol='circle',
|
|
224
268
|
line=dict(width=1),
|
|
225
|
-
size=
|
|
269
|
+
size=8*(group[(group['is_Anomaly'] == False) & (group['Anomaly_Votes'] >= 1)]['Anomaly_Votes']) ** (1/4)
|
|
270
|
+
),
|
|
226
271
|
name='Not Quite Anomalies',
|
|
227
272
|
customdata=group[(group['is_Anomaly'] == False) & (group['Anomaly_Votes'] >= 1)][['Anomaly_Votes_Display', 'Anomaly Vote Models', 'Anomaly_Score_Display']],
|
|
228
273
|
hovertemplate=(
|
|
@@ -234,6 +279,22 @@ def anomaly_eval_plot(group, group_columns, variable, date_column, eval_period=1
|
|
|
234
279
|
)
|
|
235
280
|
))
|
|
236
281
|
|
|
282
|
+
# Not Anomalies
|
|
283
|
+
fig.add_trace(go.Scatter(
|
|
284
|
+
x=group[(group['is_Anomaly'] == False) & (group['Anomaly_Votes'] == 0)][date_column],
|
|
285
|
+
y=group[(group['is_Anomaly'] == False) & (group['Anomaly_Votes'] == 0)][variable],
|
|
286
|
+
mode='markers',
|
|
287
|
+
marker=dict(color='lightgray',
|
|
288
|
+
symbol='circle',
|
|
289
|
+
line=dict(width=0),
|
|
290
|
+
size=6),
|
|
291
|
+
name='Normal',
|
|
292
|
+
customdata=group[(group['is_Anomaly'] == False) & (group['Anomaly_Votes'] == 0)][['Anomaly_Votes_Display', 'Anomaly Vote Models', 'Anomaly_Score_Display']],
|
|
293
|
+
hovertemplate=(
|
|
294
|
+
f'Date: %{{x|%Y-%m-%d}}<br>' +
|
|
295
|
+
f'{variable if variable == variable.upper() else variable.title()}: %{{y:,d}}<br>'
|
|
296
|
+
)))
|
|
297
|
+
|
|
237
298
|
# Add Anomaly Scores to Secondary Axis
|
|
238
299
|
if show_anomaly_scores_on_main_plot:
|
|
239
300
|
fig.add_trace(go.Bar(
|
|
@@ -274,8 +335,38 @@ def anomaly_eval_plot(group, group_columns, variable, date_column, eval_period=1
|
|
|
274
335
|
print(f"Anomaly Plot Failed: {e}")
|
|
275
336
|
|
|
276
337
|
|
|
277
|
-
def anomaly_percentile_plot(group, group_columns, variable, date_column, final_anomalies=True
|
|
338
|
+
def anomaly_percentile_plot(group, group_columns, variable, date_column, eval_period, final_anomalies=True):
|
|
278
339
|
# Percentile Model Plot
|
|
340
|
+
"""
|
|
341
|
+
Visualizes anomaly detection based on Percentile-derived thresholds.
|
|
342
|
+
|
|
343
|
+
This function plots the time-series data alongside shaded regions representing
|
|
344
|
+
the upper and lower percentile boundaries. It highlights specific 'Percentile'
|
|
345
|
+
model anomalies and can optionally overlay the final consensus anomalies.
|
|
346
|
+
|
|
347
|
+
Args:
|
|
348
|
+
group (pd.DataFrame): Dataframe containing the time-series data and
|
|
349
|
+
calculated percentile columns ('Percentile_low', 'Percentile_high',
|
|
350
|
+
and 'is_Percentile_anomaly').
|
|
351
|
+
group_columns (list): Column names used for grouping/title identification.
|
|
352
|
+
variable (str): The numeric column name being plotted on the Y-axis.
|
|
353
|
+
date_column (str): The datetime column name for the X-axis.
|
|
354
|
+
final_anomalies (bool, optional): If True, overlays the final ensemble
|
|
355
|
+
consensus markers (red circles) on top of the model-specific markers.
|
|
356
|
+
Defaults to True.
|
|
357
|
+
eval_period (int, optional): The look-back period used for the evaluation
|
|
358
|
+
context. Defaults to 12.
|
|
359
|
+
|
|
360
|
+
Logic:
|
|
361
|
+
- Shading: Uses `add_anomaly_region` to fill the area beyond 'Percentile_low'
|
|
362
|
+
and 'Percentile_high'.
|
|
363
|
+
- Model Markers: Highlights points where 'is_Percentile_anomaly' is True.
|
|
364
|
+
- Integration: Uses helper functions `initialize_fig`, `add_anomaly_region`,
|
|
365
|
+
and `add_model_anomalies` to maintain a consistent UI/UX.
|
|
366
|
+
|
|
367
|
+
Returns:
|
|
368
|
+
None: Displays an interactive Plotly figure.
|
|
369
|
+
"""
|
|
279
370
|
try:
|
|
280
371
|
group = group.copy()
|
|
281
372
|
fig = initialize_fig(group, group_columns, variable, date_column, "Percentile Anomaly Detection")
|
|
@@ -294,8 +385,41 @@ def anomaly_percentile_plot(group, group_columns, variable, date_column, final_a
|
|
|
294
385
|
print(f"Percentile Anomaly Plot Failed: {e}")
|
|
295
386
|
|
|
296
387
|
|
|
297
|
-
def anomaly_sd_plot(group, group_columns, variable, date_column, final_anomalies=True
|
|
388
|
+
def anomaly_sd_plot(group, group_columns, variable, date_column, eval_period, final_anomalies=True):
|
|
298
389
|
# SD Model Plot
|
|
390
|
+
"""
|
|
391
|
+
Visualizes anomaly detection based on Standard Deviation (SD) thresholds.
|
|
392
|
+
|
|
393
|
+
This function plots the time-series data and overlays shaded regions representing
|
|
394
|
+
statistical boundaries (typically 2 or 3 standard deviations from the mean).
|
|
395
|
+
It identifies 'SD' model-specific anomalies and can optionally display the
|
|
396
|
+
final ensemble consensus markers.
|
|
397
|
+
|
|
398
|
+
Args:
|
|
399
|
+
group (pd.DataFrame): Dataframe containing the time-series data and
|
|
400
|
+
calculated SD boundary columns ('SD2_low', 'SD2_high', and
|
|
401
|
+
'is_SD_anomaly').
|
|
402
|
+
group_columns (list): Column names used for grouping/title identification.
|
|
403
|
+
variable (str): The numeric column name being plotted on the Y-axis.
|
|
404
|
+
date_column (str): The datetime column name for the X-axis.
|
|
405
|
+
final_anomalies (bool, optional): If True, overlays the final ensemble
|
|
406
|
+
consensus markers (red circles) on top of the SD model markers.
|
|
407
|
+
Defaults to True.
|
|
408
|
+
eval_period (int, optional): The look-back period used for the evaluation
|
|
409
|
+
context. Defaults to 12.
|
|
410
|
+
|
|
411
|
+
Logic:
|
|
412
|
+
- Shading: Utilizes `add_anomaly_region` to fill the areas outside the
|
|
413
|
+
'SD2_low' and 'SD2_high' thresholds, visually representing the
|
|
414
|
+
statistical "outlier zones."
|
|
415
|
+
- Model Markers: Highlights points where the SD model specifically
|
|
416
|
+
triggered an anomaly flag.
|
|
417
|
+
- Visualization Helpers: Relies on `initialize_fig`, `add_anomaly_region`,
|
|
418
|
+
and `add_model_anomalies` for UI consistency across the pipeline.
|
|
419
|
+
|
|
420
|
+
Returns:
|
|
421
|
+
None: Displays an interactive Plotly figure and prints a newline.
|
|
422
|
+
"""
|
|
299
423
|
try:
|
|
300
424
|
group = group.copy()
|
|
301
425
|
fig = initialize_fig(group, group_columns, variable, date_column, "SD Anomaly Detection")
|
|
@@ -314,8 +438,41 @@ def anomaly_sd_plot(group, group_columns, variable, date_column, final_anomalies
|
|
|
314
438
|
print(f"SD Anomaly Plot Failed: {e}")
|
|
315
439
|
|
|
316
440
|
|
|
317
|
-
def anomaly_mad_plot(group, group_columns, variable, date_column, final_anomalies=True
|
|
441
|
+
def anomaly_mad_plot(group, group_columns, variable, date_column, eval_period, final_anomalies=True):
|
|
318
442
|
# MAD Model Plot
|
|
443
|
+
"""
|
|
444
|
+
Visualizes anomaly detection based on Median Absolute Deviation (MAD).
|
|
445
|
+
|
|
446
|
+
MAD is a robust measure of statistical dispersion. This plot displays the
|
|
447
|
+
time-series data with shaded thresholds derived from the median and
|
|
448
|
+
the MAD scale factor. It is particularly effective for datasets where
|
|
449
|
+
mean and standard deviation are heavily skewed by extreme outliers.
|
|
450
|
+
|
|
451
|
+
Args:
|
|
452
|
+
group (pd.DataFrame): Dataframe containing the time-series data and
|
|
453
|
+
calculated MAD boundary columns ('MAD_low', 'MAD_high', and
|
|
454
|
+
'is_MAD_anomaly').
|
|
455
|
+
group_columns (list): Column names used for grouping/title identification.
|
|
456
|
+
variable (str): The numeric column name being plotted on the Y-axis.
|
|
457
|
+
date_column (str): The datetime column name for the X-axis.
|
|
458
|
+
final_anomalies (bool, optional): If True, overlays the final ensemble
|
|
459
|
+
consensus markers (red circles) on top of the MAD model markers.
|
|
460
|
+
Defaults to True.
|
|
461
|
+
eval_period (int, optional): The look-back period used for the evaluation
|
|
462
|
+
context. Defaults to 12.
|
|
463
|
+
|
|
464
|
+
Logic:
|
|
465
|
+
- Shading: Highlights the areas outside the 'MAD_low' and 'MAD_high'
|
|
466
|
+
thresholds. Because MAD uses the median as a baseline, these bands
|
|
467
|
+
are often tighter and more resistant to outlier-driven "threshold bloat."
|
|
468
|
+
- Model Markers: Specifically plots points flagged by the 'is_MAD_anomaly'
|
|
469
|
+
logic.
|
|
470
|
+
- Helper Integration: Uses `initialize_fig` for layout and `add_anomalies`
|
|
471
|
+
for consensus overlay.
|
|
472
|
+
|
|
473
|
+
Returns:
|
|
474
|
+
None: Displays an interactive Plotly figure.
|
|
475
|
+
"""
|
|
319
476
|
try:
|
|
320
477
|
group = group.copy()
|
|
321
478
|
fig = initialize_fig(group, group_columns, variable, date_column, "MAD Anomaly Detection")
|
|
@@ -334,7 +491,38 @@ def anomaly_mad_plot(group, group_columns, variable, date_column, final_anomalie
|
|
|
334
491
|
print(f"MAD Anomaly Plot Failed: {e}")
|
|
335
492
|
|
|
336
493
|
|
|
337
|
-
def anomaly_iqr_plot(group, group_columns, variable, date_column, final_anomalies=True
|
|
494
|
+
def anomaly_iqr_plot(group, group_columns, variable, date_column, eval_period, final_anomalies=True):
|
|
495
|
+
"""
|
|
496
|
+
Visualizes anomaly detection based on the Interquartile Range (IQR).
|
|
497
|
+
|
|
498
|
+
This function utilizes the Tukey's Fences method to identify outliers. It
|
|
499
|
+
calculates the spread between the 25th (Q1) and 75th (Q3) percentiles to
|
|
500
|
+
establish 'Normal' bounds. It is highly effective for skewed data as it
|
|
501
|
+
does not assume a normal distribution.
|
|
502
|
+
|
|
503
|
+
Args:
|
|
504
|
+
group (pd.DataFrame): Dataframe containing the time-series data and
|
|
505
|
+
calculated IQR boundary columns ('IQR_low', 'IQR_high', and
|
|
506
|
+
'is_IQR_anomaly').
|
|
507
|
+
group_columns (list): Column names used for grouping/title identification.
|
|
508
|
+
variable (str): The numeric column name being plotted on the Y-axis.
|
|
509
|
+
date_column (str): The datetime column name for the X-axis.
|
|
510
|
+
final_anomalies (bool, optional): If True, overlays the final ensemble
|
|
511
|
+
consensus markers (red circles) on top of the IQR-specific markers.
|
|
512
|
+
Defaults to True.
|
|
513
|
+
eval_period (int, optional): The look-back period used for the evaluation
|
|
514
|
+
context. Defaults to 12.
|
|
515
|
+
|
|
516
|
+
Logic:
|
|
517
|
+
- Shading: Fills the region below Q1 - 1.5*IQR and above Q3 + 1.5*IQR.
|
|
518
|
+
- Robustness: Because it uses quartiles rather than mean/SD, it is
|
|
519
|
+
resistant to being "fooled" by the outliers it is trying to detect.
|
|
520
|
+
- Consistency: Uses the standard suite of helpers (`initialize_fig`,
|
|
521
|
+
`add_anomaly_region`) to match the rest of the pipeline's visual style.
|
|
522
|
+
|
|
523
|
+
Returns:
|
|
524
|
+
None: Displays an interactive Plotly figure.
|
|
525
|
+
"""
|
|
338
526
|
# IQR Model Plot
|
|
339
527
|
try:
|
|
340
528
|
group = group.copy()
|
|
@@ -354,7 +542,38 @@ def anomaly_iqr_plot(group, group_columns, variable, date_column, final_anomalie
|
|
|
354
542
|
print(f"IQR Anomaly Plot Failed: {e}")
|
|
355
543
|
|
|
356
544
|
|
|
357
|
-
def anomaly_ewma_plot(group, group_columns, variable, date_column, final_anomalies=True
|
|
545
|
+
def anomaly_ewma_plot(group, group_columns, variable, date_column, eval_period, final_anomalies=True):
|
|
546
|
+
"""
|
|
547
|
+
Visualizes anomaly detection based on Exponentially Weighted Moving Average (EWMA).
|
|
548
|
+
|
|
549
|
+
This plot highlights anomalies using a moving baseline that gives more weight to
|
|
550
|
+
recent observations. It visualizes the EWMA forecast line, the calculated upper
|
|
551
|
+
and lower control limits (bands), and model-specific outliers. It is ideal for
|
|
552
|
+
detecting shifts in mean or variance in non-stationary time series.
|
|
553
|
+
|
|
554
|
+
Args:
|
|
555
|
+
group (pd.DataFrame): Dataframe containing the time-series data and
|
|
556
|
+
EWMA-specific columns ('EWMA_forecast', 'EWMA_low', 'EWMA_high',
|
|
557
|
+
and 'is_EWMA_anomaly').
|
|
558
|
+
group_columns (list): Column names used for grouping and plot titles.
|
|
559
|
+
variable (str): The name of the target numeric column.
|
|
560
|
+
date_column (str): The name of the datetime column.
|
|
561
|
+
final_anomalies (bool, optional): If True, overlays the final ensemble
|
|
562
|
+
consensus markers (red circles) on top of the EWMA markers.
|
|
563
|
+
Defaults to True.
|
|
564
|
+
eval_period (int, optional): The number of recent periods evaluated.
|
|
565
|
+
Used for context in title or scaling. Defaults to 12.
|
|
566
|
+
|
|
567
|
+
Logic:
|
|
568
|
+
- Forecast Line: Displays the weighted moving average ('slateblue').
|
|
569
|
+
- Dynamic Thresholds: Visualizes 'EWMA_low' and 'EWMA_high' as 'orangered'
|
|
570
|
+
dashdot lines with light red shading in the outlier zones.
|
|
571
|
+
- Model Markers: Highlights points where the EWMA logic specifically
|
|
572
|
+
triggered an anomaly flag.
|
|
573
|
+
|
|
574
|
+
Returns:
|
|
575
|
+
None: Displays an interactive Plotly figure.
|
|
576
|
+
"""
|
|
358
577
|
# EWMA Model Plot
|
|
359
578
|
try:
|
|
360
579
|
group = group.copy()
|
|
@@ -425,7 +644,39 @@ def anomaly_ewma_plot(group, group_columns, variable, date_column, final_anomali
|
|
|
425
644
|
print(f"EWMA Anomaly Plot Failed: {e}")
|
|
426
645
|
|
|
427
646
|
|
|
428
|
-
def anomaly_fb_plot(group, group_columns, variable, date_column, final_anomalies=True
|
|
647
|
+
def anomaly_fb_plot(group, group_columns, variable, date_column, eval_period, final_anomalies=True):
|
|
648
|
+
"""
|
|
649
|
+
Visualizes anomaly detection using the Facebook Prophet (FB) model.
|
|
650
|
+
|
|
651
|
+
This function displays the Prophet model's additive trend and seasonality
|
|
652
|
+
forecasts along with its uncertainty intervals (yhat_upper and yhat_lower).
|
|
653
|
+
It is particularly useful for identifying anomalies in data with strong
|
|
654
|
+
seasonality (weekly/yearly) that simpler statistical models might miss.
|
|
655
|
+
|
|
656
|
+
Args:
|
|
657
|
+
group (pd.DataFrame): Dataframe containing Prophet output columns
|
|
658
|
+
('FB_forecast', 'FB_low', 'FB_high', and 'is_FB_anomaly').
|
|
659
|
+
group_columns (list): Column names used to identify and title the group.
|
|
660
|
+
variable (str): The name of the target numeric column analyzed.
|
|
661
|
+
date_column (str): The name of the datetime column.
|
|
662
|
+
final_anomalies (bool, optional): If True, overlays the final ensemble
|
|
663
|
+
consensus markers (red circles) over the Prophet markers.
|
|
664
|
+
Defaults to True.
|
|
665
|
+
eval_period (int, optional): The number of recent periods analyzed.
|
|
666
|
+
Defaults to 12.
|
|
667
|
+
|
|
668
|
+
Logic:
|
|
669
|
+
- Recursive Visibility: Since FB Prophet is run in a walk-forward manner,
|
|
670
|
+
the shaded regions represent the prediction interval at the time
|
|
671
|
+
of forecast.
|
|
672
|
+
- Outlier Zones: Shaded red areas represent values that fall outside
|
|
673
|
+
the model's expected confidence interval (based on `prophet_CI`).
|
|
674
|
+
- Model Markers: Highlights points where Prophet specifically flagged
|
|
675
|
+
an anomaly based on its trend and seasonal expectations.
|
|
676
|
+
|
|
677
|
+
Returns:
|
|
678
|
+
None: Displays an interactive Plotly figure.
|
|
679
|
+
"""
|
|
429
680
|
# FB Prophet Model Plot
|
|
430
681
|
try:
|
|
431
682
|
group = group.copy()
|
|
@@ -493,10 +744,42 @@ def anomaly_fb_plot(group, group_columns, variable, date_column, final_anomalies
|
|
|
493
744
|
fig.show()
|
|
494
745
|
print("\n")
|
|
495
746
|
except Exception as e:
|
|
496
|
-
print(f"
|
|
747
|
+
print(f"FB Anomaly Plot Failed: {e}")
|
|
748
|
+
|
|
749
|
+
|
|
750
|
+
def anomaly_dbscan_plot(group, group_columns, variable, date_column, eval_period, final_anomalies=True):
|
|
751
|
+
"""
|
|
752
|
+
Visualizes anomaly detection using the DBSCAN clustering algorithm.
|
|
497
753
|
|
|
754
|
+
DBSCAN identifies anomalies as 'noise' points that reside in low-density
|
|
755
|
+
regions of the feature space. Unlike threshold-based methods, DBSCAN
|
|
756
|
+
looks for multi-dimensional patterns. This plot highlights points
|
|
757
|
+
flagged as noise by the algorithm, contextually placed within the
|
|
758
|
+
time-series trend.
|
|
498
759
|
|
|
499
|
-
|
|
760
|
+
Args:
|
|
761
|
+
group (pd.DataFrame): Dataframe containing the time-series data and
|
|
762
|
+
DBSCAN results (specifically the 'is_DBSCAN_anomaly' column).
|
|
763
|
+
group_columns (list): Column names used to identify and title the group.
|
|
764
|
+
variable (str): The name of the target numeric column analyzed.
|
|
765
|
+
date_column (str): The name of the datetime column.
|
|
766
|
+
final_anomalies (bool, optional): If True, overlays the final ensemble
|
|
767
|
+
consensus markers (red circles) over the DBSCAN markers.
|
|
768
|
+
Defaults to True.
|
|
769
|
+
eval_period (int, optional): The number of recent periods to highlight
|
|
770
|
+
as the evaluation window. Defaults to 12.
|
|
771
|
+
|
|
772
|
+
Logic:
|
|
773
|
+
- Density Clustering: Points are flagged as anomalies if they are
|
|
774
|
+
isolated from the main "clusters" of data points in the feature space.
|
|
775
|
+
- Eval Period Highlight: Uses `add_eval_period_highlight` to visually
|
|
776
|
+
distinguish the recent testing window from the historical training data.
|
|
777
|
+
- Model Markers: Highlights specific DBSCAN outliers using 'mediumorchid'
|
|
778
|
+
circles.
|
|
779
|
+
|
|
780
|
+
Returns:
|
|
781
|
+
None: Displays an interactive Plotly figure.
|
|
782
|
+
"""
|
|
500
783
|
# DBSCAN Model Plot
|
|
501
784
|
try:
|
|
502
785
|
group = group.copy()
|
|
@@ -521,7 +804,38 @@ def anomaly_dbscan_plot(group, group_columns, variable, date_column, final_anoma
|
|
|
521
804
|
print(f"DBSCAN Anomaly Plot Failed: {e}")
|
|
522
805
|
|
|
523
806
|
|
|
524
|
-
def
|
|
807
|
+
def anomaly_isolation_forest_plot(group, group_columns, variable, date_column, eval_period, final_anomalies=True):
|
|
808
|
+
"""
|
|
809
|
+
Visualizes anomaly detection using the Isolation Forest algorithm.
|
|
810
|
+
|
|
811
|
+
Isolation Forest is an unsupervised learning algorithm that isolates anomalies
|
|
812
|
+
by randomly selecting a feature and a split value. Since anomalies are few
|
|
813
|
+
and different, they are easier to isolate (shorter path length in the tree).
|
|
814
|
+
This plot shows points identified as anomalies based on this branching logic.
|
|
815
|
+
|
|
816
|
+
Args:
|
|
817
|
+
group (pd.DataFrame): Dataframe containing time-series data and
|
|
818
|
+
Isolation Forest results (specifically 'is_IsolationForest_anomaly_timeseries').
|
|
819
|
+
group_columns (list): Column names used to identify and title the group.
|
|
820
|
+
variable (str): The name of the target numeric column analyzed.
|
|
821
|
+
date_column (str): The name of the datetime column.
|
|
822
|
+
final_anomalies (bool, optional): If True, overlays the final ensemble
|
|
823
|
+
consensus markers (red circles) over the Isolation Forest markers.
|
|
824
|
+
Defaults to True.
|
|
825
|
+
eval_period (int, optional): The number of recent periods to highlight
|
|
826
|
+
as the evaluation window. Defaults to 12.
|
|
827
|
+
|
|
828
|
+
Logic:
|
|
829
|
+
- Tree-Based Isolation: Anomalies are identified by having shorter average
|
|
830
|
+
path lengths across a forest of random trees.
|
|
831
|
+
- Temporal Context: Uses `add_eval_period_highlight` to shade the recursive
|
|
832
|
+
testing window, helping users see if anomalies are recent.
|
|
833
|
+
- Model Markers: Highlights specific Isolation Forest outliers using
|
|
834
|
+
'mediumorchid' markers.
|
|
835
|
+
|
|
836
|
+
Returns:
|
|
837
|
+
None: Displays an interactive Plotly figure.
|
|
838
|
+
"""
|
|
525
839
|
# Isolation Forest Model Plot
|
|
526
840
|
try:
|
|
527
841
|
group = group.copy()
|
|
@@ -531,8 +845,8 @@ def anomaly_isolation_forest_timeseries_plot(group, group_columns, variable, dat
|
|
|
531
845
|
fig = add_eval_period_highlight(fig, group, date_column, variable, eval_period)
|
|
532
846
|
# Isolation Forest Anomalies
|
|
533
847
|
fig.add_trace(go.Scatter(
|
|
534
|
-
x=group[group['
|
|
535
|
-
y=group[group['
|
|
848
|
+
x=group[group['is_IsolationForest_anomaly'] == True][date_column],
|
|
849
|
+
y=group[group['is_IsolationForest_anomaly'] == True][variable],
|
|
536
850
|
mode='markers',
|
|
537
851
|
marker=dict(color='mediumorchid', symbol='circle', line=dict(width=1), size=7),
|
|
538
852
|
name='Isolation Forest Anomalies',
|
|
@@ -544,3 +858,295 @@ def anomaly_isolation_forest_timeseries_plot(group, group_columns, variable, dat
|
|
|
544
858
|
print("\n")
|
|
545
859
|
except Exception as e:
|
|
546
860
|
print(f"Isolation Forest Time Series Anomaly Plot Failed: {e}")
|
|
861
|
+
|
|
862
|
+
|
|
863
|
+
def anomaly_stacked_bar_plot(df, group_columns, variable, date_column, anomaly_col='is_Anomaly', secondary_line=None):
    """
    Generates a time-ordered stacked bar chart of Normal vs. Anomalous record counts,
    with a secondary-axis line showing either the mean of `variable` or the mean
    'Anomaly_Score' per timestamp.

    Args:
        df (pd.DataFrame): The dataframe containing the data. Must include an
            'Anomaly_Score' column (it is always aggregated, even when the
            secondary line shows the variable mean).
        group_columns (list): Columns identifying each group; used only for the
            group count shown in the title.
        variable (str): Numeric column whose per-date mean can be drawn on the
            secondary axis.
        date_column (str): The name of the datetime column.
        anomaly_col (str, optional): Boolean column flagging anomalies
            (True=Anomaly). Defaults to 'is_Anomaly'.
        secondary_line (str, optional): If None or equal to `variable`, the
            secondary line shows the mean of `variable`; otherwise it shows the
            mean 'Anomaly_Score'. Defaults to None.

    Returns:
        None: Displays the interactive Plotly figure.
    """
    try:
        # Work on a copy so the helper columns added below do not leak into the
        # caller's dataframe (the original mutated `df` in place).
        df = df.copy()

        # 1. Aggregation
        # Group by date to get counts across all unique_ids for that specific timestamp
        df['normal_val'] = np.where(df[anomaly_col] != True, 1, 0)
        df['anomaly_val'] = np.where(df[anomaly_col] == True, 1, 0)
        agg_df = df.groupby(date_column).agg(
            normal_sum=('normal_val', 'sum'),
            anomaly_sum=('anomaly_val', 'sum'),
            variable_mean=(variable, 'mean'),
            score_mean=('Anomaly_Score', 'mean'),
        ).reset_index()

        agg_df['total_sum'] = agg_df['normal_sum'] + agg_df['anomaly_sum']

        # Calculate percentage (handle division by zero just in case)
        agg_df['anomaly_pct'] = np.where(agg_df['total_sum'] > 0, (agg_df['anomaly_sum'] / agg_df['total_sum']) * 100, 0)

        dates = agg_df[date_column].sort_values()
        min_date = dates.min()
        max_date = dates.max()

        if len(dates) > 1:
            # Most common time difference approximates the series period
            period = dates.diff().mode().iloc[0]
        else:
            period = pd.Timedelta(days=1)

        # Pad the x-axis range by one period on each side
        range_min = min_date - period
        range_max = max_date + period

        # 2. Initialize Figure
        fig = go.Figure()

        if secondary_line is None or secondary_line == variable:
            line_var = 'variable_mean'
            var_title = f"Avg {variable if variable == variable.upper() else variable.replace('_', ' ').title()}"
        else:
            line_var = 'score_mean'
            var_title = 'Avg Anomaly Score'

        # 3. Add Traces
        # Bottom Bar: Non-Anomalous (Grey)
        fig.add_trace(go.Bar(
            x=agg_df[date_column],
            y=agg_df['normal_sum'],
            name='Normal',
            marker_color='lightgray',
            customdata=agg_df[['total_sum']],
            hovertemplate=(
                f'<b>Date:</b> %{{x|%Y-%m-%d}}<br>' +
                f'<b>Normal Records:</b> %{{y:,}}<br>' +
                f'<b>Total Volume:</b> %{{customdata[0]:,}}<extra></extra>'
            )
        ))

        # Top Bar: Anomalous (Red)
        fig.add_trace(go.Bar(
            x=agg_df[date_column],
            y=agg_df['anomaly_sum'],
            name='Anomaly',
            marker_color='crimson',  # Red for anomalies
            customdata=agg_df[['total_sum', 'anomaly_pct']],
            hovertemplate=(
                f'<b>Date:</b> %{{x|%Y-%m-%d}}<br>' +
                f'<b>Anomalies:</b> %{{y:,}}<br>' +
                f'<b>Anomaly Rate:</b> %{{customdata[1]:.0f}}%<extra></extra>'
            )
        ))

        # Line on secondary axis
        fig.add_trace(go.Scatter(
            x=agg_df[date_column],
            y=agg_df[line_var],
            name=var_title,
            yaxis='y2',
            mode='lines',
            line=dict(width=3, color='darkslategray'),
            hovertemplate=(
                f'<b>Date:</b> %{{x|%Y-%m-%d}}<br>' +
                f'<b>Total {variable}:</b> %{{y:,.2f}}<extra></extra>'
            )
        ))

        # 4. Apply Visual Design (Matching your existing style)
        fig.update_layout(
            title=dict(
                text=f'Anomalies and {var_title} per Group Over Time for {len(df[group_columns].drop_duplicates())} Groups',
                y=0.96,
                x=0.5,
                xanchor='center',
                yanchor='top',
                font=dict(size=18, color='black', weight='bold'),
            ),
            barmode='stack',
            height=350,
            width=1200,
            margin=dict(l=50, r=100, t=60, b=30),
            plot_bgcolor='snow',
            paper_bgcolor='whitesmoke',
            xaxis=dict(
                range=[range_min, range_max],
                showline=True,
                linewidth=0.5,
                linecolor='orange',
                zeroline=False,
                gridcolor='rgba(255, 165, 0, 0.1)',
                mirror=True,
            ),
            yaxis=dict(
                # Dynamic range with a little headroom
                range=[0, agg_df['total_sum'].max()],
                showline=True,
                linewidth=0.5,
                linecolor='orange',
                zeroline=False,
                gridcolor='rgba(255, 165, 0, 0.1)',
                mirror=True,
                title=dict(text="Group Count", font=dict(size=16, weight='bold', color='black')),
            ),
            yaxis2=dict(
                title=dict(text=var_title, font=dict(size=14, weight='bold', color='darkslategray')),
                tickfont=dict(color='darkslategray'),
                anchor="x",
                overlaying="y",
                side="right",
                showgrid=False,  # Usually better to hide grid for 2nd axis to avoid clutter
                zeroline=False,
                range=[0, agg_df[line_var].max() * 1.1]  # Give it some headroom
            ),
            legend=dict(
                orientation="v",
                yanchor="top",
                y=1,
                xanchor="left",
                x=1.08,
            )
        )

        fig.show()
        print("\n")
    except Exception as e:
        print(f"Stacked Bar Plot Failed: {e}")
|
+
|
|
1022
|
+
def summary_pie_plot(summary_df, title="Anomaly Detection Summary"):
    """
    Renders a pie chart of record disposition (Evaluated vs. Anomalous vs. Dropped)
    using the project's shared plot styling.

    Args:
        summary_df (pd.DataFrame): Dataframe with a 'Records' label column and a
            'count' value column.
        title (str, optional): Chart title. Defaults to "Anomaly Detection Summary".

    Returns:
        None: Displays the interactive Plotly figure.
    """
    try:
        # Slice colors follow the project palette: grey=evaluated, red=anomaly, gold=dropped
        slice_colors = ['silver', 'crimson', 'gold']

        pie_trace = go.Pie(
            labels=summary_df['Records'],
            values=summary_df['count'],
            marker=dict(
                colors=slice_colors,
                line=dict(color='white', width=2),
            ),
            textposition='auto',
            texttemplate='%{label}<br>%{percent:.0%}',
            hoverinfo='label+value+percent',
            sort=False,
        )

        fig = go.Figure(data=[pie_trace])

        # Shared title / legend styling used across the module's plots
        title_style = dict(
            text=title,
            y=0.96,
            x=0.5,
            xanchor='center',
            yanchor='top',
            font=dict(size=18, color='black', weight='bold'),
        )
        legend_style = dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02,
        )

        fig.update_layout(
            title=title_style,
            height=400,
            width=600,
            margin=dict(l=50, r=50, t=80, b=30),
            plot_bgcolor='snow',
            paper_bgcolor='whitesmoke',
            legend=legend_style,
        )

        fig.show()
        print("\n")

    except Exception as e:
        print(f"Summary Pie Plot Failed: {e}")
|
+
def avg_anomaly_score_plot(df, group_columns, date_column):
    """
    Plots the mean 'Anomaly_Score' per timestamp, pooled across all groups,
    as a single time-series line.

    Args:
        df (pd.DataFrame): Dataframe containing an 'Anomaly_Score' column plus
            the date and group columns.
        group_columns (list): Columns identifying each group; used only for the
            group count shown in the title.
        date_column (str): The name of the datetime column.

    Returns:
        None: Displays the interactive Plotly figure.
    """
    try:
        plot_title = f"Average Anomaly Scores Over Time for {len(df[group_columns].drop_duplicates())} Groups"

        fig = go.Figure()

        # Mean anomaly score per timestamp, averaged over every group
        agg_df = df.groupby(date_column)['Anomaly_Score'].mean().reset_index()

        # Average Anomaly Scores
        fig.add_trace(go.Scatter(
            x=agg_df[date_column],
            y=agg_df['Anomaly_Score'],
            mode='lines',
            line=dict(color='seagreen', width=1.5),
            name='Average Anomaly Score',
        ))

        fig.update_layout(
            title=dict(
                text=plot_title,
                y=0.96,
                x=0.5,
                xanchor='center',
                yanchor='top',
                font=dict(size=18, color='black', weight='bold'),
            ),
            height=350,
            width=1200,
            margin=dict(l=50, r=50, t=40, b=30),
            plot_bgcolor='snow',
            paper_bgcolor='whitesmoke',
            xaxis=dict(
                range=[agg_df[date_column].min(), agg_df[date_column].max()],
                showline=True,
                linewidth=0.5,
                linecolor='orange',
                zeroline=False,
                gridcolor='rgba(255, 165, 0, 0.1)',
                mirror=True
            ),
            yaxis=dict(
                # Slight padding below the min and above the max of the score line
                range=[agg_df['Anomaly_Score'].min()*0.9, agg_df['Anomaly_Score'].max()*1.06],
                showline=True,
                linewidth=0.5,
                linecolor='orange',
                zeroline=False,
                gridcolor='rgba(255, 165, 0, 0.1)',
                mirror=True
            ),
            yaxis_title=dict(
                text='Average Anomaly Score',
                font=dict(size=16, weight='bold', color='black')
            ),
            legend=dict(
                orientation="v",
                yanchor="top",
                y=1,
                xanchor="left",
                x=1.02,
            )
        )

        fig.show()
        print("\n")
    except Exception as e:
        print(f"Anomaly Score Plot Failed: {e}")