anomaly-pipeline 0.1.27__py3-none-any.whl → 0.1.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,102 +15,303 @@ def create_full_calendar_and_interpolate(
     group_columns,
     variable,
     date_column,
-    freq
+    freq,
+    min_records,
+    max_records
 ):
-    """
-    Creates a complete weekly date range for each group,
-    merges with the master data, marks missing rows,
-    and fills missing values using linear interpolation.
-
-    Parameters
-    ----------
-    master_data : pd.DataFrame
-    group_columns : list
-        One or multiple columns that define a group.
-    date_column : str
-        Name of the date column (must be datetime-like)
-    missing_check_cols : list
-        Columns used to detect missing values.
-        If None → ALL numeric columns will be used.
-    freq : str
-        Frequency for calendar generation (default weekly Mondays).
-    """
-
-    # Ensure datetime
     master_data[date_column] = pd.to_datetime(master_data[date_column])
-
+
     full_group_data = []
+    success_metrics = []
+    dropped_metrics = []
 
     for group_key, group in master_data.groupby(group_columns):
-
-        # ---- Step 1: Create full calendar for this group ----
-        min_date = group[date_column].min()
-        max_date = group[date_column].max()
-
+        # Create a dictionary of the group keys for structured reporting
+        # This maps {col1: val1, col2: val2}
+        current_group_info = {
+            col: group_key[i] if isinstance(group_key, (tuple, list)) else group_key
+            for i, col in enumerate(group_columns)
+        }
+
+        # 1. Calendar Generation
+        min_date, max_date = group[date_column].min(), group[date_column].max()
         full_dates = pd.date_range(start=min_date, end=max_date, freq=freq)
+
+        if max_records is not None and len(full_dates) > max_records:
+            full_dates = full_dates[-max_records:]
 
-        # Build calendar DF dynamically using group_columns
-        calendar_dict = {col: group_key[i] if isinstance(group_key, tuple) else group_key
-                         for i, col in enumerate(group_columns)}
+        # 2. Expansion
+        calendar_dict = current_group_info.copy()
         calendar_dict[date_column] = full_dates
-
         full_calendar = pd.DataFrame(calendar_dict)
 
-        # ---- Step 2: Join with actual group data ----
-        merged = full_calendar.merge(
-            group,
-            on=group_columns + [date_column],
-            how="left"
-        )
-
-        # ---- Step 3: Mark missing rows based on selected columns ----
-        merged["is_missing_record"] = merged[variable].isna()
+        # 3. Merge
+        merged = full_calendar.merge(group, on=group_columns + [date_column], how="left")
 
+        total_len = len(merged)
+        interpolated_count = merged[variable].isna().sum()
+        interpolation_rate = interpolated_count / total_len if total_len > 0 else 0
+
+        # --- Check 1: Min Records ---
+        if total_len < min_records:
+            drop_entry = current_group_info.copy()
+            drop_entry.update({
+                "reason": "Below Min Records",
+                "details": f"Total records {total_len} < {min_records}",
+                "dropped_records": total_len
+            })
+            dropped_metrics.append(drop_entry)
+            continue
 
-        # ---- Step 4: Interpolate numeric columns ----
-        numeric_cols = merged.select_dtypes(include=[np.number]).columns
+        # --- Check 2: Max Interpolation Rate ---
+        if interpolation_rate > 0.25:
+            drop_entry = current_group_info.copy()
+            drop_entry.update({
+                "reason": "High Interpolation",
+                "details": f"{interpolation_rate:.1%} > 25%",
+                "dropped_records": total_len
+            })
+            dropped_metrics.append(drop_entry)
+            continue
 
-        for col in numeric_cols:
-            merged[col] = merged[col].interpolate(method="linear", limit_direction="both")
+        # --- Success: Interpolate ---
+        merged["is_missing_record"] = merged[variable].isna()
+        merged[variable] = merged[variable].interpolate(method="linear", limit_direction="both")
 
+        success_entry = current_group_info.copy()
+        success_entry.update({
+            "initial_records": len(group),
+            "interpolated_count": interpolated_count,
+            "final_records": total_len,
+            "interpolation_pct": round(interpolation_rate * 100, 2)
+        })
+        success_metrics.append(success_entry)
         full_group_data.append(merged)
 
-    final_df = pd.concat(full_group_data, ignore_index=True)
-    #print(f"The number of records missing {final_df['is_missing_record'].sum()}")
-    return final_df
+    # Convert lists of dicts to DataFrames
+    final_df = pd.concat(full_group_data, ignore_index=True) if full_group_data else pd.DataFrame()
+    success_report = pd.DataFrame(success_metrics)
+    exclusion_report = pd.DataFrame(dropped_metrics)
+
+    return final_df, success_report, exclusion_report
+
+
+def print_anomaly_stats(final_results, success_report, exclusion_report, group_columns, interpolation_method="linear"):
+    # 1. Calculate Global Counts
+    total_records = len(final_results)
+    total_anomalies = final_results['is_Anomaly'].fillna(False).astype(bool).sum()
+    anomaly_rate = (total_anomalies / total_records) * 100 if total_records > 0 else 0
+
+    # 2. Extract specific stats from reports
+    # If exclusion_report is passed but empty, len() returns 0
+    num_excluded = len(exclusion_report)
+    total_groups = len(success_report) + num_excluded
+    evaluated_groups = len(success_report)
+
+    # Interpolation stats
+    total_interpolated_records = success_report['interpolated_count'].sum() if not success_report.empty else 0
+    groups_with_interpolation = success_report[success_report['interpolated_count'] > 0].shape[0] if not success_report.empty else 0
+
+    # 3. Handle Exclusion stats (check if empty to avoid filtering errors)
+    if num_excluded > 0:
+        missing_data_exclusions = exclusion_report[exclusion_report['reason'] == "High Interpolation"].shape[0]
+        insufficient_history_exclusions = exclusion_report[exclusion_report['reason'] == "Below Min Records"].shape[0]
+    else:
+        missing_data_exclusions = 0
+        insufficient_history_exclusions = 0
 
+    # --- START PRINTING ---
+    print("\n" + "="*55)
+    print(f"{'ANOMALY DETECTION EXECUTIVE SUMMARY':^55}")
+    print("="*55)
+
+    stats_table = [
+        ["Total Groups", f"{total_groups:,}"],
+        ["Total Records", f"{total_records:,}"],
+        ["Evaluated Groups", f"{evaluated_groups:,}"],
+        ["Evaluated Records", f"{total_records:,}"],
+        ["Evaluated Anomalies", f"{total_anomalies:,}"],
+        ["Anomaly Rate", f"{anomaly_rate:.2f}%"]
+    ]
+
+    for label, val in stats_table:
+        print(f"{label:<25} : {val:>25}")
+
+    print("-" * 55)
 
-def print_anomaly_stats(df, group_columns):
-    # Calculate global stats
-    total_records = len(df)
-    # Ensure is_anomaly is treated as boolean for counting
-    total_anomalies = df['is_Anomaly'].fillna(False).astype(bool).sum()
-    anomaly_rate = (total_anomalies / total_records) * 100
+    # Interpolation Details
+    print(f"INTERPOLATION REPORT:")
+    print(f"{total_interpolated_records:,} records were missing from {groups_with_interpolation} groups")
+    print(f"Values were interpolated using the {interpolation_method} method.")
+    print(f"Total {total_interpolated_records:,} additional records are added to the data.")
+
+    # Show 5 examples of interpolated records
+    if 'is_interpolated' in final_results.columns:
+        interpolated_samples = final_results[final_results['is_interpolated'] == True].head(5)
+        if not interpolated_samples.empty:
+            print("\nExample Interpolated Records:")
+            # Only show group columns, timestamp (ds), and value (y)
+            cols_to_show = group_columns + ['ds', 'y']
+            print(interpolated_samples[cols_to_show].to_string(index=False))
+
+    print("-" * 55)
 
-    print("\n" + "="*45)
-    print(f"{'ANOMALY DETECTION SUMMARY':^45}")
-    print("="*45)
-    print(f"{'Total Records:':<25} {total_records:,}")
-    print(f"{'Total Anomalies:':<25} {total_anomalies:,}")
-    print(f"{'Overall Anomaly Rate:':<25} {anomaly_rate:.2f}%")
-    print("-" * 45)
+    # Exclusion Details
+    print(f"EXCLUSION SUMMARY:")
+    if num_excluded > 0:
+        print(f"- {missing_data_exclusions} groups had >25% missing data and could not be interpolated.")
+        print(f"- {insufficient_history_exclusions} groups lacked the minimum historical data to train.")
+        print(f"See exclusion_report for full list of IDs.")
+    else:
+        print("- No groups were excluded from this run.")
 
-    # --- CHANGE START: Group by Rate ---
-    print(f"Top 5 Groups by Anomaly Rate ({' > '.join(group_columns)}):")
+    print("-" * 55)
 
-    # 1. Group by keys
-    # 2. Calculate mean (rate) and count (to show absolute numbers too)
-    group_stats = df.groupby(group_columns)['is_Anomaly'].agg(['mean', 'sum']).sort_values(by='mean', ascending=False).head(5)
+    # Group Breakdown
+    print(f"TOP 5 GROUPS BY ANOMALY RATE ({' > '.join(group_columns)}):")
+    group_stats = final_results.groupby(group_columns)['is_Anomaly'].agg(['mean', 'sum']).sort_values(by='mean', ascending=False).head(5)
 
     for label, row in group_stats.iterrows():
-        # Handle single vs multiple group columns for clean printing
-        group_label = label if isinstance(label, str) else " | ".join(map(str, label))
-        rate_pct = row['mean'] * 100
-        count = int(row['sum'])
+        # Handle tuple-based index for multi-grouping
+        group_label = label if isinstance(label, (str, int)) else " | ".join(map(str, label))
+        print(f" - {group_label:<30} : {row['mean']*100:>6.2f}% ({int(row['sum'])} anomalies)")
+
+    print("="*55 + "\n")
+
+def calculate_ensemble_scores(df, variable):
+    """
+    Calculates the normalized consensus score across all anomaly models.
+    """
+
+    # Identify all columns that are model flags (is_..._anomaly)
+    anomaly_flags = [col for col in df.columns if col.startswith('is_') and col.endswith('_anomaly') and col != 'is_Anomaly']
+
+    # 1. Total Votes (Count of True)
+    df['Anomaly_Votes'] = df[anomaly_flags].sum(axis=1).astype(int)
+
+    # 2. Total Models active for that row (Count of non-NaN values)
+    df['Vote_Cnt'] = df[anomaly_flags].notna().sum(axis=1).astype(int)
+
+    # 3. Anomaly Votes Score Display (x out of N)
+    df['Anomaly_Votes_Display'] = df['Anomaly_Votes'].astype(int).astype(str) + " out of " + df['Vote_Cnt'].astype(int).astype(str)
+
+    # 5. Final Boolean Consensus (e.g., majority rule)
+    df['is_Anomaly'] = df['Anomaly_Votes'] / df['Vote_Cnt'] >= 0.5
+
+    # 6. Scale all the model scores to be between -1 and 1
+    try:
+        df['Percentile_score_scaled'] = np.where(df['is_Percentile_anomaly'].isna()==False,
+                                                 abs(df[variable] - (df['Percentile_high'] + df['Percentile_low'])/2)/((df['Percentile_high'] - df['Percentile_low'])/2) - 1,
+                                                 np.nan)
+        df['Percentile_score_scaled'] = df['Percentile_score_scaled']/abs(df['Percentile_score_scaled']).max()
+    except:
+        pass
+
+    try:
+        df['SD_score_scaled'] = np.where(df['is_SD_anomaly'].isna()==False,
+                                         abs(df[variable] - (df['SD2_high'] + df['SD2_low'])/2)/((df['SD2_high'] - df['SD2_low'])/2) - 1,
+                                         np.nan)
+        df['SD_score_scaled'] = df['SD_score_scaled']/abs(df['SD_score_scaled']).max()
+    except:
+        pass
+
+    try:
+        df['MAD_score_scaled'] = np.where(df['is_MAD_anomaly'].isna()==False,
+                                          abs(df[variable] - (df['MAD_high'] + df['MAD_low'])/2)/((df['MAD_high'] - df['MAD_low'])/2) - 1,
+                                          np.nan)
+        df['MAD_score_scaled'] = df['MAD_score_scaled']/abs(df['MAD_score_scaled']).max()
+    except:
+        pass
+
+    try:
+        df['IQR_score_scaled'] = np.where(df['is_IQR_anomaly'].isna()==False,
+                                          abs(df[variable] - (df['IQR_high'] + df['IQR_low'])/2)/((df['IQR_high'] - df['IQR_low'])/2) - 1,
+                                          np.nan)
+        df['IQR_score_scaled'] = df['IQR_score_scaled']/abs(df['IQR_score_scaled']).max()
+    except:
+        pass
+
+    try:
+        df['EWMA_score_scaled'] = np.where(df['is_EWMA_anomaly'].isna()==False,
+                                           abs(df[variable] - (df['EWMA_high'] + df['EWMA_low'])/2)/((df['EWMA_high'] - df['EWMA_low'])/2) - 1,
+                                           np.nan)
+        df['EWMA_score_scaled'] = df['EWMA_score_scaled']/abs(df['EWMA_score_scaled']).max()
+    except:
+        pass
+
+    try:
+        df['FB_score_scaled'] = np.where(df['is_FB_anomaly'].isna()==False,
+                                         abs(df[variable] - (df['FB_high'] + df['FB_low'])/2)/((df['FB_high'] - df['FB_low'])/2) - 1,
+                                         np.nan)
+        df['FB_score_scaled'] = df['FB_score_scaled']/abs(df['FB_score_scaled']).max()
+    except:
+        pass
+
+    try:
+        df['IsoForest_score_scaled'] = np.where(df['is_IsolationForest_anomaly'].isna()==False,
+                                                df['IsolationForest_score'] - df['IsolationForest_score_low'],
+                                                np.nan)
+        df['IsoForest_score_scaled'] = df['IsoForest_score_scaled']/abs(df['IsoForest_score_scaled']).max()
+    except:
+        pass
+
+    try:
+        df['dbscan_score_scaled'] = np.where(df['is_DBSCAN_anomaly'].isna()==False, df['dbscan_score_high'] - df['dbscan_score'], np.nan)
+        df['dbscan_score_scaled'] = df['dbscan_score_scaled']/abs(df['dbscan_score_scaled']).max()
+    except:
+        pass
+
+    score_scaled_cols = []
+    for col in df.columns.to_list():
+        if '_score_scaled' in col:
+            score_scaled_cols.append(col)
+
+    df['Anomaly_Score'] = df[score_scaled_cols].mean(axis=1)
+    # Rescale all non anomalies between 0 and 0.5 and anomalies between 0.5 and 1.0
+    if len(df[df['is_Anomaly'] == True]) >= 1:
+        # df.loc[df['is_Anomaly'] == True, 'Anomaly_Score'] = ((df.loc[df['is_Anomaly'] == True, 'Anomaly_Score'] + 1) * 0.245) + 0.51
 
-        # Print the Rate % and the absolute count in brackets for context
-        print(f" - {group_label:<25} : {rate_pct:>6.2f}% ({count:>3} anomalies)")
-        # --- CHANGE END ---
+        is_anomaly_min = df[df['is_Anomaly'] == True]['Anomaly_Score'].min()
+        is_anomaly_max = df[df['is_Anomaly'] == True]['Anomaly_Score'].max()
+        # Scale to [0, 0.49] based on actual data range
+        if is_anomaly_max == is_anomaly_min:
+            df.loc[df['is_Anomaly'] == True, 'Anomaly_Score'] = df.loc[df['is_Anomaly'] == True, 'Anomaly_Score'] * 0 + 0.51
+        else:
+            df.loc[df['is_Anomaly'] == True, 'Anomaly_Score'] = (((df.loc[df['is_Anomaly'] == True, 'Anomaly_Score'] - is_anomaly_min) / (is_anomaly_max - is_anomaly_min)) * 0.48) + 0.52
 
-    print("="*45 + "\n")
+    if len(df[df['is_Anomaly'] == False]) >= 1:
+        not_anomaly_min = df[df['is_Anomaly'] == False]['Anomaly_Score'].min()
+        not_anomaly_max = df[df['is_Anomaly'] == False]['Anomaly_Score'].max()
+        # Scale to [0, 0.49] based on actual data range
+        if not_anomaly_max == not_anomaly_min:
+            df.loc[df['is_Anomaly'] == False, 'Anomaly_Score'] = df.loc[df['is_Anomaly'] == False, 'Anomaly_Score'] * 0  # Default to 0 if constant
+        else:
+            df.loc[df['is_Anomaly'] == False, 'Anomaly_Score'] = ((df.loc[df['is_Anomaly'] == False, 'Anomaly_Score'] - not_anomaly_min) / (not_anomaly_max - not_anomaly_min)) * 0.48
+
+    df['Anomaly_Score_Display'] = np.where(df['is_Anomaly'] == True, np.ceil(100 * df['Anomaly_Score']), np.floor(100 * df['Anomaly_Score'])).astype(int)
+
+    # 7. Reposition is_Anomaly column to the end
+    df['is_Anomaly'] = df.pop('is_Anomaly')
+
+    return df
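For readers skimming the hunk: the new consensus logic counts each model's boolean flag as a vote and applies a simple majority rule over the models that actually produced a flag. Below is a minimal, self-contained sketch of just that voting step on made-up data (the per-model score scaling is omitted); the flag column names mirror the ones used above, everything else is illustrative.

```python
# Sketch of the voting step only, on a toy frame; not the package's code path.
import pandas as pd

df = pd.DataFrame({
    "is_SD_anomaly":  pd.array([True,  False, None], dtype="boolean"),
    "is_IQR_anomaly": pd.array([True,  False, True], dtype="boolean"),
    "is_MAD_anomaly": pd.array([False, False, True], dtype="boolean"),
})
flags = [c for c in df.columns if c.startswith("is_") and c.endswith("_anomaly")]

df["Anomaly_Votes"] = df[flags].sum(axis=1).astype(int)         # True counts as 1, missing flags are skipped
df["Vote_Cnt"] = df[flags].notna().sum(axis=1).astype(int)      # number of models that produced a flag
df["is_Anomaly"] = df["Anomaly_Votes"] / df["Vote_Cnt"] >= 0.5  # majority of the active models

print(df[["Anomaly_Votes", "Vote_Cnt", "is_Anomaly"]])
# Row 0: 2 of 3 votes -> True; row 1: 0 of 3 -> False; row 2: 2 of 2 -> True
```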
 
+
+
+def min_records_extraction(freq, eval_period):
+    freq_upper = freq.upper()
+
+    if freq_upper.startswith('W'):
+        annual_count = 52
+    elif freq_upper.startswith('D') or freq_upper.startswith('B'):
+        annual_count = 365
+    elif freq_upper.startswith('M'):
+        annual_count = 12
+    else:
+        # Fallback to weekly if custom/unknown
+        annual_count = 52
+
+    # Logic: 1 year for min, 2 years for max
+    min_records = annual_count + eval_period
+    #max_records = (2 * annual_count) + eval_period
+
+    return min_records
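Taken together, this hunk changes the preprocessing contract: the function now enforces `min_records` / `max_records` and returns three frames (data, success report, exclusion report), with `min_records_extraction` deriving the minimum history from the frequency and evaluation window. A minimal usage sketch follows; the sample data, the `store`/`week`/`y` names, and the assumption that these helpers are importable are illustrative, not part of the diff.

```python
# Illustrative wiring only; column names and the calling context are assumptions.
import pandas as pd

data = pd.DataFrame({
    "store": ["A"] * 70 + ["B"] * 4,
    "week": list(pd.date_range("2024-01-01", periods=70, freq="W-MON"))
            + list(pd.date_range("2024-01-01", periods=4, freq="W-MON")),
    "y": range(74),
})

min_records = min_records_extraction(freq="W-MON", eval_period=12)  # 52 weeks + 12 = 64

final_df, success_report, exclusion_report = create_full_calendar_and_interpolate(
    data,
    group_columns=["store"],
    variable="y",
    date_column="week",
    freq="W-MON",
    min_records=min_records,
    max_records=None,  # None skips the history-length trim
)
# Store "B" (4 weeks < 64) lands in exclusion_report with reason "Below Min Records";
# store "A" passes both checks and appears in final_df and success_report.
# Once 'is_Anomaly' has been added downstream, print_anomaly_stats(final_df,
# success_report, exclusion_report, ["store"]) prints the executive summary.
```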
@@ -3,6 +3,70 @@ import numpy as np
 from .Preprocessing import classify
 
 def detect_outliers_sd(group, variable, date_column, eval_period):
+
+    """
+    # 📈 Standard-Deviation–Based Outlier Detection (Expanding Window)
+
+    ## **Function:** `detect_outliers_sd`
+
+    This function detects anomalies in a time series using a mean ± 2 standard deviation (SD) rule, applied in a train–test, expanding-window framework.
+
+    ---
+
+    ## 🔍 **What the Function Does**
+
+    ### **1. Minimum Data Requirement**
+    - Requires **at least 10 observations**
+    - Returns an empty DataFrame if insufficient data is provided
+
+    ---
+
+    ## 🏋️ **Training Phase**
+    *(Initial fixed window)*
+
+    - Uses all observations **prior to the evaluation period**
+    - Computes:
+      - **Mean**
+      - **Standard Deviation**
+      - **Lower bound:** `max(mean − 2 × SD, 0)`
+      - **Upper bound:** `mean + 2 × SD`
+    - Flags anomalies where values fall **outside the 2-SD range**
+    - Labels rows as **TRAIN**
+
+    ---
+
+    ## 🔁 **Evaluation Phase**
+    *(Expanding window)*
+
+    For each step in the evaluation period:
+    - Expands the training window to include all prior observations
+    - Recomputes **mean and SD dynamically**
+    - Recalculates anomaly bounds
+    - Tests the current observation against updated bounds
+    - Labels rows as **TEST**
+
+    ---
+
+    ## 🚨 **Anomaly Classification**
+
+    Each observation receives:
+    - **`SD_anomaly`** → categorical label via `classify()`
+    - **`is_SD_anomaly`** → boolean flag
+      - `True` if outside ±2 SD
+      - `False` otherwise
+
+    ---
+
+    ## 📊 **Output Columns Added**
+
+    - **Mean**
+    - **SD**
+    - **SD2_low**
+    - **SD2_high**
+    - **set** (`TRAIN` or `TEST`)
+    - **SD_anomaly**
+    - **is_SD_anomaly**"""
+
     n = len(group)
     # checking the min_size requirements
     if n < 10:
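The new docstring describes the rule in prose; for concreteness, here is a compact sketch of the same mean ± 2 SD bound computation (fixed train window, then expanding re-estimation over the evaluation period). This is an illustration of the described rule on made-up data, not the package's implementation.

```python
# Minimal sketch of the expanding-window 2-SD rule described in the docstring above.
import numpy as np
import pandas as pd

def sd_bounds_expanding(values: pd.Series, eval_period: int):
    """Return (low, high, is_outlier) per point using a train/expanding-test split."""
    n = len(values)
    train_end = n - eval_period
    low, high = np.empty(n), np.empty(n)

    # TRAIN: fixed window of everything before the evaluation period
    mu, sd = values.iloc[:train_end].mean(), values.iloc[:train_end].std()
    low[:train_end] = max(mu - 2 * sd, 0)
    high[:train_end] = mu + 2 * sd

    # TEST: expanding window -- each step re-estimates on all prior observations
    for i in range(train_end, n):
        mu, sd = values.iloc[:i].mean(), values.iloc[:i].std()
        low[i] = max(mu - 2 * sd, 0)
        high[i] = mu + 2 * sd

    is_outlier = (values < low) | (values > high)
    return low, high, is_outlier

s = pd.Series([10, 11, 9, 10, 12, 11, 10, 9, 11, 10, 30, 10])
low, high, flag = sd_bounds_expanding(s, eval_period=2)
# The spike at index 10 exceeds mean + 2*SD of the first ten points, so it is flagged.
```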
@@ -1 +1,13 @@
-from .help_info import help_info
+from .help_anomaly import help_anomaly, get_example_df
+from .evaluation_info import evaluation_info
+from .evaluation_plots import (
+    anomaly_overview_plot,
+    anomaly_percentile_plot,
+    anomaly_sd_plot,
+    anomaly_mad_plot,
+    anomaly_iqr_plot,
+    anomaly_ewma_plot,
+    anomaly_fb_plot,
+    anomaly_dbscan_plot,
+    anomaly_isolation_forest_plot
+)
@@ -1,8 +1,15 @@
 import pandas as pd
 import numpy as np
 from IPython.display import display, Markdown
-from anomaly_pipeline.helpers.evaluation_plots import anomaly_eval_plot, anomaly_percentile_plot,\
-    anomaly_sd_plot, anomaly_mad_plot, anomaly_iqr_plot, anomaly_ewma_plot, anomaly_fb_plot, anomaly_dbscan_plot, anomaly_isolation_forest_timeseries_plot
+from .evaluation_plots import (anomaly_overview_plot,
+                               anomaly_percentile_plot,
+                               anomaly_sd_plot,
+                               anomaly_mad_plot,
+                               anomaly_iqr_plot,
+                               anomaly_ewma_plot,
+                               anomaly_fb_plot,
+                               anomaly_dbscan_plot,
+                               anomaly_isolation_forest_plot)
 
 
 def evaluation_info(
@@ -10,7 +17,7 @@ def evaluation_info(
     group_columns,
     variable,
     date_column,
-    eval_period=12,
+    eval_period,
     models_to_plot=[]
 ):
 
@@ -42,7 +49,7 @@ def evaluation_info(
     interpolation_msg = ""
 
     no_eval_groups = (
-        eval_df.groupby(['taxonomy', 'channel'])['is_Anomaly']\
+        eval_df.groupby(group_columns)['is_Anomaly']\
         .agg(is_all_na=lambda x: x.isna().all(), historical_data_points='size')\
         .reset_index()
     )
@@ -81,7 +88,7 @@ To increase the chance of evaluating these records, lower the `eval_period` para
     display(Markdown(eval_msg1))
 
     if interpolated_cnt >= 1:
-        display(eval_df[eval_df['is_missing_record'] == True].sample(5))
+        display(eval_df[eval_df['is_missing_record'] == True].sample(min(interpolated_cnt, 5)))
 
     display(Markdown(eval_msg2))
 
@@ -102,20 +109,21 @@ To increase the chance of evaluating these records, lower the `eval_period` para
 
         for model in models_to_plot:
             if model == 'overall':
-                anomaly_eval_plot(group_df, group_columns, variable, date_column, eval_period=12, show_anomaly_scores_on_main_plot=False)
+                anomaly_overview_plot(group_df, group_columns, variable, date_column, eval_period=12, show_anomaly_scores_on_main_plot=False)
             elif model == 'percentile':
-                anomaly_percentile_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, final_anomalies=False, eval_period=12)
-            elif model == 'iqr':
-                anomaly_iqr_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, final_anomalies=False, eval_period=12)
-            elif model == 'mad':
-                anomaly_mad_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, final_anomalies=False, eval_period=12)
+                anomaly_percentile_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, eval_period=eval_period, final_anomalies=False)
             elif model == 'std':
-                anomaly_sd_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, final_anomalies=False, eval_period=12)
+                anomaly_sd_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, eval_period=eval_period, final_anomalies=False)
+            elif model == 'mad':
+                anomaly_mad_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, eval_period=eval_period, final_anomalies=False)
+            elif model == 'iqr':
+                anomaly_iqr_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, eval_period=eval_period, final_anomalies=False)
             elif model == 'ewma':
-                anomaly_ewma_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, final_anomalies=False, eval_period=12)
+                anomaly_ewma_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, eval_period=eval_period, final_anomalies=False)
             elif model == 'prophet':
-                anomaly_fb_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, final_anomalies=False, eval_period=12)
-            elif model == 'dbscan':
-                anomaly_dbscan_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, final_anomalies=False, eval_period=12)
+                anomaly_fb_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, eval_period=eval_period, final_anomalies=False)
             elif model == 'isolation_forest':
-                anomaly_isolation_forest_timeseries_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, final_anomalies=False, eval_period=12)
+                anomaly_isolation_forest_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, eval_period=eval_period, final_anomalies=False)
+            elif model == 'dbscan':
+                anomaly_dbscan_plot(group=group_df, group_columns=group_columns, variable=variable, date_column=date_column, eval_period=eval_period, final_anomalies=False)
+
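For context on the dispatch above: `eval_period` is now a required argument of `evaluation_info` and is forwarded to the per-model plot calls. A hypothetical invocation is sketched below; the leading `eval_df` argument name is inferred from the function body earlier in this diff, and the group/column values are placeholders.

```python
# Hypothetical call; names not visible in the diff are assumptions.
evaluation_info(
    eval_df,                                   # pipeline output with per-model anomaly flags
    group_columns=["taxonomy", "channel"],
    variable="y",
    date_column="ds",
    eval_period=12,                            # no longer defaulted; passed through to the model plots
    models_to_plot=["overall", "std", "prophet", "isolation_forest"],
)
```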