oracle-ads 2.11.6__py3-none-any.whl → 2.11.8__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
- ads/aqua/__init__.py +24 -14
- ads/aqua/base.py +0 -2
- ads/aqua/cli.py +50 -2
- ads/aqua/decorator.py +8 -0
- ads/aqua/deployment.py +37 -34
- ads/aqua/evaluation.py +106 -49
- ads/aqua/extension/base_handler.py +18 -10
- ads/aqua/extension/common_handler.py +21 -2
- ads/aqua/extension/deployment_handler.py +1 -4
- ads/aqua/extension/evaluation_handler.py +1 -2
- ads/aqua/extension/finetune_handler.py +0 -1
- ads/aqua/extension/ui_handler.py +1 -12
- ads/aqua/extension/utils.py +4 -4
- ads/aqua/finetune.py +24 -11
- ads/aqua/model.py +2 -4
- ads/aqua/utils.py +39 -23
- ads/catalog/model.py +3 -3
- ads/catalog/notebook.py +3 -3
- ads/catalog/project.py +2 -2
- ads/catalog/summary.py +2 -4
- ads/cli.py +21 -2
- ads/common/serializer.py +5 -4
- ads/common/utils.py +6 -2
- ads/config.py +1 -0
- ads/data_labeling/metadata.py +2 -2
- ads/dataset/dataset.py +3 -5
- ads/dataset/factory.py +2 -3
- ads/dataset/label_encoder.py +1 -1
- ads/dataset/sampled_dataset.py +3 -5
- ads/jobs/ads_job.py +26 -2
- ads/jobs/builders/infrastructure/dsc_job.py +20 -7
- ads/llm/serializers/runnable_parallel.py +7 -1
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +1 -1
- ads/opctl/operator/lowcode/anomaly/README.md +1 -1
- ads/opctl/operator/lowcode/anomaly/environment.yaml +1 -1
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +8 -15
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +16 -10
- ads/opctl/operator/lowcode/anomaly/model/autots.py +9 -10
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +36 -39
- ads/opctl/operator/lowcode/anomaly/model/tods.py +4 -4
- ads/opctl/operator/lowcode/anomaly/operator_config.py +18 -1
- ads/opctl/operator/lowcode/anomaly/schema.yaml +16 -4
- ads/opctl/operator/lowcode/common/data.py +16 -2
- ads/opctl/operator/lowcode/common/transformations.py +48 -14
- ads/opctl/operator/lowcode/forecast/README.md +1 -1
- ads/opctl/operator/lowcode/forecast/environment.yaml +5 -4
- ads/opctl/operator/lowcode/forecast/model/arima.py +36 -29
- ads/opctl/operator/lowcode/forecast/model/automlx.py +91 -90
- ads/opctl/operator/lowcode/forecast/model/autots.py +200 -166
- ads/opctl/operator/lowcode/forecast/model/base_model.py +144 -140
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +86 -80
- ads/opctl/operator/lowcode/forecast/model/prophet.py +68 -63
- ads/opctl/operator/lowcode/forecast/operator_config.py +18 -2
- ads/opctl/operator/lowcode/forecast/schema.yaml +20 -4
- ads/opctl/operator/lowcode/forecast/utils.py +8 -4
- ads/opctl/operator/lowcode/pii/README.md +1 -1
- ads/opctl/operator/lowcode/pii/environment.yaml +1 -1
- ads/opctl/operator/lowcode/pii/model/report.py +71 -70
- ads/pipeline/ads_pipeline_step.py +11 -12
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/METADATA +8 -7
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/RECORD +64 -64
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/WHEEL +0 -0
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/entry_points.txt +0 -0
--- a/ads/opctl/operator/lowcode/anomaly/model/base_model.py
+++ b/ads/opctl/operator/lowcode/anomaly/model/base_model.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2023 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import os
@@ -57,7 +57,7 @@ class AnomalyOperatorBaseModel(ABC):
 
     def generate_report(self):
         """Generates the report."""
-        import datapane as dp
+        import report_creator as rc
         import matplotlib.pyplot as plt
 
         start_time = time.time()
@@ -79,12 +79,10 @@ class AnomalyOperatorBaseModel(ABC):
             anomaly_output, test_data, elapsed_time
         )
         table_blocks = [
-            dp.DataTable(df, label=col)
+            rc.DataTable(df, label=col, index=True)
            for col, df in self.datasets.full_data_dict.items()
         ]
-        data_table = (
-            dp.Select(blocks=table_blocks) if len(table_blocks) > 1 else table_blocks[0]
-        )
+        data_table = rc.Select(blocks=table_blocks)
         date_column = self.spec.datetime_column.name
 
         blocks = []
@@ -106,44 +104,42 @@ class AnomalyOperatorBaseModel(ABC):
                 plt.xlabel(date_column)
                 plt.ylabel(col)
                 plt.title(f"`{col}` with reference to anomalies")
-                figure_blocks.append(ax)
-            blocks.append(
-        plots =
+                figure_blocks.append(rc.Widget(ax))
+            blocks.append(rc.Group(*figure_blocks, label=target))
+        plots = rc.Select(blocks)
 
         report_sections = []
-        title_text =
-
-        yaml_appendix_title =
-        yaml_appendix =
-        summary =
-
-
-
-
-
-
-
-
-
-
-
-
-        )
-        ]
+        title_text = rc.Heading("Anomaly Detection Report", level=1)
+
+        yaml_appendix_title = rc.Heading("Reference: YAML File", level=2)
+        yaml_appendix = rc.Yaml(self.config.to_dict())
+        summary = rc.Block(
+            rc.Group(
+                rc.Text(f"You selected the **`{self.spec.model}`** model."),
+                rc.Text(
+                    "Based on your dataset, you could have also selected "
+                    f"any of the models: `{'`, `'.join(SupportedModels.keys())}`."
+                ),
+                rc.Metric(
+                    heading="Analysis was completed in ",
+                    value=human_time_friendly(elapsed_time),
+                ),
+                label="Summary",
+            )
         )
-        sec_text =
-        sec =
+        sec_text = rc.Heading("Train Evaluation Metrics", level=2)
+        sec = rc.DataTable(self._evaluation_metrics(anomaly_output), index=True)
         evaluation_metrics_sec = [sec_text, sec]
 
         test_metrics_sections = []
         if total_metrics is not None and not total_metrics.empty:
-            sec_text =
-            sec =
+            sec_text = rc.Heading("Test Data Evaluation Metrics", level=2)
+            sec = rc.DataTable(total_metrics, index=True)
             test_metrics_sections = test_metrics_sections + [sec_text, sec]
 
         if summary_metrics is not None and not summary_metrics.empty:
-            sec_text =
-            sec =
+            sec_text = rc.Heading("Test Data Summary Metrics", level=2)
+            sec = rc.DataTable(summary_metrics, index=True)
             test_metrics_sections = test_metrics_sections + [sec_text, sec]
 
         report_sections = (
@@ -248,7 +244,7 @@ class AnomalyOperatorBaseModel(ABC):
         test_metrics: pd.DataFrame,
     ):
         """Saves resulting reports to the given folder."""
-        import datapane as dp
+        import report_creator as rc
 
         unique_output_dir = find_output_dirname(self.spec.output_directory)
 
@@ -257,11 +253,12 @@ class AnomalyOperatorBaseModel(ABC):
         else:
             storage_options = dict()
 
-        # datapane html report
+        # report-creator html report
         with tempfile.TemporaryDirectory() as temp_dir:
             report_local_path = os.path.join(temp_dir, "___report.html")
             disable_print()
-
+            with rc.ReportCreator("My Report") as report:
+                report.save(rc.Block(*report_sections), report_local_path)
             enable_print()
             with open(report_local_path) as f1:
                 with fsspec.open(
@@ -272,7 +269,7 @@ class AnomalyOperatorBaseModel(ABC):
                     f2.write(f1.read())
 
         if self.spec.generate_inliers:
-            inliers = anomaly_output.get_inliers(self.datasets
+            inliers = anomaly_output.get_inliers(self.datasets)
             write_data(
                 data=inliers,
                 filename=os.path.join(unique_output_dir, self.spec.inliers_filename),
@@ -280,7 +277,7 @@ class AnomalyOperatorBaseModel(ABC):
                 storage_options=storage_options,
             )
 
-        outliers = anomaly_output.get_outliers(self.datasets
+        outliers = anomaly_output.get_outliers(self.datasets)
         write_data(
             data=outliers,
             filename=os.path.join(unique_output_dir, self.spec.outliers_filename),
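
Taken together, the base_model.py hunks above swap the anomaly operator's report backend from datapane (`dp.*`) to report_creator (`rc.*`). Below is a minimal sketch of the new flow, using only the calls visible in this diff (`rc.Heading`, `rc.DataTable`, `rc.Block`, `rc.ReportCreator`, `report.save`); the sample DataFrame and report title are made up for illustration.

```python
# Illustrative only: mirrors the rc.* usage shown in the hunks above.
import pandas as pd
import report_creator as rc

report_sections = [
    rc.Heading("Anomaly Detection Report", level=1),
    rc.DataTable(pd.DataFrame({"metric": ["MAE"], "value": [0.1]}), index=True),
]

with rc.ReportCreator("My Report") as report:
    # rc.Block stitches the individual sections into one report body.
    report.save(rc.Block(*report_sections), "report.html")
```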
--- a/ads/opctl/operator/lowcode/anomaly/model/tods.py
+++ b/ads/opctl/operator/lowcode/anomaly/model/tods.py
@@ -1,7 +1,7 @@
 # #!/usr/bin/env python
 # # -*- coding: utf-8 -*--
 
-# # Copyright (c) 2023 Oracle and/or its affiliates.
+# # Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 # import importlib
@@ -97,16 +97,16 @@
 #         return anomaly_output
 
 #     def _generate_report(self):
-#         import datapane as dp
+#         import report_creator as rc
 
 #         """The method that needs to be implemented on the particular model level."""
-#         selected_models_text = dp.Text(
+#         selected_models_text = rc.Text(
 #             f"## Selected Models Overview \n "
 #             "The following tables provide information regarding the chosen model."
 #         )
 #         all_sections = [selected_models_text]
 
-#         model_description = dp.Text(
+#         model_description = rc.Text(
 #             "The tods model is a full-stack automated machine learning system for outlier detection "
 #             "on univariate / multivariate time-series data. It provides exhaustive modules for building "
 #             "machine learning-based outlier detection systems and wide range of algorithms."
--- a/ads/opctl/operator/lowcode/anomaly/operator_config.py
+++ b/ads/opctl/operator/lowcode/anomaly/operator_config.py
@@ -36,6 +36,21 @@ class TestData(InputData):
     """Class representing operator specification test data details."""
 
 
+@dataclass(repr=True)
+class PreprocessingSteps(DataClassSerializable):
+    """Class representing preprocessing steps for operator."""
+
+    missing_value_imputation: bool = True
+    outlier_treatment: bool = False
+
+
+@dataclass(repr=True)
+class DataPreprocessor(DataClassSerializable):
+    """Class representing operator specification preprocessing details."""
+
+    enabled: bool = True
+    steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)
+
 @dataclass(repr=True)
 class AnomalyOperatorSpec(DataClassSerializable):
     """Class representing operator specification."""
@@ -74,7 +89,9 @@ class AnomalyOperatorSpec(DataClassSerializable):
             self.generate_inliers if self.generate_inliers is not None else False
         )
         self.model_kwargs = self.model_kwargs or dict()
-
+        self.preprocessing = (
+            self.preprocessing if self.preprocessing is not None else DataPreprocessor(enabled=True)
+        )
 
 @dataclass(repr=True)
 class AnomalyOperatorConfig(OperatorConfig):
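
The two new dataclasses above give the anomaly operator a structured `preprocessing` block. Here is a small standalone sketch of how the defaults resolve, mirroring the `__post_init__` fallback added in the second hunk (the `DataClassSerializable` base and YAML wiring are omitted for brevity):

```python
# Simplified restatement of the dataclasses above, minus DataClassSerializable.
from dataclasses import dataclass, field


@dataclass(repr=True)
class PreprocessingSteps:
    missing_value_imputation: bool = True
    outlier_treatment: bool = False


@dataclass(repr=True)
class DataPreprocessor:
    enabled: bool = True
    steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)


# Mirrors AnomalyOperatorSpec.__post_init__: a missing preprocessing block
# falls back to an enabled preprocessor with the default steps.
preprocessing = None
preprocessing = preprocessing if preprocessing is not None else DataPreprocessor(enabled=True)
print(preprocessing)
# DataPreprocessor(enabled=True, steps=PreprocessingSteps(missing_value_imputation=True, outlier_treatment=False))
```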
--- a/ads/opctl/operator/lowcode/anomaly/schema.yaml
+++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml
@@ -307,11 +307,23 @@ spec:
         description: "When provided, target_category_columns [list] indexes the data into multiple related datasets for anomaly detection"
 
     preprocessing:
-      type:
+      type: dict
       required: false
-
-
-
+      schema:
+        enabled:
+          type: boolean
+          required: false
+          default: true
+          meta:
+            description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
+        steps:
+          type: dict
+          required: false
+          schema:
+            missing_value_imputation:
+              type: boolean
+              required: false
+              default: false
 
     generate_report:
       type: boolean
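
These rules follow the Cerberus schema dialect (cerberus appears among the operator dependencies elsewhere in this diff). A hedged sketch of how the new `preprocessing` block validates with a plain Cerberus `Validator`; the operator-specific `meta` rule is dropped here, since stock Cerberus rejects unknown rules:

```python
# Minimal sketch, not the operator's own validation wiring.
from cerberus import Validator

schema = {
    "preprocessing": {
        "type": "dict",
        "required": False,
        "schema": {
            "enabled": {"type": "boolean", "required": False, "default": True},
            "steps": {
                "type": "dict",
                "required": False,
                "schema": {
                    "missing_value_imputation": {
                        "type": "boolean",
                        "required": False,
                        "default": False,
                    }
                },
            },
        },
    }
}

v = Validator(schema)
doc = {"preprocessing": {"enabled": True, "steps": {"missing_value_imputation": True}}}
print(v.validate(doc), v.errors)  # True {}
```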
--- a/ads/opctl/operator/lowcode/common/data.py
+++ b/ads/opctl/operator/lowcode/common/data.py
@@ -16,6 +16,7 @@ from ads.opctl.operator.lowcode.common.errors import (
     DataMismatchError,
 )
 from abc import ABC
+import pandas as pd
 
 
 class AbstractData(ABC):
@@ -26,6 +27,19 @@ class AbstractData(ABC):
         self.name = name
         self.load_transform_ingest_data(spec)
 
+    def get_raw_data_by_cat(self, category):
+        mapping = self._data_transformer.get_target_category_columns_map()
+        # For given category, mapping gives the target_category_columns and it's values.
+        # condition filters raw_data based on the values of target_category_columns for the given category
+        condition = pd.Series(True, index=self.raw_data.index)
+        if category in mapping:
+            for col, val in mapping[category].items():
+                condition &= (self.raw_data[col] == val)
+        data_by_cat = self.raw_data[condition].reset_index(drop=True)
+        data_by_cat = self._data_transformer._format_datetime_col(data_by_cat)
+        return data_by_cat
+
+
     def get_dict_by_series(self):
         if not self._data_dict:
             for s_id in self.list_series_ids():
@@ -73,8 +87,8 @@ class AbstractData(ABC):
         return data
 
     def load_transform_ingest_data(self, spec):
-        raw_data = self._load_data(getattr(spec, self.name))
-        self.data = self._transform_data(spec, raw_data)
+        self.raw_data = self._load_data(getattr(spec, self.name))
+        self.data = self._transform_data(spec, self.raw_data)
         self._ingest_data(spec)
 
     def _ingest_data(self, spec):
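
The new `get_raw_data_by_cat` builds an all-`True` boolean mask and narrows it column by column using the category mapping. The same pattern in isolation, with hypothetical column names borrowed from the docstring that appears later in this diff:

```python
# Standalone sketch of the boolean-mask filtering used in get_raw_data_by_cat.
import pandas as pd

raw_data = pd.DataFrame(
    {"PPG_Code": ["P1", "P1", "P2"], "Class": ["A", "B", "A"], "y": [1, 2, 3]}
)
mapping = {"P1__A": {"PPG_Code": "P1", "Class": "A"}}

category = "P1__A"
condition = pd.Series(True, index=raw_data.index)  # start with every row selected
for col, val in mapping.get(category, {}).items():
    condition &= raw_data[col] == val  # narrow the mask one column at a time
print(raw_data[condition].reset_index(drop=True))  # only the P1/A row survives
```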
--- a/ads/opctl/operator/lowcode/common/transformations.py
+++ b/ads/opctl/operator/lowcode/common/transformations.py
@@ -58,26 +58,33 @@ class Transformations(ABC):
         clean_df = self._format_datetime_col(clean_df)
         clean_df = self._set_multi_index(clean_df)
 
-        if self.
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if self.preprocessing and self.preprocessing.enabled:
+            if self.name == "historical_data":
+                if self.preprocessing.steps.missing_value_imputation:
+                    try:
+                        clean_df = self._missing_value_imputation_hist(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Missing value imputation failed with {e.args}")
+                else:
+                    logger.info("Skipping missing value imputation because it is disabled")
+                if self.preprocessing.steps.outlier_treatment:
+                    try:
+                        clean_df = self._outlier_treatment(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Outlier Treatment failed with {e.args}")
+                else:
+                    logger.info("Skipping outlier treatment because it is disabled")
+            elif self.name == "additional_data":
+                clean_df = self._missing_value_imputation_add(clean_df)
+        else:
+            logger.info("Skipping all preprocessing steps because preprocessing is disabled")
         return clean_df
 
     def _remove_trailing_whitespace(self, df):
         return df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
 
     def _set_series_id_column(self, df):
+        self._target_category_columns_map = dict()
         if not self.target_category_columns:
             df[DataColumns.Series] = "Series 1"
             self.has_artificial_series = True
@@ -85,6 +92,11 @@ class Transformations(ABC):
             df[DataColumns.Series] = merge_category_columns(
                 df, self.target_category_columns
             )
+            merged_values = df[DataColumns.Series].unique().tolist()
+            if self.target_category_columns:
+                for value in merged_values:
+                    self._target_category_columns_map[value] = df[df[DataColumns.Series] == value][self.target_category_columns].drop_duplicates().iloc[0].to_dict()
+
             df = df.drop(self.target_category_columns, axis=1)
         return df
 
@@ -189,3 +201,25 @@ class Transformations(ABC):
             raise DataMismatchError(
                 f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
             )
+
+    """
+    Map between merged target category column values and target category column and its value
+    If target category columns are PPG_Code, Class, Num
+    Merged target category column values are Product Category 1__A__1, Product Category 2__A__2
+    Then target_category_columns_map would be
+    {
+        "Product Category 1__A__1": {
+            "PPG_Code": "Product Category 1",
+            "Class": "A",
+            "Num": 1
+        },
+        "Product Category 2__A__2": {
+            "PPG_Code": "Product Category 2",
+            "Class": "A",
+            "Num": 2
+        },
+
+    }
+    """
+    def get_target_category_columns_map(self):
+        return self._target_category_columns_map
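
A compact sketch of how `_target_category_columns_map` is derived, assuming the `__`-joined series ids shown in the docstring above (`merge_category_columns` itself is not reproduced here, and the column names are made up):

```python
# Hypothetical reconstruction of the mapping logic from the hunks above.
import pandas as pd

df = pd.DataFrame({"PPG_Code": ["P1", "P2"], "Class": ["A", "A"], "Num": [1, 2]})
target_category_columns = ["PPG_Code", "Class", "Num"]

# Stand-in for merge_category_columns: join the category values with "__".
df["Series"] = df[target_category_columns].astype(str).agg("__".join, axis=1)

mapping = {
    value: df[df["Series"] == value][target_category_columns]
    .drop_duplicates()
    .iloc[0]
    .to_dict()
    for value in df["Series"].unique()
}
print(mapping)  # {'P1__A__1': {'PPG_Code': 'P1', 'Class': 'A', 'Num': 1}, ...}
```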
--- a/ads/opctl/operator/lowcode/forecast/environment.yaml
+++ b/ads/opctl/operator/lowcode/forecast/environment.yaml
@@ -10,11 +10,12 @@ dependencies:
   - neuralprophet
   - pmdarima
   - statsmodels
-  - datapane
+  - report-creator
   - cerberus
   - sktime
   - shap
   - autots[additional]
-  - optuna
-  - oracle-automlx
-  - oracle-automlx[forecasting]
+  - optuna
+  - oracle-automlx>=23.4.1
+  - oracle-automlx[forecasting]>=23.4.1
+  - fire
--- a/ads/opctl/operator/lowcode/forecast/model/arima.py
+++ b/ads/opctl/operator/lowcode/forecast/model/arima.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2023 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import pandas as pd
@@ -29,6 +29,7 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         self.local_explanation = {}
         self.formatted_global_explanation = None
         self.formatted_local_explanation = None
+        self.constant_cols = {}
 
     def set_kwargs(self):
         # Extract the Confidence Interval Width and convert to arima's equivalent - alpha
@@ -64,6 +65,10 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         try:
             target = self.original_target_column
             self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
+            # If trend is constant, remove constant columns
+            if "trend" not in model_kwargs or model_kwargs["trend"] == "c":
+                self.constant_cols[s_id] = df.columns[df.nunique() == 1]
+                df = df.drop(columns=self.constant_cols[s_id])
 
             # format the dataframe for this target. Dropping NA on target[df] will remove all future data
             data = self.preprocess(df, s_id)
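
The constant-column detection above relies on `DataFrame.nunique`, which counts distinct values per column; a quick illustration with made-up data:

```python
# df.columns[df.nunique() == 1] selects columns with a single distinct value.
import pandas as pd

df = pd.DataFrame({"y": [1.0, 2.0, 3.0], "flag": [0, 0, 0]})
constant_cols = df.columns[df.nunique() == 1]
print(list(constant_cols))  # ['flag']
df = df.drop(columns=constant_cols)  # same drop as in the hunk above
```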
@@ -74,7 +79,7 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
             X_in = data_i.drop(target, axis=1) if len(data_i.columns) > 1 else None
             X_pred = self.get_horizon(data).drop(target, axis=1)
 
-            if self.loaded_models is not None:
+            if self.loaded_models is not None and s_id in self.loaded_models:
                 model = self.loaded_models[s_id]
             else:
                 # Build and fit model
@@ -142,28 +147,25 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
 
     def _generate_report(self):
         """The method that needs to be implemented on the particular model level."""
-        import datapane as dp
-
-
-
-
-
-
-
-
-
-
-
+        import report_creator as rc
+
+        all_sections = []
+        if len(self.models) > 0:
+            sec5_text = rc.Heading("ARIMA Model Parameters", level=2)
+            blocks = [
+                rc.Html(
+                    m.summary().as_html(),
+                    label=s_id,
+                )
+                for i, (s_id, m) in enumerate(self.models.items())
+            ]
+            sec5 = rc.Select(blocks=blocks)
+            all_sections = [sec5_text, sec5]
 
         if self.spec.generate_explanations:
             try:
                 # If the key is present, call the "explain_model" method
                 self.explain_model()
-                # Create a markdown text block for the global explanation section
-                global_explanation_text = dp.Text(
-                    f"## Global Explanation of Models \n "
-                    "The following tables provide the feature attribution for the global explainability."
-                )
 
                 # Convert the global explanation data to a DataFrame
                 global_explanation_df = pd.DataFrame(self.global_explanation)
@@ -179,9 +181,12 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
                 )
 
                 # Create a markdown section for the global explainability
-                global_explanation_section =
-                    "
-
+                global_explanation_section = rc.Block(
+                    rc.Heading("Global Explanation of Models", level=2),
+                    rc.Text(
+                        "The following tables provide the feature attribution for the global explainability."
+                    ),
+                    rc.DataTable(self.formatted_global_explanation, index=True),
                 )
 
                 aggregate_local_explanations = pd.DataFrame()
@@ -193,30 +198,29 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
                 )
                 self.formatted_local_explanation = aggregate_local_explanations
 
-                local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
                 blocks = [
-                    dp.DataTable(
+                    rc.DataTable(
                         local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
                         label=s_id,
+                        index=True,
                     )
                     for s_id, local_ex_df in self.local_explanation.items()
                 ]
-                local_explanation_section = (
-
+                local_explanation_section = rc.Block(
+                    rc.Heading("Local Explanation of Models", level=2),
+                    rc.Select(blocks=blocks),
                 )
 
                 # Append the global explanation text and section to the "all_sections" list
                 all_sections = all_sections + [
-                    global_explanation_text,
                     global_explanation_section,
-                    local_explanation_text,
                     local_explanation_section,
                 ]
             except Exception as e:
                 logger.warn(f"Failed to generate Explanations with error: {e}.")
                 logger.debug(f"Full Traceback: {traceback.format_exc()}")
 
-        model_description = dp.Text(
+        model_description = rc.Text(
             "An autoregressive integrated moving average, or ARIMA, is a statistical "
             "analysis model that uses time series data to either better understand the "
             "data set or to predict future trends. A statistical model is autoregressive if "
@@ -239,6 +243,9 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         """
         data: ForecastDatasets.get_data_at_series(s_id)
         """
+        if series_id in self.constant_cols:
+            data = data.drop(columns=self.constant_cols[series_id])
+
         data = data.drop([target_col], axis=1)
         data[dt_column_name] = seconds_to_datetime(
             data[dt_column_name], dt_format=self.spec.datetime_column.format