oracle-ads 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +12 -0
- ads/aqua/base.py +324 -0
- ads/aqua/cli.py +19 -0
- ads/aqua/config/deployment_config_defaults.json +9 -0
- ads/aqua/config/resource_limit_names.json +7 -0
- ads/aqua/constants.py +45 -0
- ads/aqua/data.py +40 -0
- ads/aqua/decorator.py +101 -0
- ads/aqua/deployment.py +643 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation.py +1751 -0
- ads/aqua/exception.py +82 -0
- ads/aqua/extension/__init__.py +40 -0
- ads/aqua/extension/base_handler.py +138 -0
- ads/aqua/extension/common_handler.py +21 -0
- ads/aqua/extension/deployment_handler.py +202 -0
- ads/aqua/extension/evaluation_handler.py +135 -0
- ads/aqua/extension/finetune_handler.py +66 -0
- ads/aqua/extension/model_handler.py +59 -0
- ads/aqua/extension/ui_handler.py +201 -0
- ads/aqua/extension/utils.py +23 -0
- ads/aqua/finetune.py +579 -0
- ads/aqua/job.py +29 -0
- ads/aqua/model.py +819 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +459 -0
- ads/aqua/ui.py +453 -0
- ads/aqua/utils.py +715 -0
- ads/cli.py +37 -6
- ads/common/auth.py +7 -0
- ads/common/decorator/__init__.py +7 -3
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/object_storage_details.py +166 -7
- ads/common/oci_client.py +18 -1
- ads/common/oci_logging.py +2 -2
- ads/common/oci_mixin.py +4 -5
- ads/common/serializer.py +34 -5
- ads/common/utils.py +75 -10
- ads/config.py +40 -1
- ads/dataset/correlation_plot.py +10 -12
- ads/jobs/ads_job.py +43 -25
- ads/jobs/builders/infrastructure/base.py +4 -2
- ads/jobs/builders/infrastructure/dsc_job.py +49 -39
- ads/jobs/builders/runtimes/base.py +71 -1
- ads/jobs/builders/runtimes/container_runtime.py +4 -4
- ads/jobs/builders/runtimes/pytorch_runtime.py +10 -63
- ads/jobs/templates/driver_pytorch.py +27 -10
- ads/model/artifact_downloader.py +84 -14
- ads/model/artifact_uploader.py +25 -23
- ads/model/datascience_model.py +388 -38
- ads/model/deployment/model_deployment.py +10 -2
- ads/model/generic_model.py +8 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_metadata.py +1 -1
- ads/model/service/oci_datascience_model.py +34 -5
- ads/opctl/config/merger.py +2 -2
- ads/opctl/operator/__init__.py +3 -1
- ads/opctl/operator/cli.py +7 -1
- ads/opctl/operator/cmd.py +3 -3
- ads/opctl/operator/common/errors.py +2 -1
- ads/opctl/operator/common/operator_config.py +22 -3
- ads/opctl/operator/common/utils.py +16 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +15 -0
- ads/opctl/operator/lowcode/anomaly/README.md +209 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +104 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +88 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +12 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +147 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +89 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +103 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +354 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +67 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +105 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +359 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +81 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +96 -0
- ads/opctl/operator/lowcode/common/errors.py +41 -0
- ads/opctl/operator/lowcode/common/transformations.py +191 -0
- ads/opctl/operator/lowcode/common/utils.py +250 -0
- ads/opctl/operator/lowcode/forecast/README.md +3 -2
- ads/opctl/operator/lowcode/forecast/__main__.py +18 -2
- ads/opctl/operator/lowcode/forecast/cmd.py +8 -7
- ads/opctl/operator/lowcode/forecast/const.py +17 -1
- ads/opctl/operator/lowcode/forecast/environment.yaml +3 -2
- ads/opctl/operator/lowcode/forecast/model/arima.py +106 -117
- ads/opctl/operator/lowcode/forecast/model/automlx.py +204 -180
- ads/opctl/operator/lowcode/forecast/model/autots.py +144 -253
- ads/opctl/operator/lowcode/forecast/model/base_model.py +326 -259
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +325 -176
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +293 -237
- ads/opctl/operator/lowcode/forecast/model/prophet.py +191 -208
- ads/opctl/operator/lowcode/forecast/operator_config.py +24 -33
- ads/opctl/operator/lowcode/forecast/schema.yaml +116 -29
- ads/opctl/operator/lowcode/forecast/utils.py +186 -356
- ads/opctl/operator/lowcode/pii/model/guardrails.py +18 -15
- ads/opctl/operator/lowcode/pii/model/report.py +7 -7
- ads/opctl/operator/lowcode/pii/operator_config.py +1 -8
- ads/opctl/operator/lowcode/pii/utils.py +0 -82
- ads/opctl/operator/runtime/runtime.py +3 -2
- ads/telemetry/base.py +62 -0
- ads/telemetry/client.py +105 -0
- ads/telemetry/telemetry.py +6 -3
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/METADATA +44 -7
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/RECORD +116 -59
- ads/opctl/operator/lowcode/forecast/model/transformations.py +0 -125
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/WHEEL +0 -0
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/entry_points.txt +0 -0
The largest change in this release is the rewrite of the forecast operator's base model. Diff for `ads/opctl/operator/lowcode/forecast/model/base_model.py` (the viewer truncates some long removed lines, which are kept as rendered):

```diff
--- a/ads/opctl/operator/lowcode/forecast/model/base_model.py
+++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py
@@ -4,6 +4,7 @@
 # Copyright (c) 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

+import json
 import os
 import tempfile
 import time
@@ -15,15 +16,39 @@ import fsspec
 import numpy as np
 import pandas as pd

-from ads.opctl.operator.lowcode.forecast.utils import
+from ads.opctl.operator.lowcode.forecast.utils import (
+    default_signer,
+    evaluate_train_metrics,
+    get_forecast_plots,
+    _build_metrics_df,
+    _build_metrics_per_horizon,
+    load_pkl,
+    write_pkl,
+    _label_encode_dataframe,
+)
 from ads.common.object_storage_details import ObjectStorageDetails
 from ads.opctl import logger

-from
-
+from ads.opctl.operator.lowcode.common.utils import (
+    human_time_friendly,
+    enable_print,
+    disable_print,
+    write_data,
+    merged_category_column_name,
+    datetime_to_seconds,
+    seconds_to_datetime,
+    find_output_dirname,
+)
+from ..const import (
+    SUMMARY_METRICS_HORIZON_LIMIT,
+    SupportedMetrics,
+    SupportedModels,
+    SpeedAccuracyMode,
+)
 from ..operator_config import ForecastOperatorConfig, ForecastOperatorSpec
 from ads.common.decorator.runtime_dependency import runtime_dependency
 from .forecast_datasets import ForecastDatasets, ForecastOutput
+from ads.opctl.operator.lowcode.forecast.model.forecast_datasets import TestData


 class ForecastOperatorBaseModel(ABC):
@@ -41,23 +66,27 @@ class ForecastOperatorBaseModel(ABC):
         self.spec: ForecastOperatorSpec = config.spec
         self.datasets: ForecastDatasets = datasets

-        self.
-        self.original_total_data = datasets.original_total_data
-        self.original_additional_data = datasets.original_additional_data
-        self.full_data_dict = datasets.full_data_dict
-        self.target_columns = datasets.target_columns
-        self.categories = datasets.categories
+        self.full_data_dict = datasets.get_data_by_series()

         self.test_eval_metrics = None
         self.original_target_column = self.spec.target_column
+        self.dt_column_name = self.spec.datetime_column.name
+
+        self.model_parameters = dict()
+        self.loaded_models = None

         # these fields are populated in the _build_model() method
         self.models = None
+
         # "outputs" is a list of outputs generated by the models. These should only be generated when the framework requires the original output for plotting
         self.outputs = None
         self.forecast_output = None
+        self.errors_dict = dict()
+        self.le = dict()
+
+        self.formatted_global_explanation = None
+        self.formatted_local_explanation = None

-        self.train_metrics = False
         self.forecast_col_name = "yhat"
         self.perform_tuning = self.spec.tuning != None

@@ -73,10 +102,14 @@ class ForecastOperatorBaseModel(ABC):
         warnings.simplefilter(action="ignore", category=ConvergenceWarning)
         import datapane as dp

-        # load
+        # load models if given
+        if self.spec.previous_output_dir is not None:
+            self._load_model()
+
         start_time = time.time()
         result_df = self._build_model()
         elapsed_time = time.time() - start_time
+        logger.info("Building the models completed in %s seconds", elapsed_time)

         # Generate metrics
         summary_metrics = None
@@ -84,21 +117,7 @@ class ForecastOperatorBaseModel(ABC):
         self.eval_metrics = None

         if self.spec.generate_report or self.spec.generate_metrics:
-
-                self.eval_metrics = utils.evaluate_train_metrics(
-                    self.target_columns,
-                    self.datasets,
-                    self.forecast_output,
-                    self.spec.datetime_column.name,
-                    target_col=self.forecast_col_name,
-                )
-            else:
-                try:
-                    self.eval_metrics = self._generate_train_metrics()
-                except NotImplementedError:
-                    logger.warn(
-                        f"Training Metrics are not available for model type {self.spec.model}"
-                    )
+            self.eval_metrics = self.generate_train_metrics()

         if self.spec.test_data:
             try:
@@ -107,10 +126,6 @@ class ForecastOperatorBaseModel(ABC):
                     summary_metrics,
                     test_data,
                 ) = self._test_evaluate_metrics(
-                    target_columns=self.target_columns,
-                    test_filename=self.spec.test_data.url,
-                    output=self.forecast_output,
-                    target_col=self.forecast_col_name,
                     elapsed_time=elapsed_time,
                 )
             except Exception as e:
@@ -125,94 +140,114 @@ class ForecastOperatorBaseModel(ABC):
                 other_sections,
             ) = self._generate_report()

-            ds_column_series = self.datasets.get_longest_datetime_column()
-
             title_text = dp.Text("# Forecast Report")

-            md_columns = " * ".join(
+            md_columns = " * ".join(
+                [f"{s_id} \n" for s_id in self.datasets.list_series_ids()]
+            )
+
+            header_section = dp.Blocks(
+                blocks=[
+                    dp.Text(f"You selected the **`{self.spec.model}`** model."),
+                    model_description,
+                    dp.Text(
+                        "Based on your dataset, you could have also selected "
+                        f"any of the models: `{'`, `'.join(SupportedModels.keys())}`."
+                    ),
+                    dp.Group(
+                        dp.BigNumber(
+                            heading="Analysis was completed in ",
+                            value=human_time_friendly(elapsed_time),
+                        ),
+                        dp.BigNumber(
+                            heading="Starting time index",
+                            value=self.datasets.get_earliest_timestamp().strftime(
+                                "%B %d, %Y"
+                            ),
+                        ),
+                        dp.BigNumber(
+                            heading="Ending time index",
+                            value=self.datasets.get_latest_timestamp().strftime(
+                                "%B %d, %Y"
+                            ),
+                        ),
+                        dp.BigNumber(
+                            heading="Num series",
+                            value=len(self.datasets.list_series_ids()),
+                        ),
+                        columns=4,
+                    ),
+                ]
+            )
+
             first_10_rows_blocks = [
                 dp.DataTable(
-                    df.head(10)
+                    df.head(10),
                     caption="Start",
-                    label=
+                    label=s_id,
                 )
-                for
+                for s_id, df in self.full_data_dict.items()
             ]

             last_10_rows_blocks = [
                 dp.DataTable(
-                    df.tail(10)
+                    df.tail(10),
                     caption="End",
-                    label=
+                    label=s_id,
                 )
-                for
+                for s_id, df in self.full_data_dict.items()
             ]

             data_summary_blocks = [
                 dp.DataTable(
-                    df.
+                    df.describe(),
                     caption="Summary Statistics",
-                    label=
+                    label=s_id,
                )
-                for
+                for s_id, df in self.full_data_dict.items()
             ]
-
-
+
+            series_name = merged_category_column_name(
+                self.spec.target_category_columns
+            )
+            series_subtext = dp.Text(f"Indexed by {series_name}")
+            first_10_title = dp.Text("### First 10 Rows of Data")
+            last_10_title = dp.Text("### Last 10 Rows of Data")
+            summary_title = dp.Text("### Data Summary Statistics")
+
+            if series_name is not None and len(self.datasets.list_series_ids()) > 1:
+                data_summary_sec = dp.Blocks(
                    blocks=[
-
-
-
-
-
-
-
-
-
-
-                        dp.BigNumber(
-                            heading="Analysis was completed in ",
-                            value=utils.human_time_friendly(elapsed_time),
-                        ),
-                        dp.BigNumber(
-                            heading="Starting time index",
-                            value=ds_column_series.min().strftime(
-                                "%B %d, %Y"
-                            ),
-                        ),
-                        dp.BigNumber(
-                            heading="Ending time index",
-                            value=ds_column_series.max().strftime(
-                                "%B %d, %Y"
-                            ),
-                        ),
-                        dp.BigNumber(
-                            heading="Num series",
-                            value=len(self.target_columns),
-                        ),
-                        columns=4,
-                    ),
-                    dp.Text("### First 10 Rows of Data"),
-                    dp.Select(blocks=first_10_rows_blocks)
-                    if len(first_10_rows_blocks) > 1
-                    else first_10_rows_blocks[0],
-                    dp.Text("----"),
-                    dp.Text("### Last 10 Rows of Data"),
-                    dp.Select(blocks=last_10_rows_blocks)
-                    if len(last_10_rows_blocks) > 1
-                    else last_10_rows_blocks[0],
-                    dp.Text("### Data Summary Statistics"),
-                    dp.Select(blocks=data_summary_blocks)
-                    if len(data_summary_blocks) > 1
-                    else data_summary_blocks[0],
-                    label="Summary",
-                ),
-                dp.Text(
-                    "The following report compares a variety of metrics and plots "
-                    f"for your target columns: \n {md_columns}.\n",
-                    label="Target Columns",
-                ),
+                        first_10_title,
+                        series_subtext,
+                        dp.Select(blocks=first_10_rows_blocks),
+                        last_10_title,
+                        series_subtext,
+                        dp.Select(blocks=last_10_rows_blocks),
+                        summary_title,
+                        series_subtext,
+                        dp.Select(blocks=data_summary_blocks),
+                        dp.Text("----"),
                    ]
-            )
+                )
+            else:
+                data_summary_sec = dp.Blocks(
+                    blocks=[
+                        first_10_title,
+                        first_10_rows_blocks[0],
+                        last_10_title,
+                        last_10_rows_blocks[0],
+                        summary_title,
+                        data_summary_blocks[0],
+                        dp.Text("----"),
+                    ]
+                )
+
+            summary = dp.Group(
+                blocks=[
+                    header_section,
+                    data_summary_sec,
+                ]
            )

            test_metrics_sections = []
@@ -236,19 +271,22 @@ class ForecastOperatorBaseModel(ABC):
                train_metrics_sections = [sec9_text, sec9]

            forecast_text = dp.Text(f"## Forecasted Data Overlaying Historical")
-            forecast_sec =
+            forecast_sec = get_forecast_plots(
                self.forecast_output,
-                self.target_columns,
                horizon=self.spec.horizon,
                test_data=test_data,
                ci_interval_width=self.spec.confidence_interval_width,
            )
-
+            if series_name is not None and len(self.datasets.list_series_ids()) > 1:
+                forecast_plots = [forecast_text, series_subtext, forecast_sec]
+            else:
+                forecast_plots = [forecast_text, forecast_sec]

            yaml_appendix_title = dp.Text(f"## Reference: YAML File")
            yaml_appendix = dp.Code(code=self.config.to_yaml(), language="yaml")
            report_sections = (
-                [title_text
+                [title_text]
+                + [summary]
                + forecast_plots
                + other_sections
                + test_metrics_sections
@@ -264,69 +302,44 @@ class ForecastOperatorBaseModel(ABC):
            test_metrics_df=self.test_eval_metrics,
        )

-    def _test_evaluate_metrics(
-        self, target_columns, test_filename, output, target_col="yhat", elapsed_time=0
-    ):
+    def _test_evaluate_metrics(self, elapsed_time=0):
        total_metrics = pd.DataFrame()
        summary_metrics = pd.DataFrame()
-        data =
-        try:
-            storage_options = (
-                default_signer()
-                if ObjectStorageDetails.is_oci_path(test_filename)
-                else {}
-            )
-            data = utils._load_data(
-                filename=test_filename,
-                format=self.spec.test_data.format,
-                storage_options=storage_options,
-                columns=self.spec.test_data.columns,
-            )
-        except pd.errors.EmptyDataError:
-            logger.warn("Empty testdata file")
-            return total_metrics, summary_metrics, None
-
-        if data.empty:
-            return total_metrics, summary_metrics, None
+        data = TestData(self.spec)

-
-
-
-
-
-            target_column=self.original_target_column,
-            target_category_columns=self.spec.target_category_columns,
-            datetime_column="ds",
-        )
-
-        # Calculating Test Metrics
-        for cat in self.forecast_output.list_categories():
-            target_column_i = self.forecast_output.category_to_target[cat]
-            output_forecast_i = self.forecast_output.get_category(cat)
-            # Only columns present in test file will be used to generate test error
-            if target_column_i in data:
-                # Assuming that predictions have all forecast values
-                dates = output_forecast_i["Date"]
-                # Filling zeros for any date missing in test data to maintain consistency in metric calculation as in all other missing values cases it comes as 0
-                y_true = [
-                    data.loc[data["ds"] == date, target_column_i].values[0]
-                    if date in data["ds"].values
-                    else 0
-                    for date in dates
+        # Generate y_pred and y_true for each series
+        for s_id in self.forecast_output.list_series_ids():
+            try:
+                y_true = data.get_data_for_series(s_id)[data.target_name].values[
+                    -self.spec.horizon :
                ]
-
-                y_pred = np.asarray(y_pred_i[-len(y_true) :])
-
-                metrics_df = utils._build_metrics_df(
-                    y_true=y_true[-self.spec.horizon :],
-                    y_pred=y_pred[-self.spec.horizon :],
-                    column_name=target_column_i,
-                )
-                total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
-            else:
+            except KeyError as ke:
                logger.warn(
-                    f"Error Generating Metrics: Unable to find {
+                    f"Error Generating Metrics: Unable to find {s_id} in the test data. Error: {ke.args}"
                )
+            y_pred = self.forecast_output.get_forecast(s_id)["forecast_value"].values[
+                -self.spec.horizon :
+            ]
+
+            drop_na_mask = ~np.isnan(y_true) & ~np.isnan(y_pred)
+            if not drop_na_mask.all():  # There is a missing value
+                if drop_na_mask.any():  # All values are missing
+                    logger.debug(
+                        f"No values in the test data for series: {s_id}. This will affect the test metrics."
+                    )
+                    continue
+                logger.debug(
+                    f"Missing values in the test data for series: {s_id}. This will affect the test metrics."
+                )
+                y_true = y_true[drop_na_mask]
+                y_pred = y_pred[drop_na_mask]
+
+            metrics_df = _build_metrics_df(
+                y_true=y_true,
+                y_pred=y_pred,
+                series_id=s_id,
+            )
+            total_metrics = pd.concat([total_metrics, metrics_df], axis=1)

        if total_metrics.empty:
            return total_metrics, summary_metrics, data
```
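The reworked `_test_evaluate_metrics` drops the old zero-filling of missing test values and instead masks out any horizon position where either the actual or the forecast is NaN before scoring. A minimal sketch of that masking step in plain NumPy (the sMAPE line is illustrative only; the operator's actual metric set lives in `_build_metrics_df`):

```python
import numpy as np

y_true = np.array([10.0, np.nan, 12.0, 13.0])  # actuals over the horizon
y_pred = np.array([9.5, 11.0, np.nan, 12.5])   # forecast over the horizon

# Keep only positions where both the actual and the forecast are present.
drop_na_mask = ~np.isnan(y_true) & ~np.isnan(y_pred)
y_true, y_pred = y_true[drop_na_mask], y_pred[drop_na_mask]

# Metrics are then computed on the aligned pairs, e.g. sMAPE:
smape = np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))
print(y_true, y_pred, round(smape, 4))  # [10. 13.] [ 9.5 12.5] 0.0452
```

Compared with the 2.10.0 behavior (substituting 0 for missing dates), masking avoids inflating percentage errors on series with sparse test data.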
```diff
@@ -370,20 +383,10 @@ class ForecastOperatorBaseModel(ABC):

        """Calculates Mean sMAPE, Median sMAPE, Mean MAPE, Median MAPE, Mean wMAPE, Median wMAPE values for each horizon
        if horizon <= 10."""
-        target_columns_in_output = set(target_columns).intersection(data.columns)
        if self.spec.horizon <= SUMMARY_METRICS_HORIZON_LIMIT:
-
-
-            ):
-                logger.warn(
-                    f"Column Mismatch between Forecast Output and Target Columns"
-                )
-            metrics_per_horizon = utils._build_metrics_per_horizon(
-                data=data,
+            metrics_per_horizon = _build_metrics_per_horizon(
+                test_data=data,
                output=self.forecast_output,
-                target_columns=target_columns,
-                target_col=target_col,
-                horizon_periods=self.spec.horizon,
            )
            if not metrics_per_horizon.empty:
                summary_metrics = pd.concat([summary_metrics, metrics_per_horizon])
@@ -417,17 +420,9 @@ class ForecastOperatorBaseModel(ABC):
        """Saves resulting reports to the given folder."""
        import datapane as dp

-
-            output_dir = self.spec.output_directory.url
-        else:
-            output_dir = "tmp_fc_operator_result"
-            logger.warn(
-                "Since the output directory was not specified, the output will be saved to {} directory.".format(
-                    output_dir
-                )
-            )
+        unique_output_dir = find_output_dirname(self.spec.output_directory)

-        if ObjectStorageDetails.is_oci_path(
+        if ObjectStorageDetails.is_oci_path(unique_output_dir):
            storage_options = default_signer()
        else:
            storage_options = dict()
@@ -437,11 +432,11 @@ class ForecastOperatorBaseModel(ABC):
        # datapane html report
        with tempfile.TemporaryDirectory() as temp_dir:
            report_local_path = os.path.join(temp_dir, "___report.html")
-
+            disable_print()
            dp.save_report(report_sections, report_local_path)
-
+            enable_print()

-            report_path = os.path.join(
+            report_path = os.path.join(unique_output_dir, self.spec.report_filename)
            with open(report_local_path) as f1:
                with fsspec.open(
                    report_path,
@@ -451,19 +446,28 @@ class ForecastOperatorBaseModel(ABC):
                        f2.write(f1.read())

        # forecast csv report
-
+        write_data(
            data=result_df,
-            filename=os.path.join(
+            filename=os.path.join(unique_output_dir, self.spec.forecast_filename),
            format="csv",
            storage_options=storage_options,
        )

        # metrics csv report
        if self.spec.generate_metrics:
+            metrics_col_name = (
+                self.original_target_column
+                if self.datasets.has_artificial_series()
+                else "Series 1"
+            )
            if metrics_df is not None:
-
-                data=metrics_df.
-
+                write_data(
+                    data=metrics_df.reset_index().rename(
+                        {"index": "metrics", "Series 1": metrics_col_name}, axis=1
+                    ),
+                    filename=os.path.join(
+                        unique_output_dir, self.spec.metrics_filename
+                    ),
                    format="csv",
                    storage_options=storage_options,
                    index=False,
```
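Both metrics CSVs are written through the same `reset_index().rename(...)` reshaping: the metric names move from the DataFrame index into a `metrics` column, and the placeholder `Series 1` column is relabeled with the real target name when the single series id is artificial. A small pandas illustration (synthetic values; `Sales` stands in for `metrics_col_name`):

```python
import pandas as pd

# Metrics frame as built by the operator: one row per metric, one column per series.
metrics_df = pd.DataFrame({"Series 1": [12.3, 0.18]}, index=["RMSE", "sMAPE"])

# reset_index() turns the index into a column named "index";
# rename(..., axis=1) then relabels both columns for the CSV.
out = metrics_df.reset_index().rename(
    {"index": "metrics", "Series 1": "Sales"}, axis=1
)
print(out)
#   metrics  Sales
# 0    RMSE  12.30
# 1   sMAPE   0.18
```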
```diff
@@ -476,10 +480,12 @@ class ForecastOperatorBaseModel(ABC):
        # test_metrics csv report
        if self.spec.test_data is not None:
            if test_metrics_df is not None:
-
-                data=test_metrics_df.
+                write_data(
+                    data=test_metrics_df.reset_index().rename(
+                        {"index": "metrics", "Series 1": metrics_col_name}, axis=1
+                    ),
                    filename=os.path.join(
-
+                        unique_output_dir, self.spec.test_metrics_filename
                    ),
                    format="csv",
                    storage_options=storage_options,
@@ -493,10 +499,10 @@ class ForecastOperatorBaseModel(ABC):
        if self.spec.generate_explanations:
            try:
                if self.formatted_global_explanation is not None:
-
+                    write_data(
                        data=self.formatted_global_explanation,
                        filename=os.path.join(
-
+                            unique_output_dir, self.spec.global_explanation_filename
                        ),
                        format="csv",
                        storage_options=storage_options,
@@ -508,10 +514,10 @@ class ForecastOperatorBaseModel(ABC):
                    )

                if self.formatted_local_explanation is not None:
-
+                    write_data(
                        data=self.formatted_local_explanation,
                        filename=os.path.join(
-
+                            unique_output_dir, self.spec.local_explanation_filename
                        ),
                        format="csv",
                        storage_options=storage_options,
@@ -525,17 +531,52 @@ class ForecastOperatorBaseModel(ABC):
                logger.warn(
                    "Unable to generate explanations for this model type or for this dataset."
                )
+                logger.debug(f"Got error: {e.args}")
+
+        if self.spec.generate_model_parameters:
+            # model params
+            write_data(
+                data=pd.DataFrame.from_dict(self.model_parameters),
+                filename=os.path.join(unique_output_dir, "model_params.json"),
+                format="json",
+                storage_options=storage_options,
+                index=True,
+                indent=4,
+            )
+
+        # model pickle
+        if self.spec.generate_model_pickle:
+            self._save_model(unique_output_dir, storage_options)
+
        logger.info(
            f"The outputs have been successfully "
-            f"generated and placed into the directory: {
+            f"generated and placed into the directory: {unique_output_dir}."
        )
+        print(
+            f"The outputs have been successfully generated and placed into the directory: {unique_output_dir}."
+        )
+        if self.errors_dict:
+            write_data(
+                data=pd.DataFrame(self.errors_dict.items(), columns=["model", "error"]),
+                filename=os.path.join(
+                    unique_output_dir, self.spec.errors_dict_filename
+                ),
+                format="csv",
+                storage_options=storage_options,
+                index=True,
+            )
+        else:
+            logger.info(f"All modeling completed successfully.")

-    def
+    def preprocess(self, df, series_id):
        """The method that needs to be implemented on the particular model level."""
-        data
-
-
-
+        data = df.rename(
+            {self.dt_column_name: "ds", self.original_target_column: "y"}, axis=1
+        )
+        self.le[series_id], df_encoded = _label_encode_dataframe(
+            data, no_encode={"ds", "y"}
+        )
+        return df_encoded

    @abstractmethod
    def _generate_report(self):
@@ -551,20 +592,40 @@ class ForecastOperatorBaseModel(ABC):
        The method that needs to be implemented on the particular model level.
        """

-    def
+    def drop_horizon(self, df: pd.DataFrame) -> pd.DataFrame:
+        return df.iloc[: -self.spec.horizon]
+
+    def get_horizon(self, df: pd.DataFrame) -> pd.DataFrame:
+        return df.iloc[-self.spec.horizon :]
+
+    def generate_train_metrics(self) -> pd.DataFrame:
        """
        Generate Training Metrics when fitted data is not available.
        The method that needs to be implemented on the particular model level.
        """
-
+        return evaluate_train_metrics(self.forecast_output)
+
+    def _load_model(self):
+        try:
+            self.loaded_models = load_pkl(self.spec.previous_output_dir + "/model.pkl")
+        except:
+            logger.info("model.pkl is not present")
+
+    def _save_model(self, output_dir, storage_options):
+        write_pkl(
+            obj=self.models,
+            filename="model.pkl",
+            output_dir=output_dir,
+            storage_options=storage_options,
+        )

    @runtime_dependency(
        module="shap",
        err_msg=(
-            "Please run `
+            "Please run `python3 -m pip install shap` to install the required dependencies for model explanation."
        ),
    )
-    def explain_model(self
+    def explain_model(self):
        """
        Generates an explanation for the model by using the SHAP (Shapley Additive exPlanations) library.
        This function calculates the SHAP values for each feature in the dataset and stores the results in the `global_explanation` dictionary.
```
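The new `drop_horizon`/`get_horizon` helpers split each series into its historical rows and the trailing forecast window purely by position. A quick sketch of the slicing (toy frame, `horizon=3`):

```python
import pandas as pd

df = pd.DataFrame({"ds": pd.date_range("2024-01-01", periods=10), "y": range(10)})
horizon = 3

historical = df.iloc[:-horizon]  # rows 0..6 - used for fitting and train metrics
future = df.iloc[-horizon:]      # rows 7..9 - the forecast window

print(len(historical), len(future))  # 7 3
```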
```diff
@@ -574,65 +635,61 @@ class ForecastOperatorBaseModel(ABC):
            dict: A dictionary containing the global explanation for each feature in the dataset.
                The keys are the feature names and the values are the average absolute SHAP values.
        """
-        from shap import
-
-        for series_id in self.target_columns:
-            self.series_id = series_id
-            if self.spec.model == SupportedModels.AutoTS:
-                self.dataset_cols = (
-                    self.full_data_long.loc[
-                        self.full_data_long.series_id == self.series_id
-                    ]
-                    .set_index(datetime_col_name)
-                    .columns
-                )
+        from shap import PermutationExplainer

-
-                self.full_data_long.series_id == self.series_id
-            ].set_index(datetime_col_name)
+        datetime_col_name = self.datasets._datetime_column_name

-
-
-
-
-
-
-
+        exp_start_time = time.time()
+        global_ex_time = 0
+        local_ex_time = 0
+        logger.info(
+            f"Calculating explanations using {self.spec.explanations_accuracy_mode} mode"
+        )
+        ratio = SpeedAccuracyMode.ratio[self.spec.explanations_accuracy_mode]

-
-
-
+        for s_id, data_i in self.datasets.get_data_by_series(
+            include_horizon=False
+        ).items():
+            explain_predict_fn = self.get_explain_predict_fn(series_id=s_id)

-
-
-
-
-
-                keep_index=False
-                if self.spec.model == SupportedModels.AutoMLX
-                else True,
+            data_trimmed = data_i.tail(max(int(len(data_i) * ratio), 5)).reset_index(
+                drop=True
+            )
+            data_trimmed[datetime_col_name] = data_trimmed[datetime_col_name].apply(
+                lambda x: x.timestamp()
            )

-
-
-                nsamples=50,
+            kernel_explnr = PermutationExplainer(
+                model=explain_predict_fn, masker=data_trimmed
            )
+            kernel_explnr_vals = kernel_explnr.shap_values(data_trimmed)
+
+            exp_end_time = time.time()
+            global_ex_time = global_ex_time + exp_end_time - exp_start_time
+
+            self.local_explainer(
+                kernel_explnr, series_id=s_id, datetime_col_name=datetime_col_name
+            )
+            local_ex_time = local_ex_time + time.time() - exp_end_time

            if not len(kernel_explnr_vals):
                logger.warn(
                    f"No explanations generated. Ensure that additional data has been provided."
                )
            else:
-                self.global_explanation[
+                self.global_explanation[s_id] = dict(
                    zip(
-
-                        np.average(np.absolute(kernel_explnr_vals), axis=0),
+                        data_trimmed.columns[1:],
+                        np.average(np.absolute(kernel_explnr_vals[:, 1:]), axis=0),
                    )
                )

-
-
-
+        logger.info(
+            "Global explanations generation completed in %s seconds", global_ex_time
+        )
+        logger.info(
+            "Local explanations generation completed in %s seconds", local_ex_time
+        )

    def local_explainer(self, kernel_explainer, series_id, datetime_col_name) -> None:
        """
```
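Because SHAP explainers expect purely numeric features, `explain_model` converts the datetime column to epoch seconds before building the masker, and the per-series predict function converts it back before calling the model. A self-contained sketch of that round trip in plain pandas (assuming the ADS helpers `datetime_to_seconds`/`seconds_to_datetime` behave roughly like this):

```python
import pandas as pd

ts = pd.Series(pd.date_range("2024-01-01", periods=3, freq="D"))

# Forward: datetimes -> epoch seconds, so every SHAP feature is numeric.
seconds = ts.apply(lambda x: x.timestamp())

# Backward (inside the predict fn): epoch seconds -> datetimes for the model.
restored = pd.to_datetime(seconds, unit="s")

assert (restored == ts).all()
```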
```diff
@@ -642,24 +699,34 @@ class ForecastOperatorBaseModel(ABC):
        ----------
        kernel_explainer: The kernel explainer object to use for generating explanations.
        """
-
-        # data = self.full_data_dict.get(series_id).set_index(datetime_col_name)
-        data = self.bg_data[-self.spec.horizon :][list(self.dataset_cols)]
+        data = self.datasets.get_horizon_at_series(s_id=series_id)

+        data[datetime_col_name] = datetime_to_seconds(data[datetime_col_name])
+        data = data.reset_index(drop=True)
        # Generate local SHAP values using the kernel explainer
-        local_kernel_explnr_vals = kernel_explainer.shap_values(data
+        local_kernel_explnr_vals = kernel_explainer.shap_values(data)

        # Convert the SHAP values into a DataFrame
        local_kernel_explnr_df = pd.DataFrame(
-            local_kernel_explnr_vals, columns=
+            local_kernel_explnr_vals, columns=data.columns
        )
+        self.local_explanation[series_id] = local_kernel_explnr_df

-
-
-
-
-
-
+    def get_explain_predict_fn(self, series_id, fcst_col_name="yhat"):
+        def _custom_predict(
+            data,
+            model=self.models[series_id],
+            dt_column_name=self.datasets._datetime_column_name,
+        ):
+            """
+            data: ForecastDatasets.get_data_at_series(s_id)
+            """
+            data[dt_column_name] = seconds_to_datetime(
+                data[dt_column_name], dt_format=self.spec.datetime_column.format
            )
+            data = self.preprocess(df=data, series_id=series_id)
+            data[self.original_target_column] = None
+            fcst = model.predict(data)[fcst_col_name]
+            return fcst

-
+        return _custom_predict
```
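`_custom_predict` is handed to the explainer as a one-argument callable; the per-series model and column name are pinned through default arguments, which Python evaluates once at definition time. A minimal sketch of the pattern (toy callables, not the operator's models):

```python
def make_predict_fn(models, series_id):
    def _predict(data, model=models[series_id]):  # model bound at definition time
        return model(data)

    return _predict

models = {"A": lambda x: x * 2, "B": lambda x: x + 1}
predict_a = make_predict_fn(models, "A")
models["A"] = None  # later mutation does not affect the bound default
print(predict_a(10))  # 20
```

Binding via a default argument (rather than a free variable) makes the closure immune to later reassignment of the models dict entry while each series still gets its own model.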