oracle-ads 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. ads/aqua/__init__.py +12 -0
  2. ads/aqua/base.py +324 -0
  3. ads/aqua/cli.py +19 -0
  4. ads/aqua/config/deployment_config_defaults.json +9 -0
  5. ads/aqua/config/resource_limit_names.json +7 -0
  6. ads/aqua/constants.py +45 -0
  7. ads/aqua/data.py +40 -0
  8. ads/aqua/decorator.py +101 -0
  9. ads/aqua/deployment.py +643 -0
  10. ads/aqua/dummy_data/icon.txt +1 -0
  11. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  12. ads/aqua/dummy_data/oci_models.json +1 -0
  13. ads/aqua/dummy_data/readme.md +26 -0
  14. ads/aqua/evaluation.py +1751 -0
  15. ads/aqua/exception.py +82 -0
  16. ads/aqua/extension/__init__.py +40 -0
  17. ads/aqua/extension/base_handler.py +138 -0
  18. ads/aqua/extension/common_handler.py +21 -0
  19. ads/aqua/extension/deployment_handler.py +202 -0
  20. ads/aqua/extension/evaluation_handler.py +135 -0
  21. ads/aqua/extension/finetune_handler.py +66 -0
  22. ads/aqua/extension/model_handler.py +59 -0
  23. ads/aqua/extension/ui_handler.py +201 -0
  24. ads/aqua/extension/utils.py +23 -0
  25. ads/aqua/finetune.py +579 -0
  26. ads/aqua/job.py +29 -0
  27. ads/aqua/model.py +819 -0
  28. ads/aqua/training/__init__.py +4 -0
  29. ads/aqua/training/exceptions.py +459 -0
  30. ads/aqua/ui.py +453 -0
  31. ads/aqua/utils.py +715 -0
  32. ads/cli.py +37 -6
  33. ads/common/auth.py +7 -0
  34. ads/common/decorator/__init__.py +7 -3
  35. ads/common/decorator/require_nonempty_arg.py +65 -0
  36. ads/common/object_storage_details.py +166 -7
  37. ads/common/oci_client.py +18 -1
  38. ads/common/oci_logging.py +2 -2
  39. ads/common/oci_mixin.py +4 -5
  40. ads/common/serializer.py +34 -5
  41. ads/common/utils.py +75 -10
  42. ads/config.py +40 -1
  43. ads/dataset/correlation_plot.py +10 -12
  44. ads/jobs/ads_job.py +43 -25
  45. ads/jobs/builders/infrastructure/base.py +4 -2
  46. ads/jobs/builders/infrastructure/dsc_job.py +49 -39
  47. ads/jobs/builders/runtimes/base.py +71 -1
  48. ads/jobs/builders/runtimes/container_runtime.py +4 -4
  49. ads/jobs/builders/runtimes/pytorch_runtime.py +10 -63
  50. ads/jobs/templates/driver_pytorch.py +27 -10
  51. ads/model/artifact_downloader.py +84 -14
  52. ads/model/artifact_uploader.py +25 -23
  53. ads/model/datascience_model.py +388 -38
  54. ads/model/deployment/model_deployment.py +10 -2
  55. ads/model/generic_model.py +8 -0
  56. ads/model/model_file_description_schema.json +68 -0
  57. ads/model/model_metadata.py +1 -1
  58. ads/model/service/oci_datascience_model.py +34 -5
  59. ads/opctl/config/merger.py +2 -2
  60. ads/opctl/operator/__init__.py +3 -1
  61. ads/opctl/operator/cli.py +7 -1
  62. ads/opctl/operator/cmd.py +3 -3
  63. ads/opctl/operator/common/errors.py +2 -1
  64. ads/opctl/operator/common/operator_config.py +22 -3
  65. ads/opctl/operator/common/utils.py +16 -0
  66. ads/opctl/operator/lowcode/anomaly/MLoperator +15 -0
  67. ads/opctl/operator/lowcode/anomaly/README.md +209 -0
  68. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  69. ads/opctl/operator/lowcode/anomaly/__main__.py +104 -0
  70. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  71. ads/opctl/operator/lowcode/anomaly/const.py +88 -0
  72. ads/opctl/operator/lowcode/anomaly/environment.yaml +12 -0
  73. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  74. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +147 -0
  75. ads/opctl/operator/lowcode/anomaly/model/automlx.py +89 -0
  76. ads/opctl/operator/lowcode/anomaly/model/autots.py +103 -0
  77. ads/opctl/operator/lowcode/anomaly/model/base_model.py +354 -0
  78. ads/opctl/operator/lowcode/anomaly/model/factory.py +67 -0
  79. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  80. ads/opctl/operator/lowcode/anomaly/operator_config.py +105 -0
  81. ads/opctl/operator/lowcode/anomaly/schema.yaml +359 -0
  82. ads/opctl/operator/lowcode/anomaly/utils.py +81 -0
  83. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  84. ads/opctl/operator/lowcode/common/const.py +10 -0
  85. ads/opctl/operator/lowcode/common/data.py +96 -0
  86. ads/opctl/operator/lowcode/common/errors.py +41 -0
  87. ads/opctl/operator/lowcode/common/transformations.py +191 -0
  88. ads/opctl/operator/lowcode/common/utils.py +250 -0
  89. ads/opctl/operator/lowcode/forecast/README.md +3 -2
  90. ads/opctl/operator/lowcode/forecast/__main__.py +18 -2
  91. ads/opctl/operator/lowcode/forecast/cmd.py +8 -7
  92. ads/opctl/operator/lowcode/forecast/const.py +17 -1
  93. ads/opctl/operator/lowcode/forecast/environment.yaml +3 -2
  94. ads/opctl/operator/lowcode/forecast/model/arima.py +106 -117
  95. ads/opctl/operator/lowcode/forecast/model/automlx.py +204 -180
  96. ads/opctl/operator/lowcode/forecast/model/autots.py +144 -253
  97. ads/opctl/operator/lowcode/forecast/model/base_model.py +326 -259
  98. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +325 -176
  99. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +293 -237
  100. ads/opctl/operator/lowcode/forecast/model/prophet.py +191 -208
  101. ads/opctl/operator/lowcode/forecast/operator_config.py +24 -33
  102. ads/opctl/operator/lowcode/forecast/schema.yaml +116 -29
  103. ads/opctl/operator/lowcode/forecast/utils.py +186 -356
  104. ads/opctl/operator/lowcode/pii/model/guardrails.py +18 -15
  105. ads/opctl/operator/lowcode/pii/model/report.py +7 -7
  106. ads/opctl/operator/lowcode/pii/operator_config.py +1 -8
  107. ads/opctl/operator/lowcode/pii/utils.py +0 -82
  108. ads/opctl/operator/runtime/runtime.py +3 -2
  109. ads/telemetry/base.py +62 -0
  110. ads/telemetry/client.py +105 -0
  111. ads/telemetry/telemetry.py +6 -3
  112. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/METADATA +44 -7
  113. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/RECORD +116 -59
  114. ads/opctl/operator/lowcode/forecast/model/transformations.py +0 -125
  115. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/LICENSE.txt +0 -0
  116. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/WHEEL +0 -0
  117. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/entry_points.txt +0 -0
@@ -4,6 +4,7 @@
4
4
  # Copyright (c) 2023 Oracle and/or its affiliates.
5
5
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
6
 
7
+ import json
7
8
  import os
8
9
  import tempfile
9
10
  import time
@@ -15,15 +16,39 @@ import fsspec
15
16
  import numpy as np
16
17
  import pandas as pd
17
18
 
18
- from ads.opctl.operator.lowcode.forecast.utils import default_signer
19
+ from ads.opctl.operator.lowcode.forecast.utils import (
20
+ default_signer,
21
+ evaluate_train_metrics,
22
+ get_forecast_plots,
23
+ _build_metrics_df,
24
+ _build_metrics_per_horizon,
25
+ load_pkl,
26
+ write_pkl,
27
+ _label_encode_dataframe,
28
+ )
19
29
  from ads.common.object_storage_details import ObjectStorageDetails
20
30
  from ads.opctl import logger
21
31
 
22
- from .. import utils
23
- from ..const import SUMMARY_METRICS_HORIZON_LIMIT, SupportedMetrics, SupportedModels
32
+ from ads.opctl.operator.lowcode.common.utils import (
33
+ human_time_friendly,
34
+ enable_print,
35
+ disable_print,
36
+ write_data,
37
+ merged_category_column_name,
38
+ datetime_to_seconds,
39
+ seconds_to_datetime,
40
+ find_output_dirname,
41
+ )
42
+ from ..const import (
43
+ SUMMARY_METRICS_HORIZON_LIMIT,
44
+ SupportedMetrics,
45
+ SupportedModels,
46
+ SpeedAccuracyMode,
47
+ )
24
48
  from ..operator_config import ForecastOperatorConfig, ForecastOperatorSpec
25
49
  from ads.common.decorator.runtime_dependency import runtime_dependency
26
50
  from .forecast_datasets import ForecastDatasets, ForecastOutput
51
+ from ads.opctl.operator.lowcode.forecast.model.forecast_datasets import TestData
27
52
 
28
53
 
29
54
  class ForecastOperatorBaseModel(ABC):
@@ -41,23 +66,27 @@ class ForecastOperatorBaseModel(ABC):
41
66
  self.spec: ForecastOperatorSpec = config.spec
42
67
  self.datasets: ForecastDatasets = datasets
43
68
 
44
- self.original_user_data = datasets.original_user_data
45
- self.original_total_data = datasets.original_total_data
46
- self.original_additional_data = datasets.original_additional_data
47
- self.full_data_dict = datasets.full_data_dict
48
- self.target_columns = datasets.target_columns
49
- self.categories = datasets.categories
69
+ self.full_data_dict = datasets.get_data_by_series()
50
70
 
51
71
  self.test_eval_metrics = None
52
72
  self.original_target_column = self.spec.target_column
73
+ self.dt_column_name = self.spec.datetime_column.name
74
+
75
+ self.model_parameters = dict()
76
+ self.loaded_models = None
53
77
 
54
78
  # these fields are populated in the _build_model() method
55
79
  self.models = None
80
+
56
81
  # "outputs" is a list of outputs generated by the models. These should only be generated when the framework requires the original output for plotting
57
82
  self.outputs = None
58
83
  self.forecast_output = None
84
+ self.errors_dict = dict()
85
+ self.le = dict()
86
+
87
+ self.formatted_global_explanation = None
88
+ self.formatted_local_explanation = None
59
89
 
60
- self.train_metrics = False
61
90
  self.forecast_col_name = "yhat"
62
91
  self.perform_tuning = self.spec.tuning != None
63
92
 
@@ -73,10 +102,14 @@ class ForecastOperatorBaseModel(ABC):
73
102
  warnings.simplefilter(action="ignore", category=ConvergenceWarning)
74
103
  import datapane as dp
75
104
 
76
- # load data and build models
105
+ # load models if given
106
+ if self.spec.previous_output_dir is not None:
107
+ self._load_model()
108
+
77
109
  start_time = time.time()
78
110
  result_df = self._build_model()
79
111
  elapsed_time = time.time() - start_time
112
+ logger.info("Building the models completed in %s seconds", elapsed_time)
80
113
 
81
114
  # Generate metrics
82
115
  summary_metrics = None
@@ -84,21 +117,7 @@ class ForecastOperatorBaseModel(ABC):
84
117
  self.eval_metrics = None
85
118
 
86
119
  if self.spec.generate_report or self.spec.generate_metrics:
87
- if self.train_metrics:
88
- self.eval_metrics = utils.evaluate_train_metrics(
89
- self.target_columns,
90
- self.datasets,
91
- self.forecast_output,
92
- self.spec.datetime_column.name,
93
- target_col=self.forecast_col_name,
94
- )
95
- else:
96
- try:
97
- self.eval_metrics = self._generate_train_metrics()
98
- except NotImplementedError:
99
- logger.warn(
100
- f"Training Metrics are not available for model type {self.spec.model}"
101
- )
120
+ self.eval_metrics = self.generate_train_metrics()
102
121
 
103
122
  if self.spec.test_data:
104
123
  try:
@@ -107,10 +126,6 @@ class ForecastOperatorBaseModel(ABC):
107
126
  summary_metrics,
108
127
  test_data,
109
128
  ) = self._test_evaluate_metrics(
110
- target_columns=self.target_columns,
111
- test_filename=self.spec.test_data.url,
112
- output=self.forecast_output,
113
- target_col=self.forecast_col_name,
114
129
  elapsed_time=elapsed_time,
115
130
  )
116
131
  except Exception as e:
@@ -125,94 +140,114 @@ class ForecastOperatorBaseModel(ABC):
125
140
  other_sections,
126
141
  ) = self._generate_report()
127
142
 
128
- ds_column_series = self.datasets.get_longest_datetime_column()
129
-
130
143
  title_text = dp.Text("# Forecast Report")
131
144
 
132
- md_columns = " * ".join([f"{x} \n" for x in self.target_columns])
145
+ md_columns = " * ".join(
146
+ [f"{s_id} \n" for s_id in self.datasets.list_series_ids()]
147
+ )
148
+
149
+ header_section = dp.Blocks(
150
+ blocks=[
151
+ dp.Text(f"You selected the **`{self.spec.model}`** model."),
152
+ model_description,
153
+ dp.Text(
154
+ "Based on your dataset, you could have also selected "
155
+ f"any of the models: `{'`, `'.join(SupportedModels.keys())}`."
156
+ ),
157
+ dp.Group(
158
+ dp.BigNumber(
159
+ heading="Analysis was completed in ",
160
+ value=human_time_friendly(elapsed_time),
161
+ ),
162
+ dp.BigNumber(
163
+ heading="Starting time index",
164
+ value=self.datasets.get_earliest_timestamp().strftime(
165
+ "%B %d, %Y"
166
+ ),
167
+ ),
168
+ dp.BigNumber(
169
+ heading="Ending time index",
170
+ value=self.datasets.get_latest_timestamp().strftime(
171
+ "%B %d, %Y"
172
+ ),
173
+ ),
174
+ dp.BigNumber(
175
+ heading="Num series",
176
+ value=len(self.datasets.list_series_ids()),
177
+ ),
178
+ columns=4,
179
+ ),
180
+ ]
181
+ )
182
+
133
183
  first_10_rows_blocks = [
134
184
  dp.DataTable(
135
- df.head(10).rename({col: self.spec.target_column}, axis=1),
185
+ df.head(10),
136
186
  caption="Start",
137
- label=col,
187
+ label=s_id,
138
188
  )
139
- for col, df in self.full_data_dict.items()
189
+ for s_id, df in self.full_data_dict.items()
140
190
  ]
141
191
 
142
192
  last_10_rows_blocks = [
143
193
  dp.DataTable(
144
- df.tail(10).rename({col: self.spec.target_column}, axis=1),
194
+ df.tail(10),
145
195
  caption="End",
146
- label=col,
196
+ label=s_id,
147
197
  )
148
- for col, df in self.full_data_dict.items()
198
+ for s_id, df in self.full_data_dict.items()
149
199
  ]
150
200
 
151
201
  data_summary_blocks = [
152
202
  dp.DataTable(
153
- df.rename({col: self.spec.target_column}, axis=1).describe(),
203
+ df.describe(),
154
204
  caption="Summary Statistics",
155
- label=col,
205
+ label=s_id,
156
206
  )
157
- for col, df in self.full_data_dict.items()
207
+ for s_id, df in self.full_data_dict.items()
158
208
  ]
159
- summary = dp.Blocks(
160
- dp.Select(
209
+
210
+ series_name = merged_category_column_name(
211
+ self.spec.target_category_columns
212
+ )
213
+ series_subtext = dp.Text(f"Indexed by {series_name}")
214
+ first_10_title = dp.Text("### First 10 Rows of Data")
215
+ last_10_title = dp.Text("### Last 10 Rows of Data")
216
+ summary_title = dp.Text("### Data Summary Statistics")
217
+
218
+ if series_name is not None and len(self.datasets.list_series_ids()) > 1:
219
+ data_summary_sec = dp.Blocks(
161
220
  blocks=[
162
- dp.Group(
163
- dp.Text(
164
- f"You selected the **`{self.spec.model}`** model."
165
- ),
166
- model_description,
167
- dp.Text(
168
- "Based on your dataset, you could have also selected "
169
- f"any of the models: `{'`, `'.join(SupportedModels.keys())}`."
170
- ),
171
- dp.Group(
172
- dp.BigNumber(
173
- heading="Analysis was completed in ",
174
- value=utils.human_time_friendly(elapsed_time),
175
- ),
176
- dp.BigNumber(
177
- heading="Starting time index",
178
- value=ds_column_series.min().strftime(
179
- "%B %d, %Y"
180
- ),
181
- ),
182
- dp.BigNumber(
183
- heading="Ending time index",
184
- value=ds_column_series.max().strftime(
185
- "%B %d, %Y"
186
- ),
187
- ),
188
- dp.BigNumber(
189
- heading="Num series",
190
- value=len(self.target_columns),
191
- ),
192
- columns=4,
193
- ),
194
- dp.Text("### First 10 Rows of Data"),
195
- dp.Select(blocks=first_10_rows_blocks)
196
- if len(first_10_rows_blocks) > 1
197
- else first_10_rows_blocks[0],
198
- dp.Text("----"),
199
- dp.Text("### Last 10 Rows of Data"),
200
- dp.Select(blocks=last_10_rows_blocks)
201
- if len(last_10_rows_blocks) > 1
202
- else last_10_rows_blocks[0],
203
- dp.Text("### Data Summary Statistics"),
204
- dp.Select(blocks=data_summary_blocks)
205
- if len(data_summary_blocks) > 1
206
- else data_summary_blocks[0],
207
- label="Summary",
208
- ),
209
- dp.Text(
210
- "The following report compares a variety of metrics and plots "
211
- f"for your target columns: \n {md_columns}.\n",
212
- label="Target Columns",
213
- ),
221
+ first_10_title,
222
+ series_subtext,
223
+ dp.Select(blocks=first_10_rows_blocks),
224
+ last_10_title,
225
+ series_subtext,
226
+ dp.Select(blocks=last_10_rows_blocks),
227
+ summary_title,
228
+ series_subtext,
229
+ dp.Select(blocks=data_summary_blocks),
230
+ dp.Text("----"),
214
231
  ]
215
- ),
232
+ )
233
+ else:
234
+ data_summary_sec = dp.Blocks(
235
+ blocks=[
236
+ first_10_title,
237
+ first_10_rows_blocks[0],
238
+ last_10_title,
239
+ last_10_rows_blocks[0],
240
+ summary_title,
241
+ data_summary_blocks[0],
242
+ dp.Text("----"),
243
+ ]
244
+ )
245
+
246
+ summary = dp.Group(
247
+ blocks=[
248
+ header_section,
249
+ data_summary_sec,
250
+ ]
216
251
  )
217
252
 
218
253
  test_metrics_sections = []
@@ -236,19 +271,22 @@ class ForecastOperatorBaseModel(ABC):
236
271
  train_metrics_sections = [sec9_text, sec9]
237
272
 
238
273
  forecast_text = dp.Text(f"## Forecasted Data Overlaying Historical")
239
- forecast_sec = utils.get_forecast_plots(
274
+ forecast_sec = get_forecast_plots(
240
275
  self.forecast_output,
241
- self.target_columns,
242
276
  horizon=self.spec.horizon,
243
277
  test_data=test_data,
244
278
  ci_interval_width=self.spec.confidence_interval_width,
245
279
  )
246
- forecast_plots = [forecast_text, forecast_sec]
280
+ if series_name is not None and len(self.datasets.list_series_ids()) > 1:
281
+ forecast_plots = [forecast_text, series_subtext, forecast_sec]
282
+ else:
283
+ forecast_plots = [forecast_text, forecast_sec]
247
284
 
248
285
  yaml_appendix_title = dp.Text(f"## Reference: YAML File")
249
286
  yaml_appendix = dp.Code(code=self.config.to_yaml(), language="yaml")
250
287
  report_sections = (
251
- [title_text, summary]
288
+ [title_text]
289
+ + [summary]
252
290
  + forecast_plots
253
291
  + other_sections
254
292
  + test_metrics_sections
@@ -264,69 +302,44 @@ class ForecastOperatorBaseModel(ABC):
264
302
  test_metrics_df=self.test_eval_metrics,
265
303
  )
266
304
 
267
- def _test_evaluate_metrics(
268
- self, target_columns, test_filename, output, target_col="yhat", elapsed_time=0
269
- ):
305
+ def _test_evaluate_metrics(self, elapsed_time=0):
270
306
  total_metrics = pd.DataFrame()
271
307
  summary_metrics = pd.DataFrame()
272
- data = None
273
- try:
274
- storage_options = (
275
- default_signer()
276
- if ObjectStorageDetails.is_oci_path(test_filename)
277
- else {}
278
- )
279
- data = utils._load_data(
280
- filename=test_filename,
281
- format=self.spec.test_data.format,
282
- storage_options=storage_options,
283
- columns=self.spec.test_data.columns,
284
- )
285
- except pd.errors.EmptyDataError:
286
- logger.warn("Empty testdata file")
287
- return total_metrics, summary_metrics, None
288
-
289
- if data.empty:
290
- return total_metrics, summary_metrics, None
308
+ data = TestData(self.spec)
291
309
 
292
- data = self._preprocess(
293
- data, self.spec.datetime_column.name, self.spec.datetime_column.format
294
- )
295
- data, confirm_targ_columns = utils._clean_data(
296
- data=data,
297
- target_column=self.original_target_column,
298
- target_category_columns=self.spec.target_category_columns,
299
- datetime_column="ds",
300
- )
301
-
302
- # Calculating Test Metrics
303
- for cat in self.forecast_output.list_categories():
304
- target_column_i = self.forecast_output.category_to_target[cat]
305
- output_forecast_i = self.forecast_output.get_category(cat)
306
- # Only columns present in test file will be used to generate test error
307
- if target_column_i in data:
308
- # Assuming that predictions have all forecast values
309
- dates = output_forecast_i["Date"]
310
- # Filling zeros for any date missing in test data to maintain consistency in metric calculation as in all other missing values cases it comes as 0
311
- y_true = [
312
- data.loc[data["ds"] == date, target_column_i].values[0]
313
- if date in data["ds"].values
314
- else 0
315
- for date in dates
310
+ # Generate y_pred and y_true for each series
311
+ for s_id in self.forecast_output.list_series_ids():
312
+ try:
313
+ y_true = data.get_data_for_series(s_id)[data.target_name].values[
314
+ -self.spec.horizon :
316
315
  ]
317
- y_pred_i = output_forecast_i["forecast_value"].values
318
- y_pred = np.asarray(y_pred_i[-len(y_true) :])
319
-
320
- metrics_df = utils._build_metrics_df(
321
- y_true=y_true[-self.spec.horizon :],
322
- y_pred=y_pred[-self.spec.horizon :],
323
- column_name=target_column_i,
324
- )
325
- total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
326
- else:
316
+ except KeyError as ke:
327
317
  logger.warn(
328
- f"Error Generating Metrics: Unable to find {target_column_i} in the test data."
318
+ f"Error Generating Metrics: Unable to find {s_id} in the test data. Error: {ke.args}"
329
319
  )
320
+ y_pred = self.forecast_output.get_forecast(s_id)["forecast_value"].values[
321
+ -self.spec.horizon :
322
+ ]
323
+
324
+ drop_na_mask = ~np.isnan(y_true) & ~np.isnan(y_pred)
325
+ if not drop_na_mask.all(): # There is a missing value
326
+ if drop_na_mask.any(): # All values are missing
327
+ logger.debug(
328
+ f"No values in the test data for series: {s_id}. This will affect the test metrics."
329
+ )
330
+ continue
331
+ logger.debug(
332
+ f"Missing values in the test data for series: {s_id}. This will affect the test metrics."
333
+ )
334
+ y_true = y_true[drop_na_mask]
335
+ y_pred = y_pred[drop_na_mask]
336
+
337
+ metrics_df = _build_metrics_df(
338
+ y_true=y_true,
339
+ y_pred=y_pred,
340
+ series_id=s_id,
341
+ )
342
+ total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
330
343
 
331
344
  if total_metrics.empty:
332
345
  return total_metrics, summary_metrics, data
@@ -370,20 +383,10 @@ class ForecastOperatorBaseModel(ABC):
370
383
 
371
384
  """Calculates Mean sMAPE, Median sMAPE, Mean MAPE, Median MAPE, Mean wMAPE, Median wMAPE values for each horizon
372
385
  if horizon <= 10."""
373
- target_columns_in_output = set(target_columns).intersection(data.columns)
374
386
  if self.spec.horizon <= SUMMARY_METRICS_HORIZON_LIMIT:
375
- if set(self.forecast_output.list_target_category_columns()) != set(
376
- target_columns_in_output
377
- ):
378
- logger.warn(
379
- f"Column Mismatch between Forecast Output and Target Columns"
380
- )
381
- metrics_per_horizon = utils._build_metrics_per_horizon(
382
- data=data,
387
+ metrics_per_horizon = _build_metrics_per_horizon(
388
+ test_data=data,
383
389
  output=self.forecast_output,
384
- target_columns=target_columns,
385
- target_col=target_col,
386
- horizon_periods=self.spec.horizon,
387
390
  )
388
391
  if not metrics_per_horizon.empty:
389
392
  summary_metrics = pd.concat([summary_metrics, metrics_per_horizon])
@@ -417,17 +420,9 @@ class ForecastOperatorBaseModel(ABC):
417
420
  """Saves resulting reports to the given folder."""
418
421
  import datapane as dp
419
422
 
420
- if self.spec.output_directory:
421
- output_dir = self.spec.output_directory.url
422
- else:
423
- output_dir = "tmp_fc_operator_result"
424
- logger.warn(
425
- "Since the output directory was not specified, the output will be saved to {} directory.".format(
426
- output_dir
427
- )
428
- )
423
+ unique_output_dir = find_output_dirname(self.spec.output_directory)
429
424
 
430
- if ObjectStorageDetails.is_oci_path(output_dir):
425
+ if ObjectStorageDetails.is_oci_path(unique_output_dir):
431
426
  storage_options = default_signer()
432
427
  else:
433
428
  storage_options = dict()
@@ -437,11 +432,11 @@ class ForecastOperatorBaseModel(ABC):
437
432
  # datapane html report
438
433
  with tempfile.TemporaryDirectory() as temp_dir:
439
434
  report_local_path = os.path.join(temp_dir, "___report.html")
440
- utils.block_print()
435
+ disable_print()
441
436
  dp.save_report(report_sections, report_local_path)
442
- utils.enable_print()
437
+ enable_print()
443
438
 
444
- report_path = os.path.join(output_dir, self.spec.report_filename)
439
+ report_path = os.path.join(unique_output_dir, self.spec.report_filename)
445
440
  with open(report_local_path) as f1:
446
441
  with fsspec.open(
447
442
  report_path,
@@ -451,19 +446,28 @@ class ForecastOperatorBaseModel(ABC):
451
446
  f2.write(f1.read())
452
447
 
453
448
  # forecast csv report
454
- utils._write_data(
449
+ write_data(
455
450
  data=result_df,
456
- filename=os.path.join(output_dir, self.spec.forecast_filename),
451
+ filename=os.path.join(unique_output_dir, self.spec.forecast_filename),
457
452
  format="csv",
458
453
  storage_options=storage_options,
459
454
  )
460
455
 
461
456
  # metrics csv report
462
457
  if self.spec.generate_metrics:
458
+ metrics_col_name = (
459
+ self.original_target_column
460
+ if self.datasets.has_artificial_series()
461
+ else "Series 1"
462
+ )
463
463
  if metrics_df is not None:
464
- utils._write_data(
465
- data=metrics_df.rename_axis("metrics").reset_index(),
466
- filename=os.path.join(output_dir, self.spec.metrics_filename),
464
+ write_data(
465
+ data=metrics_df.reset_index().rename(
466
+ {"index": "metrics", "Series 1": metrics_col_name}, axis=1
467
+ ),
468
+ filename=os.path.join(
469
+ unique_output_dir, self.spec.metrics_filename
470
+ ),
467
471
  format="csv",
468
472
  storage_options=storage_options,
469
473
  index=False,
@@ -476,10 +480,12 @@ class ForecastOperatorBaseModel(ABC):
476
480
  # test_metrics csv report
477
481
  if self.spec.test_data is not None:
478
482
  if test_metrics_df is not None:
479
- utils._write_data(
480
- data=test_metrics_df.rename_axis("metrics").reset_index(),
483
+ write_data(
484
+ data=test_metrics_df.reset_index().rename(
485
+ {"index": "metrics", "Series 1": metrics_col_name}, axis=1
486
+ ),
481
487
  filename=os.path.join(
482
- output_dir, self.spec.test_metrics_filename
488
+ unique_output_dir, self.spec.test_metrics_filename
483
489
  ),
484
490
  format="csv",
485
491
  storage_options=storage_options,
@@ -493,10 +499,10 @@ class ForecastOperatorBaseModel(ABC):
493
499
  if self.spec.generate_explanations:
494
500
  try:
495
501
  if self.formatted_global_explanation is not None:
496
- utils._write_data(
502
+ write_data(
497
503
  data=self.formatted_global_explanation,
498
504
  filename=os.path.join(
499
- output_dir, self.spec.global_explanation_filename
505
+ unique_output_dir, self.spec.global_explanation_filename
500
506
  ),
501
507
  format="csv",
502
508
  storage_options=storage_options,
@@ -508,10 +514,10 @@ class ForecastOperatorBaseModel(ABC):
508
514
  )
509
515
 
510
516
  if self.formatted_local_explanation is not None:
511
- utils._write_data(
517
+ write_data(
512
518
  data=self.formatted_local_explanation,
513
519
  filename=os.path.join(
514
- output_dir, self.spec.local_explanation_filename
520
+ unique_output_dir, self.spec.local_explanation_filename
515
521
  ),
516
522
  format="csv",
517
523
  storage_options=storage_options,
@@ -525,17 +531,52 @@ class ForecastOperatorBaseModel(ABC):
525
531
  logger.warn(
526
532
  "Unable to generate explanations for this model type or for this dataset."
527
533
  )
534
+ logger.debug(f"Got error: {e.args}")
535
+
536
+ if self.spec.generate_model_parameters:
537
+ # model params
538
+ write_data(
539
+ data=pd.DataFrame.from_dict(self.model_parameters),
540
+ filename=os.path.join(unique_output_dir, "model_params.json"),
541
+ format="json",
542
+ storage_options=storage_options,
543
+ index=True,
544
+ indent=4,
545
+ )
546
+
547
+ # model pickle
548
+ if self.spec.generate_model_pickle:
549
+ self._save_model(unique_output_dir, storage_options)
550
+
528
551
  logger.info(
529
552
  f"The outputs have been successfully "
530
- f"generated and placed into the directory: {output_dir}."
553
+ f"generated and placed into the directory: {unique_output_dir}."
531
554
  )
555
+ print(
556
+ f"The outputs have been successfully generated and placed into the directory: {unique_output_dir}."
557
+ )
558
+ if self.errors_dict:
559
+ write_data(
560
+ data=pd.DataFrame(self.errors_dict.items(), columns=["model", "error"]),
561
+ filename=os.path.join(
562
+ unique_output_dir, self.spec.errors_dict_filename
563
+ ),
564
+ format="csv",
565
+ storage_options=storage_options,
566
+ index=True,
567
+ )
568
+ else:
569
+ logger.info(f"All modeling completed successfully.")
532
570
 
533
- def _preprocess(self, data, ds_column, datetime_format):
571
+ def preprocess(self, df, series_id):
534
572
  """The method that needs to be implemented on the particular model level."""
535
- data["ds"] = pd.to_datetime(data[ds_column], format=datetime_format)
536
- if ds_column != "ds":
537
- data.drop([ds_column], axis=1, inplace=True)
538
- return data
573
+ data = df.rename(
574
+ {self.dt_column_name: "ds", self.original_target_column: "y"}, axis=1
575
+ )
576
+ self.le[series_id], df_encoded = _label_encode_dataframe(
577
+ data, no_encode={"ds", "y"}
578
+ )
579
+ return df_encoded
539
580
 
540
581
  @abstractmethod
541
582
  def _generate_report(self):
@@ -551,20 +592,40 @@ class ForecastOperatorBaseModel(ABC):
551
592
  The method that needs to be implemented on the particular model level.
552
593
  """
553
594
 
554
- def _generate_train_metrics(self) -> pd.DataFrame:
595
+ def drop_horizon(self, df: pd.DataFrame) -> pd.DataFrame:
596
+ return df.iloc[: -self.spec.horizon]
597
+
598
+ def get_horizon(self, df: pd.DataFrame) -> pd.DataFrame:
599
+ return df.iloc[-self.spec.horizon :]
600
+
601
+ def generate_train_metrics(self) -> pd.DataFrame:
555
602
  """
556
603
  Generate Training Metrics when fitted data is not available.
557
604
  The method that needs to be implemented on the particular model level.
558
605
  """
559
- raise NotImplementedError
606
+ return evaluate_train_metrics(self.forecast_output)
607
+
608
+ def _load_model(self):
609
+ try:
610
+ self.loaded_models = load_pkl(self.spec.previous_output_dir + "/model.pkl")
611
+ except:
612
+ logger.info("model.pkl is not present")
613
+
614
+ def _save_model(self, output_dir, storage_options):
615
+ write_pkl(
616
+ obj=self.models,
617
+ filename="model.pkl",
618
+ output_dir=output_dir,
619
+ storage_options=storage_options,
620
+ )
560
621
 
561
622
  @runtime_dependency(
562
623
  module="shap",
563
624
  err_msg=(
564
- "Please run `pip3 install shap` to install the required dependencies for model explanation."
625
+ "Please run `python3 -m pip install shap` to install the required dependencies for model explanation."
565
626
  ),
566
627
  )
567
- def explain_model(self, datetime_col_name, explain_predict_fn) -> dict:
628
+ def explain_model(self):
568
629
  """
569
630
  Generates an explanation for the model by using the SHAP (Shapley Additive exPlanations) library.
570
631
  This function calculates the SHAP values for each feature in the dataset and stores the results in the `global_explanation` dictionary.
@@ -574,65 +635,61 @@ class ForecastOperatorBaseModel(ABC):
574
635
  dict: A dictionary containing the global explanation for each feature in the dataset.
575
636
  The keys are the feature names and the values are the average absolute SHAP values.
576
637
  """
577
- from shap import KernelExplainer
578
-
579
- for series_id in self.target_columns:
580
- self.series_id = series_id
581
- if self.spec.model == SupportedModels.AutoTS:
582
- self.dataset_cols = (
583
- self.full_data_long.loc[
584
- self.full_data_long.series_id == self.series_id
585
- ]
586
- .set_index(datetime_col_name)
587
- .columns
588
- )
638
+ from shap import PermutationExplainer
589
639
 
590
- self.bg_data = self.full_data_long.loc[
591
- self.full_data_long.series_id == self.series_id
592
- ].set_index(datetime_col_name)
640
+ datetime_col_name = self.datasets._datetime_column_name
593
641
 
594
- else:
595
- self.dataset_cols = (
596
- self.full_data_dict.get(series_id)
597
- .set_index(datetime_col_name)
598
- .drop(series_id, axis=1)
599
- .columns
600
- )
642
+ exp_start_time = time.time()
643
+ global_ex_time = 0
644
+ local_ex_time = 0
645
+ logger.info(
646
+ f"Calculating explanations using {self.spec.explanations_accuracy_mode} mode"
647
+ )
648
+ ratio = SpeedAccuracyMode.ratio[self.spec.explanations_accuracy_mode]
601
649
 
602
- self.bg_data = self.full_data_dict.get(series_id).set_index(
603
- datetime_col_name
604
- )
650
+ for s_id, data_i in self.datasets.get_data_by_series(
651
+ include_horizon=False
652
+ ).items():
653
+ explain_predict_fn = self.get_explain_predict_fn(series_id=s_id)
605
654
 
606
- kernel_explnr = KernelExplainer(
607
- model=explain_predict_fn,
608
- data=self.bg_data[list(self.dataset_cols)][: -self.spec.horizon][
609
- list(self.dataset_cols)
610
- ],
611
- keep_index=False
612
- if self.spec.model == SupportedModels.AutoMLX
613
- else True,
655
+ data_trimmed = data_i.tail(max(int(len(data_i) * ratio), 5)).reset_index(
656
+ drop=True
657
+ )
658
+ data_trimmed[datetime_col_name] = data_trimmed[datetime_col_name].apply(
659
+ lambda x: x.timestamp()
614
660
  )
615
661
 
616
- kernel_explnr_vals = kernel_explnr.shap_values(
617
- self.bg_data[: -self.spec.horizon][list(self.dataset_cols)],
618
- nsamples=50,
662
+ kernel_explnr = PermutationExplainer(
663
+ model=explain_predict_fn, masker=data_trimmed
619
664
  )
665
+ kernel_explnr_vals = kernel_explnr.shap_values(data_trimmed)
666
+
667
+ exp_end_time = time.time()
668
+ global_ex_time = global_ex_time + exp_end_time - exp_start_time
669
+
670
+ self.local_explainer(
671
+ kernel_explnr, series_id=s_id, datetime_col_name=datetime_col_name
672
+ )
673
+ local_ex_time = local_ex_time + time.time() - exp_end_time
620
674
 
621
675
  if not len(kernel_explnr_vals):
622
676
  logger.warn(
623
677
  f"No explanations generated. Ensure that additional data has been provided."
624
678
  )
625
679
  else:
626
- self.global_explanation[series_id] = dict(
680
+ self.global_explanation[s_id] = dict(
627
681
  zip(
628
- self.dataset_cols,
629
- np.average(np.absolute(kernel_explnr_vals), axis=0),
682
+ data_trimmed.columns[1:],
683
+ np.average(np.absolute(kernel_explnr_vals[:, 1:]), axis=0),
630
684
  )
631
685
  )
632
686
 
633
- self.local_explainer(
634
- kernel_explnr, series_id=series_id, datetime_col_name=datetime_col_name
635
- )
687
+ logger.info(
688
+ "Global explanations generation completed in %s seconds", global_ex_time
689
+ )
690
+ logger.info(
691
+ "Local explanations generation completed in %s seconds", local_ex_time
692
+ )
636
693
 
637
694
  def local_explainer(self, kernel_explainer, series_id, datetime_col_name) -> None:
638
695
  """
@@ -642,24 +699,34 @@ class ForecastOperatorBaseModel(ABC):
642
699
  ----------
643
700
  kernel_explainer: The kernel explainer object to use for generating explanations.
644
701
  """
645
- # Get the data for the series ID and select the relevant columns
646
- # data = self.full_data_dict.get(series_id).set_index(datetime_col_name)
647
- data = self.bg_data[-self.spec.horizon :][list(self.dataset_cols)]
702
+ data = self.datasets.get_horizon_at_series(s_id=series_id)
648
703
 
704
+ data[datetime_col_name] = datetime_to_seconds(data[datetime_col_name])
705
+ data = data.reset_index(drop=True)
649
706
  # Generate local SHAP values using the kernel explainer
650
- local_kernel_explnr_vals = kernel_explainer.shap_values(data, nsamples=50)
707
+ local_kernel_explnr_vals = kernel_explainer.shap_values(data)
651
708
 
652
709
  # Convert the SHAP values into a DataFrame
653
710
  local_kernel_explnr_df = pd.DataFrame(
654
- local_kernel_explnr_vals, columns=self.dataset_cols
711
+ local_kernel_explnr_vals, columns=data.columns
655
712
  )
713
+ self.local_explanation[series_id] = local_kernel_explnr_df
656
714
 
657
- # set the index of the DataFrame to the datetime column
658
- local_kernel_explnr_df.index = data.index
659
-
660
- if self.spec.model == SupportedModels.AutoTS:
661
- local_kernel_explnr_df.drop(
662
- ["series_id", self.spec.target_column], axis=1, inplace=True
715
def get_explain_predict_fn(self, series_id, fcst_col_name="yhat"):
    """
    Build the prediction callable used to explain the model of one series.

    Parameters
    ----------
    series_id:
        Key into ``self.models`` selecting the model to explain; also passed
        to ``self.preprocess``.
    fcst_col_name: str
        Column of the model's prediction output that is returned
        (default ``"yhat"``).

    Returns
    -------
    callable
        A function taking a data frame and returning the ``fcst_col_name``
        column of ``model.predict``.
    """

    def _custom_predict(
        data,
        model=self.models[series_id],
        dt_column_name=self.datasets._datetime_column_name,
    ):
        """
        data: ForecastDatasets.get_data_at_series(s_id)
        """
        # Work on a copy: assigning the datetime column below would otherwise
        # overwrite the caller's frame, and a second call on the same object
        # would try to convert already-converted values.
        data = data.copy()
        data[dt_column_name] = seconds_to_datetime(
            data[dt_column_name], dt_format=self.spec.datetime_column.format
        )
        data = self.preprocess(df=data, series_id=series_id)
        data[self.original_target_column] = None
        fcst = model.predict(data)[fcst_col_name]
        return fcst

    return _custom_predict