oracle-ads 2.12.4__py3-none-any.whl → 2.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/common/decorator.py +10 -0
- ads/aqua/evaluation/entities.py +12 -2
- ads/aqua/evaluation/evaluation.py +1 -1
- ads/aqua/extension/aqua_ws_msg_handler.py +2 -0
- ads/aqua/extension/base_handler.py +2 -0
- ads/aqua/finetuning/constants.py +3 -0
- ads/aqua/finetuning/finetuning.py +13 -2
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +6 -5
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +12 -8
- ads/opctl/operator/lowcode/anomaly/model/autots.py +6 -3
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +19 -7
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +9 -10
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +10 -11
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +6 -2
- ads/opctl/operator/lowcode/common/data.py +13 -11
- ads/opctl/operator/lowcode/forecast/model/arima.py +14 -12
- ads/opctl/operator/lowcode/forecast/model/automlx.py +26 -26
- ads/opctl/operator/lowcode/forecast/model/autots.py +16 -18
- ads/opctl/operator/lowcode/forecast/model/base_model.py +45 -36
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +36 -47
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +3 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +30 -46
- ads/opctl/operator/lowcode/forecast/model/prophet.py +15 -20
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +25 -20
- ads/opctl/operator/lowcode/forecast/utils.py +30 -33
- ads/opctl/operator/lowcode/pii/model/report.py +11 -7
- ads/opctl/operator/lowcode/recommender/model/base_model.py +58 -45
- ads/opctl/operator/lowcode/recommender/model/svd.py +47 -29
- {oracle_ads-2.12.4.dist-info → oracle_ads-2.12.6.dist-info}/METADATA +5 -5
- {oracle_ads-2.12.4.dist-info → oracle_ads-2.12.6.dist-info}/RECORD +33 -33
- {oracle_ads-2.12.4.dist-info → oracle_ads-2.12.6.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.12.4.dist-info → oracle_ads-2.12.6.dist-info}/WHEEL +0 -0
- {oracle_ads-2.12.4.dist-info → oracle_ads-2.12.6.dist-info}/entry_points.txt +0 -0
@@ -1,29 +1,30 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*--
|
3
|
-
import traceback
|
4
|
-
|
5
2
|
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
6
3
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
4
|
+
import logging
|
5
|
+
import traceback
|
7
6
|
|
8
|
-
import pandas as pd
|
9
7
|
import numpy as np
|
8
|
+
import pandas as pd
|
9
|
+
import report_creator as rc
|
10
|
+
|
10
11
|
from ads.common.decorator.runtime_dependency import runtime_dependency
|
12
|
+
from ads.opctl import logger
|
13
|
+
from ads.opctl.operator.lowcode.common.utils import (
|
14
|
+
seconds_to_datetime,
|
15
|
+
)
|
11
16
|
from ads.opctl.operator.lowcode.forecast.const import (
|
12
17
|
AUTOMLX_METRIC_MAP,
|
13
18
|
ForecastOutputColumns,
|
14
19
|
SupportedModels,
|
15
20
|
)
|
16
|
-
from ads.opctl import
|
21
|
+
from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
|
17
22
|
|
18
|
-
from .base_model import ForecastOperatorBaseModel
|
19
23
|
from ..operator_config import ForecastOperatorConfig
|
24
|
+
from .base_model import ForecastOperatorBaseModel
|
20
25
|
from .forecast_datasets import ForecastDatasets, ForecastOutput
|
21
|
-
from ads.opctl.operator.lowcode.common.utils import (
|
22
|
-
seconds_to_datetime,
|
23
|
-
datetime_to_seconds,
|
24
|
-
)
|
25
|
-
from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
|
26
26
|
|
27
|
+
logging.getLogger("report_creator").setLevel(logging.WARNING)
|
27
28
|
AUTOMLX_N_ALGOS_TUNED = 4
|
28
29
|
AUTOMLX_DEFAULT_SCORE_METRIC = "neg_sym_mean_abs_percent_error"
|
29
30
|
|
@@ -47,12 +48,13 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
|
|
47
48
|
)
|
48
49
|
model_kwargs_cleaned.pop("task", None)
|
49
50
|
time_budget = model_kwargs_cleaned.pop("time_budget", -1)
|
50
|
-
model_kwargs_cleaned[
|
51
|
-
|
52
|
-
|
51
|
+
model_kwargs_cleaned["preprocessing"] = (
|
52
|
+
self.spec.preprocessing.enabled
|
53
|
+
or model_kwargs_cleaned.get("preprocessing", True)
|
54
|
+
)
|
53
55
|
return model_kwargs_cleaned, time_budget
|
54
56
|
|
55
|
-
def preprocess(self, data
|
57
|
+
def preprocess(self, data): # TODO: re-use self.le for explanations
|
56
58
|
_, df_encoded = _label_encode_dataframe(
|
57
59
|
data,
|
58
60
|
no_encode={self.spec.datetime_column.name, self.original_target_column},
|
@@ -74,9 +76,10 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
|
|
74
76
|
),
|
75
77
|
)
|
76
78
|
def _build_model(self) -> pd.DataFrame:
|
77
|
-
from automlx import init
|
78
79
|
import logging
|
79
80
|
|
81
|
+
from automlx import Pipeline, init
|
82
|
+
|
80
83
|
try:
|
81
84
|
init(
|
82
85
|
engine="ray",
|
@@ -88,7 +91,7 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
|
|
88
91
|
|
89
92
|
full_data_dict = self.datasets.get_data_by_series()
|
90
93
|
|
91
|
-
self.models =
|
94
|
+
self.models = {}
|
92
95
|
horizon = self.spec.horizon
|
93
96
|
self.spec.confidence_interval_width = self.spec.confidence_interval_width or 0.8
|
94
97
|
self.forecast_output = ForecastOutput(
|
@@ -101,7 +104,7 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
|
|
101
104
|
# Clean up kwargs for pass through
|
102
105
|
model_kwargs_cleaned, time_budget = self.set_kwargs()
|
103
106
|
|
104
|
-
for
|
107
|
+
for s_id, df in full_data_dict.items():
|
105
108
|
try:
|
106
109
|
logger.debug(f"Running automlx on series {s_id}")
|
107
110
|
model_kwargs = model_kwargs_cleaned.copy()
|
@@ -120,7 +123,7 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
|
|
120
123
|
if self.loaded_models is not None and s_id in self.loaded_models:
|
121
124
|
model = self.loaded_models[s_id]
|
122
125
|
else:
|
123
|
-
model =
|
126
|
+
model = Pipeline(
|
124
127
|
task="forecasting",
|
125
128
|
**model_kwargs,
|
126
129
|
)
|
@@ -170,7 +173,7 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
|
|
170
173
|
self.errors_dict[s_id] = {
|
171
174
|
"model_name": self.spec.model,
|
172
175
|
"error": str(e),
|
173
|
-
"error_trace": traceback.format_exc()
|
176
|
+
"error_trace": traceback.format_exc(),
|
174
177
|
}
|
175
178
|
logger.warn(f"Encountered Error: {e}. Skipping.")
|
176
179
|
logger.warn(traceback.format_exc())
|
@@ -197,15 +200,12 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
|
|
197
200
|
- ds_forecast_col (pd.Series): The pd.Series object representing the forecasted column.
|
198
201
|
- ci_col_names (List[str]): A list of column names for the confidence interval in the report.
|
199
202
|
"""
|
200
|
-
|
201
|
-
|
202
|
-
"""The method that needs to be implemented on the particular model level."""
|
203
|
-
selected_models = dict()
|
203
|
+
selected_models = {}
|
204
204
|
models = self.models
|
205
205
|
other_sections = []
|
206
206
|
|
207
207
|
if len(self.models) > 0:
|
208
|
-
for
|
208
|
+
for s_id, m in models.items():
|
209
209
|
selected_models[s_id] = {
|
210
210
|
"series_id": s_id,
|
211
211
|
"selected_model": m.selected_model_,
|
@@ -352,7 +352,7 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
|
|
352
352
|
"""
|
353
353
|
data_temp = pd.DataFrame(
|
354
354
|
data,
|
355
|
-
columns=
|
355
|
+
columns=list(self.dataset_cols),
|
356
356
|
)
|
357
357
|
|
358
358
|
return self.models.get(self.series_id).forecast(
|
@@ -1,24 +1,26 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*--
|
3
2
|
|
4
3
|
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
5
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
5
|
|
7
6
|
import copy
|
7
|
+
import logging
|
8
8
|
import traceback
|
9
|
+
|
9
10
|
import pandas as pd
|
10
|
-
import
|
11
|
+
import report_creator as rc
|
11
12
|
import yaml
|
12
13
|
|
14
|
+
from ads.common.decorator.runtime_dependency import runtime_dependency
|
13
15
|
from ads.opctl import logger
|
14
|
-
from ads.opctl.operator.lowcode.
|
15
|
-
|
16
|
+
from ads.opctl.operator.lowcode.forecast.utils import _select_plot_list
|
17
|
+
|
18
|
+
from ..const import ForecastOutputColumns, SupportedModels
|
16
19
|
from ..operator_config import ForecastOperatorConfig
|
17
|
-
from
|
20
|
+
from .base_model import ForecastOperatorBaseModel
|
18
21
|
from .forecast_datasets import ForecastDatasets, ForecastOutput
|
19
|
-
from ..const import ForecastOutputColumns, SupportedModels
|
20
|
-
from ads.opctl.operator.lowcode.forecast.utils import _select_plot_list
|
21
22
|
|
23
|
+
logging.getLogger("report_creator").setLevel(logging.WARNING)
|
22
24
|
AUTOTS_MAX_GENERATION = 10
|
23
25
|
AUTOTS_MODELS_TO_VALIDATE = 0.15
|
24
26
|
|
@@ -43,10 +45,9 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel):
|
|
43
45
|
"""
|
44
46
|
|
45
47
|
# Import necessary libraries
|
46
|
-
from autots import AutoTS
|
48
|
+
from autots import AutoTS
|
47
49
|
|
48
50
|
self.outputs = None
|
49
|
-
models = dict()
|
50
51
|
# Get the name of the datetime column
|
51
52
|
self.forecast_output = ForecastOutput(
|
52
53
|
confidence_interval_width=self.spec.confidence_interval_width,
|
@@ -208,7 +209,7 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel):
|
|
208
209
|
self.errors_dict[s_id] = {
|
209
210
|
"model_name": self.spec.model,
|
210
211
|
"error": str(e),
|
211
|
-
"error_trace": traceback.format_exc()
|
212
|
+
"error_trace": traceback.format_exc(),
|
212
213
|
}
|
213
214
|
logger.warn(f"Encountered Error: {e}. Skipping.")
|
214
215
|
logger.warn(traceback.format_exc())
|
@@ -231,7 +232,6 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel):
|
|
231
232
|
- ds_forecast_col (pd.Index): A pandas Index containing the forecast column values.
|
232
233
|
- ci_col_names (list): A list of column names for confidence intervals.
|
233
234
|
"""
|
234
|
-
import report_creator as rc
|
235
235
|
|
236
236
|
all_sections = []
|
237
237
|
if self.models:
|
@@ -258,18 +258,16 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel):
|
|
258
258
|
yaml.dump(list(self.models.best_model.T.to_dict().values())[0]),
|
259
259
|
)
|
260
260
|
|
261
|
-
except KeyError
|
262
|
-
logger.warn(
|
263
|
-
f"Issue generating Model Parameters Table Section. Skipping"
|
264
|
-
)
|
261
|
+
except KeyError:
|
262
|
+
logger.warn("Issue generating Model Parameters Table Section. Skipping")
|
265
263
|
sec2 = rc.Text("Error generating model parameters.")
|
266
264
|
|
267
265
|
section_2 = rc.Block(sec2_text, sec2)
|
268
266
|
|
269
|
-
all_sections = [
|
267
|
+
all_sections = [section_1, section_2]
|
270
268
|
|
271
269
|
if self.spec.generate_explanations:
|
272
|
-
logger.warn(
|
270
|
+
logger.warn("Explanations not yet supported for the AutoTS Module")
|
273
271
|
|
274
272
|
# Model Description
|
275
273
|
model_description = rc.Text(
|
@@ -305,7 +303,7 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel):
|
|
305
303
|
).T
|
306
304
|
df = pd.concat([mapes, scores])
|
307
305
|
except Exception as e:
|
308
|
-
logger.debug(
|
306
|
+
logger.debug("Failed to generate training metrics")
|
309
307
|
logger.debug(f"Received Error Statement: {e}")
|
310
308
|
|
311
309
|
return df
|
@@ -1,52 +1,57 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*--
|
3
2
|
|
4
3
|
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
5
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
5
|
|
7
|
-
import
|
8
|
-
import numpy as np
|
6
|
+
import logging
|
9
7
|
import os
|
10
|
-
import pandas as pd
|
11
8
|
import tempfile
|
12
9
|
import time
|
13
10
|
import traceback
|
14
11
|
from abc import ABC, abstractmethod
|
15
12
|
from typing import Tuple
|
16
13
|
|
14
|
+
import fsspec
|
15
|
+
import numpy as np
|
16
|
+
import pandas as pd
|
17
|
+
import report_creator as rc
|
18
|
+
|
17
19
|
from ads.common.decorator.runtime_dependency import runtime_dependency
|
18
20
|
from ads.common.object_storage_details import ObjectStorageDetails
|
19
21
|
from ads.opctl import logger
|
20
22
|
from ads.opctl.operator.lowcode.common.utils import (
|
21
|
-
|
22
|
-
enable_print,
|
23
|
+
datetime_to_seconds,
|
23
24
|
disable_print,
|
24
|
-
|
25
|
+
enable_print,
|
26
|
+
human_time_friendly,
|
25
27
|
merged_category_column_name,
|
26
|
-
datetime_to_seconds,
|
27
28
|
seconds_to_datetime,
|
29
|
+
write_data,
|
28
30
|
)
|
29
31
|
from ads.opctl.operator.lowcode.forecast.model.forecast_datasets import TestData
|
30
32
|
from ads.opctl.operator.lowcode.forecast.utils import (
|
33
|
+
_build_metrics_df,
|
34
|
+
_build_metrics_per_horizon,
|
35
|
+
_label_encode_dataframe,
|
31
36
|
default_signer,
|
32
37
|
evaluate_train_metrics,
|
33
|
-
get_forecast_plots,
|
34
38
|
get_auto_select_plot,
|
35
|
-
|
36
|
-
_build_metrics_per_horizon,
|
39
|
+
get_forecast_plots,
|
37
40
|
load_pkl,
|
38
41
|
write_pkl,
|
39
|
-
_label_encode_dataframe,
|
40
42
|
)
|
41
|
-
|
43
|
+
|
42
44
|
from ..const import (
|
45
|
+
AUTO_SELECT,
|
43
46
|
SUMMARY_METRICS_HORIZON_LIMIT,
|
47
|
+
SpeedAccuracyMode,
|
44
48
|
SupportedMetrics,
|
45
49
|
SupportedModels,
|
46
|
-
SpeedAccuracyMode,
|
47
|
-
AUTO_SELECT
|
48
50
|
)
|
49
51
|
from ..operator_config import ForecastOperatorConfig, ForecastOperatorSpec
|
52
|
+
from .forecast_datasets import ForecastDatasets
|
53
|
+
|
54
|
+
logging.getLogger("report_creator").setLevel(logging.WARNING)
|
50
55
|
|
51
56
|
|
52
57
|
class ForecastOperatorBaseModel(ABC):
|
@@ -70,7 +75,7 @@ class ForecastOperatorBaseModel(ABC):
|
|
70
75
|
self.original_target_column = self.spec.target_column
|
71
76
|
self.dt_column_name = self.spec.datetime_column.name
|
72
77
|
|
73
|
-
self.model_parameters =
|
78
|
+
self.model_parameters = {}
|
74
79
|
self.loaded_models = None
|
75
80
|
|
76
81
|
# these fields are populated in the _build_model() method
|
@@ -79,20 +84,21 @@ class ForecastOperatorBaseModel(ABC):
|
|
79
84
|
# "outputs" is a list of outputs generated by the models. These should only be generated when the framework requires the original output for plotting
|
80
85
|
self.outputs = None
|
81
86
|
self.forecast_output = None
|
82
|
-
self.errors_dict =
|
83
|
-
self.le =
|
87
|
+
self.errors_dict = {}
|
88
|
+
self.le = {}
|
84
89
|
|
85
90
|
self.formatted_global_explanation = None
|
86
91
|
self.formatted_local_explanation = None
|
87
92
|
|
88
93
|
self.forecast_col_name = "yhat"
|
89
|
-
self.perform_tuning = (self.spec.tuning
|
90
|
-
self.spec.tuning.n_trials
|
94
|
+
self.perform_tuning = (self.spec.tuning is not None) and (
|
95
|
+
self.spec.tuning.n_trials is not None
|
91
96
|
)
|
92
97
|
|
93
98
|
def generate_report(self):
|
94
99
|
"""Generates the forecasting report."""
|
95
100
|
import warnings
|
101
|
+
|
96
102
|
from sklearn.exceptions import ConvergenceWarning
|
97
103
|
|
98
104
|
with warnings.catch_warnings():
|
@@ -100,7 +106,6 @@ class ForecastOperatorBaseModel(ABC):
|
|
100
106
|
warnings.simplefilter(action="ignore", category=UserWarning)
|
101
107
|
warnings.simplefilter(action="ignore", category=RuntimeWarning)
|
102
108
|
warnings.simplefilter(action="ignore", category=ConvergenceWarning)
|
103
|
-
import report_creator as rc
|
104
109
|
|
105
110
|
# load models if given
|
106
111
|
if self.spec.previous_output_dir is not None:
|
@@ -128,7 +133,7 @@ class ForecastOperatorBaseModel(ABC):
|
|
128
133
|
) = self._test_evaluate_metrics(
|
129
134
|
elapsed_time=elapsed_time,
|
130
135
|
)
|
131
|
-
except Exception
|
136
|
+
except Exception:
|
132
137
|
logger.warn("Unable to generate Test Metrics.")
|
133
138
|
logger.debug(f"Full Traceback: {traceback.format_exc()}")
|
134
139
|
report_sections = []
|
@@ -253,25 +258,30 @@ class ForecastOperatorBaseModel(ABC):
|
|
253
258
|
backtest_report_name = "backtest_stats.csv"
|
254
259
|
file_path = f"{output_dir}/{backtest_report_name}"
|
255
260
|
if self.spec.model == AUTO_SELECT:
|
256
|
-
backtest_sections.append(
|
261
|
+
backtest_sections.append(
|
262
|
+
rc.Heading("Auto-select statistics", level=2)
|
263
|
+
)
|
257
264
|
if not os.path.exists(file_path):
|
258
|
-
failure_msg = rc.Text(
|
259
|
-
|
265
|
+
failure_msg = rc.Text(
|
266
|
+
"auto-select could not be executed. Please check the "
|
267
|
+
"logs for more details."
|
268
|
+
)
|
260
269
|
backtest_sections.append(failure_msg)
|
261
270
|
else:
|
262
271
|
backtest_stats = pd.read_csv(file_path)
|
263
272
|
average_dict = backtest_stats.mean().to_dict()
|
264
|
-
del average_dict[
|
273
|
+
del average_dict["backtest"]
|
265
274
|
best_model = min(average_dict, key=average_dict.get)
|
266
275
|
backtest_text = rc.Heading("Back Testing Metrics", level=3)
|
267
276
|
summary_text = rc.Text(
|
268
277
|
f"Overall, the average scores for the models are {average_dict}, with {best_model}"
|
269
|
-
f" being identified as the top-performing model during backtesting."
|
278
|
+
f" being identified as the top-performing model during backtesting."
|
279
|
+
)
|
270
280
|
backtest_table = rc.DataTable(backtest_stats, index=True)
|
271
281
|
liner_plot = get_auto_select_plot(backtest_stats)
|
272
|
-
backtest_sections.extend(
|
273
|
-
|
274
|
-
|
282
|
+
backtest_sections.extend(
|
283
|
+
[backtest_text, backtest_table, summary_text, liner_plot]
|
284
|
+
)
|
275
285
|
|
276
286
|
forecast_plots = []
|
277
287
|
if len(self.forecast_output.list_series_ids()) > 0:
|
@@ -431,14 +441,13 @@ class ForecastOperatorBaseModel(ABC):
|
|
431
441
|
test_metrics_df: pd.DataFrame,
|
432
442
|
):
|
433
443
|
"""Saves resulting reports to the given folder."""
|
434
|
-
import report_creator as rc
|
435
444
|
|
436
445
|
unique_output_dir = self.spec.output_directory.url
|
437
446
|
|
438
447
|
if ObjectStorageDetails.is_oci_path(unique_output_dir):
|
439
448
|
storage_options = default_signer()
|
440
449
|
else:
|
441
|
-
storage_options =
|
450
|
+
storage_options = {}
|
442
451
|
|
443
452
|
# report-creator html report
|
444
453
|
if self.spec.generate_report:
|
@@ -580,7 +589,7 @@ class ForecastOperatorBaseModel(ABC):
|
|
580
589
|
indent=4,
|
581
590
|
)
|
582
591
|
else:
|
583
|
-
logger.info(
|
592
|
+
logger.info("All modeling completed successfully.")
|
584
593
|
|
585
594
|
def preprocess(self, df, series_id):
|
586
595
|
"""The method that needs to be implemented on the particular model level."""
|
@@ -622,8 +631,8 @@ class ForecastOperatorBaseModel(ABC):
|
|
622
631
|
def _load_model(self):
|
623
632
|
try:
|
624
633
|
self.loaded_models = load_pkl(self.spec.previous_output_dir + "/model.pkl")
|
625
|
-
except:
|
626
|
-
logger.info("model.pkl is not present")
|
634
|
+
except Exception as e:
|
635
|
+
logger.info(f"model.pkl is not present. Error: {e}")
|
627
636
|
|
628
637
|
def _save_model(self, output_dir, storage_options):
|
629
638
|
write_pkl(
|
@@ -693,7 +702,7 @@ class ForecastOperatorBaseModel(ABC):
|
|
693
702
|
|
694
703
|
if not len(kernel_explnr_vals):
|
695
704
|
logger.warn(
|
696
|
-
|
705
|
+
"No explanations generated. Ensure that additional data has been provided."
|
697
706
|
)
|
698
707
|
else:
|
699
708
|
self.global_explanation[s_id] = dict(
|
@@ -1,33 +1,23 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*--
|
3
2
|
|
4
|
-
# Copyright (c) 2023 Oracle and/or its affiliates.
|
3
|
+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
5
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
5
|
|
7
|
-
import time
|
8
6
|
import pandas as pd
|
9
|
-
from pandas.api.types import is_datetime64_any_dtype, is_string_dtype, is_numeric_dtype
|
10
7
|
|
11
|
-
from ..operator_config import ForecastOperatorConfig
|
12
8
|
from ads.opctl import logger
|
13
|
-
from ..const import ForecastOutputColumns, PROPHET_INTERNAL_DATE_COL
|
14
|
-
from ads.common.object_storage_details import ObjectStorageDetails
|
15
|
-
from ads.opctl.operator.lowcode.common.utils import (
|
16
|
-
get_frequency_in_seconds,
|
17
|
-
get_frequency_of_datetime,
|
18
|
-
)
|
19
9
|
from ads.opctl.operator.lowcode.common.data import AbstractData
|
20
|
-
from ads.opctl.operator.lowcode.forecast.utils import (
|
21
|
-
default_signer,
|
22
|
-
)
|
23
10
|
from ads.opctl.operator.lowcode.common.errors import (
|
24
|
-
InputDataError,
|
25
|
-
InvalidParameterError,
|
26
|
-
PermissionsError,
|
27
11
|
DataMismatchError,
|
12
|
+
InvalidParameterError,
|
13
|
+
)
|
14
|
+
from ads.opctl.operator.lowcode.common.utils import (
|
15
|
+
get_frequency_in_seconds,
|
16
|
+
get_frequency_of_datetime,
|
28
17
|
)
|
29
|
-
|
30
|
-
from
|
18
|
+
|
19
|
+
from ..const import ForecastOutputColumns, SupportedModels
|
20
|
+
from ..operator_config import ForecastOperatorConfig
|
31
21
|
|
32
22
|
|
33
23
|
class HistoricalData(AbstractData):
|
@@ -51,13 +41,12 @@ class HistoricalData(AbstractData):
|
|
51
41
|
self.freq_in_secs = get_frequency_in_seconds(
|
52
42
|
self.data.index.get_level_values(0)
|
53
43
|
)
|
54
|
-
if spec.model == SupportedModels.AutoMLX:
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
raise InvalidParameterError(message)
|
44
|
+
if spec.model == SupportedModels.AutoMLX and abs(self.freq_in_secs) < 3600:
|
45
|
+
message = (
|
46
|
+
f"{SupportedModels.AutoMLX} requires data with a frequency of at least one hour. Please try using a different model,"
|
47
|
+
" or select the 'auto' option."
|
48
|
+
)
|
49
|
+
raise InvalidParameterError(message)
|
61
50
|
|
62
51
|
|
63
52
|
class AdditionalData(AbstractData):
|
@@ -77,11 +66,11 @@ class AdditionalData(AbstractData):
|
|
77
66
|
else:
|
78
67
|
self.name = "additional_data"
|
79
68
|
self.data = None
|
80
|
-
self._data_dict =
|
69
|
+
self._data_dict = {}
|
81
70
|
self.create_horizon(spec, historical_data)
|
82
71
|
|
83
72
|
def create_horizon(self, spec, historical_data):
|
84
|
-
logger.debug(
|
73
|
+
logger.debug("No additional data provided. Constructing horizon.")
|
85
74
|
future_dates = pd.Series(
|
86
75
|
pd.date_range(
|
87
76
|
start=historical_data.get_max_time(),
|
@@ -109,6 +98,7 @@ class AdditionalData(AbstractData):
|
|
109
98
|
self.additional_regressors = []
|
110
99
|
|
111
100
|
def _ingest_data(self, spec):
|
101
|
+
_spec = spec
|
112
102
|
self.additional_regressors = list(self.data.columns)
|
113
103
|
if not self.additional_regressors:
|
114
104
|
logger.warn(
|
@@ -146,12 +136,11 @@ class ForecastDatasets:
|
|
146
136
|
self.historical_data = HistoricalData(spec)
|
147
137
|
self.additional_data = AdditionalData(spec, self.historical_data)
|
148
138
|
|
149
|
-
if spec.generate_explanations:
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
spec.generate_explanations = False
|
139
|
+
if spec.generate_explanations and spec.additional_data is None:
|
140
|
+
logger.warn(
|
141
|
+
"Unable to generate explanations as there is no additional data passed in. Either set generate_explanations to False, or pass in additional data."
|
142
|
+
)
|
143
|
+
spec.generate_explanations = False
|
155
144
|
|
156
145
|
def get_all_data_long(self, include_horizon=True):
|
157
146
|
how = "outer" if include_horizon else "left"
|
@@ -182,7 +171,7 @@ class ForecastDatasets:
|
|
182
171
|
)
|
183
172
|
|
184
173
|
def get_data_by_series(self, include_horizon=True):
|
185
|
-
total_dict =
|
174
|
+
total_dict = {}
|
186
175
|
hist_data = self.historical_data.get_dict_by_series()
|
187
176
|
add_data = self.additional_data.get_dict_by_series()
|
188
177
|
how = "outer" if include_horizon else "left"
|
@@ -200,10 +189,10 @@ class ForecastDatasets:
|
|
200
189
|
all_data = self.get_data_by_series(include_horizon=include_horizon)
|
201
190
|
try:
|
202
191
|
return all_data[s_id]
|
203
|
-
except:
|
192
|
+
except Exception as e:
|
204
193
|
raise InvalidParameterError(
|
205
194
|
f"Unable to retrieve series id: {s_id} from data. Available series ids are: {self.list_series_ids()}"
|
206
|
-
)
|
195
|
+
) from e
|
207
196
|
|
208
197
|
def get_horizon_at_series(self, s_id):
|
209
198
|
return self.get_data_at_series(s_id)[-self._horizon :]
|
@@ -234,7 +223,7 @@ class ForecastDatasets:
|
|
234
223
|
if sorted:
|
235
224
|
try:
|
236
225
|
series_ids.sort()
|
237
|
-
except:
|
226
|
+
except Exception:
|
238
227
|
pass
|
239
228
|
return series_ids
|
240
229
|
|
@@ -269,7 +258,7 @@ class ForecastOutput:
|
|
269
258
|
target_column: str the name of the original target column
|
270
259
|
dt_column: the name of the original datetime column
|
271
260
|
"""
|
272
|
-
self.series_id_map =
|
261
|
+
self.series_id_map = {}
|
273
262
|
self._set_ci_column_names(confidence_interval_width)
|
274
263
|
self.horizon = horizon
|
275
264
|
self.target_column_name = target_column
|
@@ -281,7 +270,7 @@ class ForecastOutput:
|
|
281
270
|
forecast: pd.DataFrame,
|
282
271
|
overwrite: bool = False,
|
283
272
|
):
|
284
|
-
if not overwrite and series_id in self.series_id_map
|
273
|
+
if not overwrite and series_id in self.series_id_map:
|
285
274
|
raise ValueError(
|
286
275
|
f"Attempting to update ForecastOutput for series_id {series_id} when this already exists. Set overwrite to True."
|
287
276
|
)
|
@@ -321,15 +310,15 @@ class ForecastOutput:
|
|
321
310
|
"""
|
322
311
|
try:
|
323
312
|
output_i = self.series_id_map[series_id]
|
324
|
-
except KeyError:
|
313
|
+
except KeyError as e:
|
325
314
|
raise ValueError(
|
326
315
|
f"Attempting to update output for series: {series_id}, however no series output has been initialized."
|
327
|
-
)
|
316
|
+
) from e
|
328
317
|
|
329
318
|
if (output_i.shape[0] - self.horizon) == len(fit_val):
|
330
|
-
output_i["fitted_value"].iloc[
|
331
|
-
: -
|
332
|
-
|
319
|
+
output_i["fitted_value"].iloc[: -self.horizon] = (
|
320
|
+
fit_val # Note: may need to do len(output_i) - (len(fit_val) + horizon) : -horizon
|
321
|
+
)
|
333
322
|
elif (output_i.shape[0] - self.horizon) > len(fit_val):
|
334
323
|
logger.debug(
|
335
324
|
f"Fitted Values were only generated on a subset ({len(fit_val)}/{(output_i.shape[0] - self.horizon)}) of the data for Series: {series_id}."
|
@@ -378,7 +367,7 @@ class ForecastOutput:
|
|
378
367
|
def get_forecast(self, series_id):
|
379
368
|
try:
|
380
369
|
return self.series_id_map[series_id]
|
381
|
-
except KeyError
|
370
|
+
except KeyError:
|
382
371
|
logger.debug(
|
383
372
|
f"No Forecast found for series_id: {series_id}. Returning empty DataFrame."
|
384
373
|
)
|
@@ -389,7 +378,7 @@ class ForecastOutput:
|
|
389
378
|
if sorted:
|
390
379
|
try:
|
391
380
|
series_ids.sort()
|
392
|
-
except:
|
381
|
+
except Exception:
|
393
382
|
pass
|
394
383
|
return series_ids
|
395
384
|
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2024 Oracle and/or its affiliates.
|
4
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5
|
+
import logging
|
5
6
|
import traceback
|
6
7
|
|
7
8
|
import pandas as pd
|
@@ -192,6 +193,8 @@ class MLForecastOperatorModel(ForecastOperatorBaseModel):
|
|
192
193
|
import report_creator as rc
|
193
194
|
from utilsforecast.plotting import plot_series
|
194
195
|
|
196
|
+
logging.getLogger("report_creator").setLevel(logging.WARNING)
|
197
|
+
|
195
198
|
# Section 1: Forecast Overview
|
196
199
|
sec1_text = rc.Block(
|
197
200
|
rc.Heading("Forecast Overview", level=2),
|