oracle-ads 2.11.9__py3-none-any.whl → 2.11.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. ads/aqua/__init__.py +1 -1
  2. ads/aqua/{base.py → app.py} +27 -7
  3. ads/aqua/cli.py +59 -17
  4. ads/aqua/common/__init__.py +5 -0
  5. ads/aqua/{decorator.py → common/decorator.py} +14 -8
  6. ads/aqua/common/enums.py +69 -0
  7. ads/aqua/{exception.py → common/errors.py} +28 -0
  8. ads/aqua/{utils.py → common/utils.py} +193 -95
  9. ads/aqua/config/config.py +18 -0
  10. ads/aqua/constants.py +51 -33
  11. ads/aqua/data.py +15 -26
  12. ads/aqua/evaluation/__init__.py +8 -0
  13. ads/aqua/evaluation/constants.py +53 -0
  14. ads/aqua/evaluation/entities.py +170 -0
  15. ads/aqua/evaluation/errors.py +71 -0
  16. ads/aqua/{evaluation.py → evaluation/evaluation.py} +122 -370
  17. ads/aqua/extension/__init__.py +2 -0
  18. ads/aqua/extension/aqua_ws_msg_handler.py +97 -0
  19. ads/aqua/extension/base_handler.py +0 -7
  20. ads/aqua/extension/common_handler.py +12 -6
  21. ads/aqua/extension/deployment_handler.py +70 -4
  22. ads/aqua/extension/errors.py +10 -0
  23. ads/aqua/extension/evaluation_handler.py +5 -3
  24. ads/aqua/extension/evaluation_ws_msg_handler.py +43 -0
  25. ads/aqua/extension/finetune_handler.py +41 -3
  26. ads/aqua/extension/model_handler.py +56 -4
  27. ads/aqua/extension/models/__init__.py +0 -0
  28. ads/aqua/extension/models/ws_models.py +69 -0
  29. ads/aqua/extension/ui_handler.py +65 -4
  30. ads/aqua/extension/ui_websocket_handler.py +124 -0
  31. ads/aqua/extension/utils.py +1 -1
  32. ads/aqua/finetuning/__init__.py +7 -0
  33. ads/aqua/finetuning/constants.py +17 -0
  34. ads/aqua/finetuning/entities.py +102 -0
  35. ads/aqua/{finetune.py → finetuning/finetuning.py} +170 -141
  36. ads/aqua/model/__init__.py +8 -0
  37. ads/aqua/model/constants.py +46 -0
  38. ads/aqua/model/entities.py +266 -0
  39. ads/aqua/model/enums.py +26 -0
  40. ads/aqua/{model.py → model/model.py} +405 -309
  41. ads/aqua/modeldeployment/__init__.py +8 -0
  42. ads/aqua/modeldeployment/constants.py +26 -0
  43. ads/aqua/{deployment.py → modeldeployment/deployment.py} +288 -227
  44. ads/aqua/modeldeployment/entities.py +142 -0
  45. ads/aqua/modeldeployment/inference.py +75 -0
  46. ads/aqua/ui.py +88 -8
  47. ads/cli.py +55 -7
  48. ads/common/decorator/threaded.py +97 -0
  49. ads/common/serializer.py +2 -2
  50. ads/config.py +5 -1
  51. ads/jobs/builders/infrastructure/dsc_job.py +49 -6
  52. ads/model/datascience_model.py +1 -1
  53. ads/model/deployment/model_deployment.py +11 -0
  54. ads/model/model_metadata.py +17 -6
  55. ads/opctl/operator/lowcode/anomaly/README.md +0 -2
  56. ads/opctl/operator/lowcode/anomaly/__main__.py +3 -3
  57. ads/opctl/operator/lowcode/anomaly/environment.yaml +0 -2
  58. ads/opctl/operator/lowcode/anomaly/model/automlx.py +2 -2
  59. ads/opctl/operator/lowcode/anomaly/model/autots.py +1 -1
  60. ads/opctl/operator/lowcode/anomaly/model/base_model.py +13 -17
  61. ads/opctl/operator/lowcode/anomaly/operator_config.py +2 -0
  62. ads/opctl/operator/lowcode/anomaly/schema.yaml +1 -2
  63. ads/opctl/operator/lowcode/anomaly/utils.py +3 -2
  64. ads/opctl/operator/lowcode/common/transformations.py +2 -1
  65. ads/opctl/operator/lowcode/common/utils.py +1 -1
  66. ads/opctl/operator/lowcode/forecast/README.md +1 -3
  67. ads/opctl/operator/lowcode/forecast/__main__.py +3 -18
  68. ads/opctl/operator/lowcode/forecast/const.py +2 -0
  69. ads/opctl/operator/lowcode/forecast/environment.yaml +1 -2
  70. ads/opctl/operator/lowcode/forecast/model/arima.py +1 -0
  71. ads/opctl/operator/lowcode/forecast/model/automlx.py +7 -4
  72. ads/opctl/operator/lowcode/forecast/model/autots.py +1 -0
  73. ads/opctl/operator/lowcode/forecast/model/base_model.py +38 -22
  74. ads/opctl/operator/lowcode/forecast/model/factory.py +33 -4
  75. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +15 -1
  76. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +234 -0
  77. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +9 -1
  78. ads/opctl/operator/lowcode/forecast/model/prophet.py +1 -0
  79. ads/opctl/operator/lowcode/forecast/model_evaluator.py +147 -0
  80. ads/opctl/operator/lowcode/forecast/operator_config.py +2 -1
  81. ads/opctl/operator/lowcode/forecast/schema.yaml +7 -2
  82. ads/opctl/operator/lowcode/forecast/utils.py +18 -44
  83. {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.11.dist-info}/METADATA +9 -12
  84. {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.11.dist-info}/RECORD +87 -61
  85. ads/aqua/job.py +0 -29
  86. {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.11.dist-info}/LICENSE.txt +0 -0
  87. {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.11.dist-info}/WHEEL +0 -0
  88. {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.11.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,234 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Copyright (c) 2024 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+ import pandas as pd
7
+ import numpy as np
8
+
9
+ from ads.opctl import logger
10
+ from ads.common.decorator import runtime_dependency
11
+ from ads.opctl.operator.lowcode.forecast.utils import _select_plot_list
12
+ from .base_model import ForecastOperatorBaseModel
13
+ from .forecast_datasets import ForecastDatasets, ForecastOutput
14
+ from ..operator_config import ForecastOperatorConfig
15
+ from ..const import ForecastOutputColumns, SupportedModels
16
+
17
+
18
class MLForecastOperatorModel(ForecastOperatorBaseModel):
    """Forecast operator model backed by ``mlforecast`` with LightGBM regressors.

    Three regressors are fitted together: a point forecast plus an upper and
    a lower quantile regressor that provide the prediction interval.
    """

    def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets):
        """Initialise the model and its (initially empty) explanation holders.

        Parameters
        ----------
        config: ForecastOperatorConfig
            Operator configuration; the datetime column name is read from
            ``config.spec.datetime_column.name``.
        datasets: ForecastDatasets
            Datasets handed to the base model.
        """
        super().__init__(config=config, datasets=datasets)
        self.global_explanation = {}
        self.local_explanation = {}
        self.formatted_global_explanation = None
        self.formatted_local_explanation = None
        self.date_col = config.spec.datetime_column.name

    def set_kwargs(self):
        """Add the interval quantiles to the model kwargs and return them.

        The quantiles are derived from ``spec.confidence_interval_width`` and
        rounded to two decimals. ``spec.model_kwargs`` is updated in place and
        the same dict object is returned.
        """
        model_kwargs = self.spec.model_kwargs

        half_width = self.spec.confidence_interval_width / 2
        # NOTE: the "uppper_quantile" spelling is the key `_train_model` reads;
        # it is kept as-is so both sides stay in agreement.
        model_kwargs["lower_quantile"] = round(0.5 - half_width, 2)
        model_kwargs["uppper_quantile"] = round(0.5 + half_width, 2)
        return model_kwargs

    def preprocess(self, df, series_id):
        """No-op: this model applies no per-series preprocessing."""
        pass

    @runtime_dependency(
        module="mlforecast",
        err_msg="MLForecast is not installed, please install it with 'pip install mlforecast'",
    )
    @runtime_dependency(
        module="lightgbm",
        err_msg="lightgbm is not installed, please install it with 'pip install lightgbm'",
    )
    def _train_model(self, data_train, data_test, model_kwargs):
        """Fit the forecasters on ``data_train`` and populate the forecast output.

        Parameters
        ----------
        data_train:
            Long-format training frame containing the series id, date and
            target columns.
        data_test:
            Long-format frame covering the forecast horizon; used to build
            the future exogenous features passed to ``predict``.
        model_kwargs: dict
            Must contain ``"lower_quantile"`` and ``"uppper_quantile"``; may
            contain ``"lags"``, ``"static_features"`` and ``"recursive_models"``.

        Any exception raised during training is recorded in
        ``self.errors_dict`` and logged at debug level instead of propagating.
        """
        try:
            import lightgbm as lgb
            from mlforecast import MLForecast
            from mlforecast.lag_transforms import ExpandingMean, RollingMean
            from mlforecast.target_transforms import Differences

            lgb_params = {
                "verbosity": -1,
                "num_leaves": 512,
            }

            # Default lags / lag transforms are only enabled when additional
            # (exogenous) columns are present.
            has_additional_columns = (
                len(self.datasets.get_additional_data_column_names()) > 0
            )

            # Infer the frequency from all unique dates; if that fails, retry
            # on the last five unique dates only.
            unique_dates = data_train[self.date_col].drop_duplicates()
            freq = pd.infer_freq(unique_dates) or pd.infer_freq(unique_dates[-5:])

            fcst = MLForecast(
                models={
                    "forecast": lgb.LGBMRegressor(**lgb_params),
                    "upper": lgb.LGBMRegressor(
                        **lgb_params,
                        objective="quantile",
                        alpha=model_kwargs["uppper_quantile"],
                    ),
                    "lower": lgb.LGBMRegressor(
                        **lgb_params,
                        objective="quantile",
                        alpha=model_kwargs["lower_quantile"],
                    ),
                },
                freq=freq,
                target_transforms=[Differences([12])],
                lags=model_kwargs.get(
                    "lags", [1, 6, 12] if has_additional_columns else []
                ),
                lag_transforms=(
                    {
                        1: [ExpandingMean()],
                        12: [RollingMean(window_size=24)],
                    }
                    if has_additional_columns
                    else {}
                ),
            )

            # Only the literal ``False`` keeps ``max_horizon`` unset; any other
            # value passes the spec horizon to ``fit``.
            num_models = model_kwargs.get("recursive_models", False)

            self.model_columns = [
                ForecastOutputColumns.SERIES
            ] + data_train.select_dtypes(exclude=["object"]).columns.to_list()
            fcst.fit(
                data_train[self.model_columns],
                static_features=model_kwargs.get("static_features", []),
                id_col=ForecastOutputColumns.SERIES,
                time_col=self.date_col,
                target_col=self.spec.target_column,
                fitted=True,
                max_horizon=None if num_models is False else self.spec.horizon,
            )

            # Future features: the supplied horizon rows plus whatever
            # mlforecast reports as missing, with gaps filled with 0.
            future_features = pd.concat(
                [
                    data_test[self.model_columns],
                    fcst.get_missing_future(
                        h=self.spec.horizon, X_df=data_test[self.model_columns]
                    ),
                ],
                axis=0,
                ignore_index=True,
            ).fillna(0)

            self.outputs = fcst.predict(h=self.spec.horizon, X_df=future_features)
            self.fitted_values = fcst.forecast_fitted_values()

            for s_id in self.datasets.list_series_ids():
                self.forecast_output.init_series_output(
                    series_id=s_id,
                    data_at_series=self.datasets.get_data_at_series(s_id),
                )

                # Select each series' rows once and reuse them for all bounds.
                series_fitted = self.fitted_values[
                    self.fitted_values[ForecastOutputColumns.SERIES] == s_id
                ]
                series_outputs = self.outputs[
                    self.outputs[ForecastOutputColumns.SERIES] == s_id
                ]

                self.forecast_output.populate_series_output(
                    series_id=s_id,
                    fit_val=series_fitted.forecast.values,
                    forecast_val=series_outputs.forecast.values,
                    upper_bound=series_outputs.upper.values,
                    lower_bound=series_outputs.lower.values,
                )

                self.model_parameters[s_id] = {
                    "framework": SupportedModels.MLForecast,
                    **lgb_params,
                }

            logger.debug("===========Done===========")

        except Exception as e:
            self.errors_dict[self.spec.model] = {
                "model_name": self.spec.model,
                "error": str(e),
            }
            logger.debug(f"Encountered Error: {e}. Skipping.")

    def _build_model(self) -> pd.DataFrame:
        """Train the model on all series and return the long-format forecast."""
        data_train = self.datasets.get_all_data_long(include_horizon=False)
        data_test = self.datasets.get_all_data_long_forecast_horizon()
        self.models = {}
        model_kwargs = self.set_kwargs()
        self.forecast_output = ForecastOutput(
            confidence_interval_width=self.spec.confidence_interval_width,
            horizon=self.spec.horizon,
            target_column=self.original_target_column,
            dt_column=self.date_col,
        )
        self._train_model(data_train, data_test, model_kwargs)
        return self.forecast_output.get_forecast_long()

    def _generate_report(self):
        """Build the report sections for this model.

        Returns
        -------
        tuple
            ``(model_description, all_sections)`` where ``all_sections`` holds
            the forecast overview and the model parameter blocks.
        """
        import report_creator as rc
        from utilsforecast.plotting import plot_series

        # Section 1: Forecast Overview
        sec1_text = rc.Block(
            rc.Heading("Forecast Overview", level=2),
            rc.Text(
                "These plots show your forecast in the context of historical data."
            ),
        )
        sec_1 = _select_plot_list(
            lambda s_id: plot_series(
                self.datasets.get_all_data_long(include_horizon=False),
                pd.concat(
                    [self.fitted_values, self.outputs], axis=0, ignore_index=True
                ),
                id_col=ForecastOutputColumns.SERIES,
                time_col=self.spec.datetime_column.name,
                target_col=self.original_target_column,
                seed=42,
                ids=[s_id],
            ),
            self.datasets.list_series_ids(),
        )

        # Section 2: MlForecast Model Parameters
        sec2_text = rc.Block(
            rc.Heading("MlForecast Model Parameters", level=2),
            rc.Text("These are the parameters used for the MlForecast model."),
        )

        blocks = [
            rc.Html(str(parameters), label=series_id)
            for series_id, parameters in self.model_parameters.items()
        ]
        sec_2 = rc.Select(blocks=blocks)

        all_sections = [sec1_text, sec_1, sec2_text, sec_2]
        # Adjacent literals are concatenated verbatim, so each fragment needs
        # its own trailing space to keep the sentences apart.
        model_description = rc.Text(
            "mlforecast is a framework to perform time series forecasting using machine learning models "
            "with the option to scale to massive amounts of data using remote clusters. "
            "Fastest implementations of feature engineering for time series forecasting in Python. "
            "Support for exogenous variables and static covariates."
        )

        return model_description, all_sections
@@ -75,6 +75,8 @@ def _fit_model(data, params, additional_regressors, select_metric):
75
75
  m = m.add_future_regressor(name=add_reg)
76
76
  m.fit(df=data)
77
77
  accepted_regressors_config = m.config_regressors or dict()
78
+ if hasattr(accepted_regressors_config, "regressors"):
79
+ accepted_regressors_config = accepted_regressors_config.regressors or dict()
78
80
 
79
81
  enable_print()
80
82
  return m, list(accepted_regressors_config.keys())
@@ -122,7 +124,13 @@ class NeuralProphetOperatorModel(ForecastOperatorBaseModel):
122
124
 
123
125
  if self.loaded_models is not None and s_id in self.loaded_models:
124
126
  model = self.loaded_models[s_id]
125
- accepted_regressors_config = model.config_regressors or dict()
127
+ accepted_regressors_config = (
128
+ model.config_regressors.regressors or dict()
129
+ )
130
+ if hasattr(accepted_regressors_config, "regressors"):
131
+ accepted_regressors_config = (
132
+ accepted_regressors_config.regressors or dict()
133
+ )
126
134
  self.accepted_regressors[s_id] = list(accepted_regressors_config.keys())
127
135
  if self.loaded_trainers is not None and s_id in self.loaded_trainers:
128
136
  model.trainer = self.loaded_trainers[s_id]
@@ -131,6 +131,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
131
131
  "model_name": self.spec.model,
132
132
  "error": str(e),
133
133
  }
134
+ logger.debug(f"Encountered Error: {e}. Skipping.")
134
135
 
135
136
  def _build_model(self) -> pd.DataFrame:
136
137
  full_data_dict = self.datasets.get_data_by_series()
@@ -0,0 +1,147 @@
1
+ # -*- coding: utf-8; -*-
2
+
3
+ # Copyright (c) 2023 Oracle and/or its affiliates.
4
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
5
+
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from pathlib import Path
10
+
11
+ from ads.opctl import logger
12
+ from ads.opctl.operator.lowcode.common.const import DataColumns
13
+ from .model.forecast_datasets import ForecastDatasets
14
+ from .operator_config import ForecastOperatorConfig
15
+
16
+
17
class ModelEvaluator:
    """
    A class used to evaluate and determine the best model or framework from a given set of candidates.

    This class is responsible for comparing different models or frameworks based on specified evaluation
    metrics and returning the best-performing option.
    """

    def __init__(self, models, k=5, subsample_ratio=0.20):
        """
        Initializes the ModelEvaluator with a list of models, number of backtests and subsample ratio.

        Properties:
        ----------
        models (list): The list of model to be evaluated.
        k (int): The number of times each model is backtested to verify its performance.
        subsample_ratio (float): The proportion of the data used in the evaluation process.
        """
        self.models = models
        self.k = k
        self.subsample_ratio = subsample_ratio
        # Lower bound on how many series are sampled, whatever the ratio says.
        self.minimum_sample_count = 5

    def generate_cutoffs(self, unique_dates, horizon):
        """Return the backtest cut-off dates, most recent first.

        Cut-offs are spaced ``horizon`` steps apart, counting back from the
        end of the sorted dates. A backtest is only kept when its training
        window holds at least ``3 * horizon`` dates, so fewer than ``k``
        cut-offs may be returned (a warning is logged in that case).
        """
        sorted_dates = np.sort(unique_dates)
        train_window_size = [
            len(sorted_dates) - (i + 1) * horizon for i in range(self.k)
        ]
        valid_train_window_size = [
            ws for ws in train_window_size if ws >= horizon * 3
        ]
        if len(valid_train_window_size) < self.k:
            # Report how many backtests are feasible, not the window sizes.
            logger.warning(
                f"Only {len(valid_train_window_size)} backtests can be created"
            )
        cut_offs = sorted_dates[-horizon - 1 : -horizon * (self.k + 1) : -horizon][
            : len(valid_train_window_size)
        ]
        return cut_offs

    def generate_k_fold_data(
        self, datasets: "ForecastDatasets", operator_config: "ForecastOperatorConfig"
    ):
        """Split the historical and additional data into backtest folds.

        Returns
        -------
        tuple
            ``(cut_offs, training_datasets, additional_data, test_datasets)``;
            the three lists are aligned with ``cut_offs`` by position.
        """
        date_col = operator_config.spec.datetime_column.name
        horizon = operator_config.spec.horizon
        historical_data = datasets.historical_data.data.reset_index()
        series_col = DataColumns.Series
        group_counts = historical_data[series_col].value_counts()

        # Sample the series with the most rows (value_counts sorts descending).
        sample_count = max(
            self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio)
        )
        sampled_groups = group_counts.head(sample_count)
        sampled_historical_data = historical_data[
            historical_data[series_col].isin(sampled_groups.index)
        ]

        # Cut-offs are derived from the dates of the series with the fewest rows.
        min_group = group_counts.idxmin()
        min_series_data = historical_data[historical_data[series_col] == min_group]
        unique_dates = min_series_data[date_col].unique()

        cut_offs = self.generate_cutoffs(unique_dates, horizon)
        hist_dates = sampled_historical_data[date_col]
        training_datasets = [
            sampled_historical_data[hist_dates <= cut_off_date]
            for cut_off_date in cut_offs
        ]
        # First fold is tested on everything after the latest cut-off; each
        # later fold is tested on the window up to the previous cut-off.
        test_datasets = [sampled_historical_data[hist_dates > cut_offs[0]]]
        for previous, current in zip(cut_offs[:-1], cut_offs[1:]):
            test_datasets.append(
                sampled_historical_data[
                    (current < hist_dates) & (hist_dates <= previous)
                ]
            )

        all_additional = datasets.additional_data.data.reset_index()
        sampled_additional_data = all_additional[
            all_additional[series_col].isin(sampled_groups.index)
        ]
        max_historical_date = hist_dates.max()
        # Additional data for a fold extends to the end of that fold's test window.
        additional_data = [
            sampled_additional_data[
                sampled_additional_data[date_col] <= max_historical_date
            ]
        ]
        for cut_off in cut_offs[:-1]:
            additional_data.append(
                sampled_additional_data[sampled_additional_data[date_col] <= cut_off]
            )
        return cut_offs, training_datasets, additional_data, test_datasets

    def remove_none_values(self, obj):
        """Recursively drop dict entries whose key or value is ``None``.

        Only dicts are traversed; any other object (lists included) is
        returned unchanged.
        """
        if isinstance(obj, dict):
            return {
                k: self.remove_none_values(v)
                for k, v in obj.items()
                if k is not None and v is not None
            }
        return obj

    def create_operator_config(
        self, operator_config, backtest, model, historical_data, additional_data, test_data
    ):
        """Write one fold's data to disk and build the operator config that points at it.

        The three frames are saved as CSV files under
        ``<output_dir>/back_testing/<model>/<backtest>`` and a copy of
        ``operator_config`` is returned with its data URLs, model and output
        directory replaced, model kwargs cleared and explanations disabled.
        """
        output_dir = operator_config.spec.output_directory.url
        output_file_path = f'{output_dir}/back_testing/{model}/{backtest}'
        Path(output_file_path).mkdir(parents=True, exist_ok=True)

        historical_data_url = f'{output_file_path}/historical.csv'
        additional_data_url = f'{output_file_path}/additional.csv'
        test_data_url = f'{output_file_path}/test.csv'
        historical_data.to_csv(historical_data_url, index=False)
        additional_data.to_csv(additional_data_url, index=False)
        test_data.to_csv(test_data_url, index=False)

        backtest_op_config_draft = operator_config.to_dict()
        backtest_spec = backtest_op_config_draft["spec"]
        backtest_spec["historical_data"]["url"] = historical_data_url
        backtest_spec["additional_data"]["url"] = additional_data_url
        backtest_spec["test_data"]["url"] = test_data_url
        backtest_spec["model"] = model
        backtest_spec['model_kwargs'] = None
        backtest_spec["output_directory"] = {"url": output_file_path}
        backtest_spec["target_category_columns"] = [DataColumns.Series]
        backtest_spec['generate_explanations'] = False
        # None-valued entries are stripped before the config is rebuilt.
        cleaned_config = self.remove_none_values(backtest_op_config_draft)

        return ForecastOperatorConfig.from_dict(obj_dict=cleaned_config)

    def run_all_models(
        self, datasets: "ForecastDatasets", operator_config: "ForecastOperatorConfig"
    ):
        """Run every candidate model on every backtest fold.

        Returns
        -------
        dict
            ``{model: {backtest_index: metric_value}}`` where the value is the
            configured metric averaged across series for that fold.
        """
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid a circular import with the factory).
        from .model.factory import ForecastOperatorModelFactory

        cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(
            datasets, operator_config
        )
        metrics = {}
        for model in self.models:
            metrics[model] = {}
            for i, (train_df, additional_df, test_df) in enumerate(
                zip(train_sets, additional_data, test_sets)
            ):
                backtest_operator_config = self.create_operator_config(
                    operator_config, i, model, train_df, additional_df, test_df
                )
                # Separate name so the ``datasets`` argument is not rebound.
                backtest_datasets = ForecastDatasets(backtest_operator_config)
                ForecastOperatorModelFactory.get_model(
                    backtest_operator_config, backtest_datasets
                ).generate_report()

                test_metrics_filename = (
                    backtest_operator_config.spec.test_metrics_filename
                )
                metrics_df = pd.read_csv(
                    f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}"
                )
                metrics_df["average_across_series"] = metrics_df.drop(
                    'metrics', axis=1
                ).mean(axis=1)
                metrics_average_dict = dict(
                    zip(
                        metrics_df['metrics'].str.lower(),
                        metrics_df['average_across_series'],
                    )
                )
                metrics[model][i] = metrics_average_dict[operator_config.spec.metric]
        return metrics

    def find_best_model(
        self, datasets: "ForecastDatasets", operator_config: "ForecastOperatorConfig"
    ):
        """Backtest all models, save the per-fold metrics and return the best model.

        The best model is the one with the lowest metric averaged over its
        backtests. The per-fold metrics are written to ``backtest_stats.csv``
        in the operator's output directory.
        """
        metrics = self.run_all_models(datasets, operator_config)
        avg_backtests_metrics = {
            key: sum(value.values()) / len(value.values())
            for key, value in metrics.items()
        }
        best_model = min(avg_backtests_metrics, key=avg_backtests_metrics.get)
        logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")

        backtest_stats = pd.DataFrame(metrics).rename_axis('backtest')
        backtest_stats.reset_index(inplace=True)
        output_dir = operator_config.spec.output_directory.url
        backtest_report_name = "backtest_stats.csv"
        backtest_stats.to_csv(f"{output_dir}/{backtest_report_name}", index=False)
        return best_model
@@ -14,7 +14,7 @@ from ads.opctl.operator.common.operator_config import OperatorConfig, OutputDire
14
14
 
15
15
  from .const import SupportedMetrics, SpeedAccuracyMode
16
16
  from .const import SupportedModels
17
-
17
+ from ads.opctl.operator.lowcode.common.utils import find_output_dirname
18
18
 
19
19
  @dataclass(repr=True)
20
20
  class TestData(InputData):
@@ -90,6 +90,7 @@ class ForecastOperatorSpec(DataClassSerializable):
90
90
 
91
91
  def __post_init__(self):
92
92
  """Adjusts the specification details."""
93
+ self.output_directory = self.output_directory or OutputDirectory(url=find_output_dirname(self.output_directory))
93
94
  self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower()
94
95
  self.model = self.model or SupportedModels.Auto
95
96
  self.confidence_interval_width = self.confidence_interval_width or 0.80
@@ -365,14 +365,15 @@ spec:
365
365
  model:
366
366
  type: string
367
367
  required: false
368
- default: auto
368
+ default: auto-select
369
369
  allowed:
370
370
  - prophet
371
371
  - arima
372
372
  - neuralprophet
373
+ - mlforecast
373
374
  - automlx
374
375
  - autots
375
- - auto
376
+ - auto-select
376
377
 
377
378
  model_kwargs:
378
379
  type: dict
@@ -413,4 +414,8 @@ spec:
413
414
  - RMSE
414
415
  - MSE
415
416
  - SMAPE
417
+ - mape
418
+ - rmse
419
+ - mse
420
+ - smape
416
421
  type: dict
@@ -19,7 +19,6 @@ from sklearn.metrics import (
19
19
  mean_absolute_percentage_error,
20
20
  mean_squared_error,
21
21
  )
22
-
23
22
  try:
24
23
  from scipy.stats import linregress
25
24
  except:
@@ -34,8 +33,7 @@ from .errors import ForecastInputDataError, ForecastSchemaYamlError
34
33
  from .operator_config import ForecastOperatorSpec, ForecastOperatorConfig
35
34
  from ads.opctl.operator.lowcode.common.utils import merge_category_columns
36
35
  from ads.opctl.operator.lowcode.forecast.const import ForecastOutputColumns
37
-
38
- # from ads.opctl.operator.lowcode.forecast.model.forecast_datasets import TestData, ForecastOutput
36
+ import report_creator as rc
39
37
 
40
38
 
41
39
  def _label_encode_dataframe(df, no_encode=set()):
@@ -258,8 +256,6 @@ def evaluate_train_metrics(output, metrics_col_name=None):
258
256
 
259
257
 
260
258
  def _select_plot_list(fn, series_ids):
261
- import report_creator as rc
262
-
263
259
  blocks = [rc.Widget(fn(s_id=s_id), label=s_id) for s_id in series_ids]
264
260
  return rc.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
265
261
 
@@ -267,6 +263,23 @@ def _select_plot_list(fn, series_ids):
267
263
  def _add_unit(num, unit):
268
264
  return f"{num} {unit}"
269
265
 
266
def get_auto_select_plot(backtest_results):
    """Plot every model's backtest metric as a line and wrap it in a report widget.

    Parameters
    ----------
    backtest_results: pd.DataFrame
        Must contain a ``"backtest"`` column, used as the x-axis. Every other
        column is drawn as one line named after the column.
        (Presumably the frame saved by the model evaluator - confirm at the
        call site.)

    Returns
    -------
    rc.Widget
        Widget wrapping the plotly figure.

    Raises
    ------
    ValueError
        If ``backtest_results`` has no ``"backtest"`` column.
    """
    back_test_column = "backtest"
    metric_columns = backtest_results.columns.tolist()
    metric_columns.remove(back_test_column)

    fig = go.Figure()
    for column in metric_columns:
        fig.add_trace(
            go.Scatter(
                x=backtest_results[back_test_column],
                y=backtest_results[column],
                mode="lines",
                name=column,
            )
        )

    return rc.Widget(fig)
282
+
270
283
 
271
284
  def get_forecast_plots(
272
285
  forecast_output,
@@ -371,45 +384,6 @@ def get_forecast_plots(
371
384
 
372
385
  return _select_plot_list(plot_forecast_plotly, forecast_output.list_series_ids())
373
386
 
374
-
375
- def select_auto_model(
376
- datasets: "ForecastDatasets", operator_config: ForecastOperatorConfig
377
- ) -> str:
378
- """
379
- Selects AutoMLX or Arima model based on column count.
380
-
381
- If the number of columns is less than or equal to the maximum allowed for AutoMLX,
382
- returns 'AutoMLX'. Otherwise, returns 'Arima'.
383
-
384
- Parameters
385
- ------------
386
- datasets: ForecastDatasets
387
- Datasets for predictions
388
-
389
- Returns
390
- --------
391
- str
392
- The type of the model.
393
- """
394
- freq_in_secs = datasets.get_datetime_frequency_in_seconds()
395
- num_of_additional_cols = len(datasets.get_additional_data_column_names())
396
- row_count = datasets.get_num_rows()
397
- number_of_series = len(datasets.list_series_ids())
398
- if (
399
- num_of_additional_cols < 15
400
- and row_count < 10000
401
- and number_of_series < 10
402
- and freq_in_secs > 3600
403
- ):
404
- return SupportedModels.AutoMLX
405
- elif row_count < 10000 and number_of_series > 10:
406
- return SupportedModels.AutoTS
407
- elif row_count > 20000:
408
- return SupportedModels.NeuralProphet
409
- else:
410
- return SupportedModels.NeuralProphet
411
-
412
-
413
387
  def convert_target(target: str, target_col: str):
414
388
  """
415
389
  Removes the target_column that got appended to target.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: oracle_ads
3
- Version: 2.11.9
3
+ Version: 2.11.11
4
4
  Summary: Oracle Accelerated Data Science SDK
5
5
  Keywords: Oracle Cloud Infrastructure,OCI,Machine Learning,ML,Artificial Intelligence,AI,Data Science,Cloud,Oracle
6
6
  Author: Oracle Data Science
@@ -20,7 +20,7 @@ Requires-Dist: cloudpickle>=1.6.0
20
20
  Requires-Dist: fsspec>=0.8.7
21
21
  Requires-Dist: gitpython>=3.1.2
22
22
  Requires-Dist: jinja2>=2.11.2
23
- Requires-Dist: matplotlib>=3.1.3
23
+ Requires-Dist: matplotlib>=3.1.3, <=3.8.4
24
24
  Requires-Dist: numpy>=1.19.2
25
25
  Requires-Dist: oci>=2.125.3
26
26
  Requires-Dist: ocifs>=1.1.3
@@ -34,10 +34,8 @@ Requires-Dist: tabulate>=0.8.9
34
34
  Requires-Dist: tqdm>=4.59.0
35
35
  Requires-Dist: oracle_ads[opctl] ; extra == "anomaly"
36
36
  Requires-Dist: autots ; extra == "anomaly"
37
- Requires-Dist: oracle-automlx[forecasting]==23.4.1 ; extra == "anomaly"
38
- Requires-Dist: oracle-automlx[classic]==23.4.1 ; extra == "anomaly"
39
37
  Requires-Dist: oracledb ; extra == "anomaly"
40
- Requires-Dist: report-creator ; extra == "anomaly"
38
+ Requires-Dist: report-creator==1.0.9 ; extra == "anomaly"
41
39
  Requires-Dist: jupyter_server ; extra == "aqua"
42
40
  Requires-Dist: hdfs[kerberos] ; extra == "bds"
43
41
  Requires-Dist: ibis-framework[impala] ; extra == "bds"
@@ -61,14 +59,12 @@ Requires-Dist: oci-cli ; extra == "forecast"
61
59
  Requires-Dist: py-cpuinfo ; extra == "forecast"
62
60
  Requires-Dist: rich ; extra == "forecast"
63
61
  Requires-Dist: autots[additional] ; extra == "forecast"
64
- Requires-Dist: holidays==0.21.13 ; extra == "forecast"
65
- Requires-Dist: neuralprophet ; extra == "forecast"
62
+ Requires-Dist: mlforecast ; extra == "forecast"
63
+ Requires-Dist: neuralprophet>=0.7.0 ; extra == "forecast"
66
64
  Requires-Dist: numpy ; extra == "forecast"
67
65
  Requires-Dist: oci-cli ; extra == "forecast"
68
- Requires-Dist: optuna==3.1.0 ; extra == "forecast"
66
+ Requires-Dist: optuna ; extra == "forecast"
69
67
  Requires-Dist: oracle-ads ; extra == "forecast"
70
- Requires-Dist: oracle-automlx[forecasting]==23.4.1 ; extra == "forecast"
71
- Requires-Dist: oracle-automlx[classic]==23.4.1 ; extra == "forecast"
72
68
  Requires-Dist: pmdarima ; extra == "forecast"
73
69
  Requires-Dist: prophet ; extra == "forecast"
74
70
  Requires-Dist: shap ; extra == "forecast"
@@ -76,13 +72,14 @@ Requires-Dist: sktime ; extra == "forecast"
76
72
  Requires-Dist: statsmodels ; extra == "forecast"
77
73
  Requires-Dist: plotly ; extra == "forecast"
78
74
  Requires-Dist: oracledb ; extra == "forecast"
79
- Requires-Dist: report-creator ; extra == "forecast"
75
+ Requires-Dist: report-creator==1.0.9 ; extra == "forecast"
80
76
  Requires-Dist: geopandas ; extra == "geo"
81
77
  Requires-Dist: oracle_ads[viz] ; extra == "geo"
82
78
  Requires-Dist: transformers ; extra == "huggingface"
83
79
  Requires-Dist: langchain-community<0.0.32 ; extra == "llm"
84
80
  Requires-Dist: langchain>=0.1.10,<0.1.14 ; extra == "llm"
85
81
  Requires-Dist: evaluate>=0.4.0 ; extra == "llm"
82
+ Requires-Dist: langchain-core<0.1.51 ; extra == "llm"
86
83
  Requires-Dist: ipython>=7.23.1, <8.0 ; extra == "notebook"
87
84
  Requires-Dist: ipywidgets~=7.6.3 ; extra == "notebook"
88
85
  Requires-Dist: lightgbm<4.0.0 ; extra == "onnx"
@@ -115,7 +112,7 @@ Requires-Dist: scrubadub==2.0.1 ; extra == "pii"
115
112
  Requires-Dist: scrubadub_spacy ; extra == "pii"
116
113
  Requires-Dist: spacy-transformers==1.2.5 ; extra == "pii"
117
114
  Requires-Dist: spacy==3.6.1 ; extra == "pii"
118
- Requires-Dist: report-creator ; extra == "pii"
115
+ Requires-Dist: report-creator==1.0.9 ; extra == "pii"
119
116
  Requires-Dist: pyspark>=3.0.0 ; extra == "spark"
120
117
  Requires-Dist: oracle_ads[viz] ; extra == "tensorflow"
121
118
  Requires-Dist: tensorflow ; extra == "tensorflow"