oracle-ads 2.13.1rc0-py3-none-any.whl → 2.13.2rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. ads/aqua/__init__.py +7 -1
  2. ads/aqua/app.py +24 -23
  3. ads/aqua/client/client.py +48 -11
  4. ads/aqua/common/entities.py +28 -1
  5. ads/aqua/common/enums.py +13 -7
  6. ads/aqua/common/utils.py +8 -13
  7. ads/aqua/config/container_config.py +203 -0
  8. ads/aqua/config/evaluation/evaluation_service_config.py +5 -181
  9. ads/aqua/constants.py +0 -1
  10. ads/aqua/evaluation/evaluation.py +4 -4
  11. ads/aqua/extension/base_handler.py +4 -0
  12. ads/aqua/extension/model_handler.py +19 -28
  13. ads/aqua/finetuning/finetuning.py +2 -3
  14. ads/aqua/model/entities.py +2 -3
  15. ads/aqua/model/model.py +25 -30
  16. ads/aqua/modeldeployment/deployment.py +6 -14
  17. ads/aqua/modeldeployment/entities.py +2 -2
  18. ads/aqua/server/__init__.py +4 -0
  19. ads/aqua/server/__main__.py +24 -0
  20. ads/aqua/server/app.py +47 -0
  21. ads/aqua/server/aqua_spec.yml +1291 -0
  22. ads/aqua/ui.py +5 -199
  23. ads/common/auth.py +20 -11
  24. ads/common/utils.py +91 -11
  25. ads/config.py +3 -0
  26. ads/llm/__init__.py +1 -0
  27. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +32 -23
  28. ads/model/artifact_downloader.py +4 -1
  29. ads/model/common/utils.py +15 -3
  30. ads/model/datascience_model.py +339 -8
  31. ads/model/model_metadata.py +54 -14
  32. ads/model/model_version_set.py +5 -3
  33. ads/model/service/oci_datascience_model.py +477 -5
  34. ads/opctl/operator/common/utils.py +16 -0
  35. ads/opctl/operator/lowcode/anomaly/model/base_model.py +3 -3
  36. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +1 -1
  37. ads/opctl/operator/lowcode/anomaly/utils.py +1 -1
  38. ads/opctl/operator/lowcode/common/data.py +5 -2
  39. ads/opctl/operator/lowcode/common/transformations.py +7 -13
  40. ads/opctl/operator/lowcode/common/utils.py +7 -2
  41. ads/opctl/operator/lowcode/forecast/model/arima.py +15 -10
  42. ads/opctl/operator/lowcode/forecast/model/automlx.py +39 -9
  43. ads/opctl/operator/lowcode/forecast/model/autots.py +7 -5
  44. ads/opctl/operator/lowcode/forecast/model/base_model.py +135 -110
  45. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +30 -14
  46. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +2 -2
  47. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +46 -32
  48. ads/opctl/operator/lowcode/forecast/model/prophet.py +82 -29
  49. ads/opctl/operator/lowcode/forecast/model_evaluator.py +142 -62
  50. ads/opctl/operator/lowcode/forecast/operator_config.py +29 -3
  51. ads/opctl/operator/lowcode/forecast/schema.yaml +1 -1
  52. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +108 -56
  53. {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info}/METADATA +15 -12
  54. {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info}/RECORD +57 -53
  55. {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info}/WHEEL +1 -1
  56. ads/aqua/config/evaluation/evaluation_service_model_config.py +0 -8
  57. {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info}/entry_points.txt +0 -0
  58. {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info/licenses}/LICENSE.txt +0 -0
ads/opctl/operator/lowcode/forecast/model_evaluator.py
@@ -1,20 +1,21 @@
-# -*- coding: utf-8; -*-
-
 # Copyright (c) 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
-from pathlib import Path
 
 from ads.opctl import logger
 from ads.opctl.operator.lowcode.common.const import DataColumns
+from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
 from ads.opctl.operator.lowcode.forecast.const import BACKTEST_REPORT_NAME
+from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
+
 from .model.forecast_datasets import ForecastDatasets
 from .operator_config import ForecastOperatorConfig
-from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
-from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
+
 
 class ModelEvaluator:
     """
@@ -23,6 +24,7 @@ class ModelEvaluator:
     This class is responsible for comparing different models or frameworks based on specified evaluation
     metrics and returning the best-performing option.
     """
+
     def __init__(self, models, k=5, subsample_ratio=0.20):
         """
         Initializes the ModelEvaluator with a list of models, number of backtests and subsample ratio.
@@ -40,23 +42,33 @@
 
     def generate_cutoffs(self, unique_dates, horizon):
         sorted_dates = np.sort(unique_dates)
-        train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(self.k)]
+        train_window_size = [
+            len(sorted_dates) - (i + 1) * horizon for i in range(self.k)
+        ]
         valid_train_window_size = [ws for ws in train_window_size if ws >= horizon * 2]
         if len(valid_train_window_size) < self.k:
-            logger.warn(f"Only {valid_train_window_size} backtests can be created")
-        cut_offs = sorted_dates[-horizon - 1:-horizon * (self.k + 1):-horizon][:len(valid_train_window_size)]
+            logger.warning(f"Only {valid_train_window_size} backtests can be created")
+        cut_offs = sorted_dates[-horizon - 1 : -horizon * (self.k + 1) : -horizon][
+            : len(valid_train_window_size)
+        ]
         return cut_offs
 
-    def generate_k_fold_data(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
+    def generate_k_fold_data(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
         date_col = operator_config.spec.datetime_column.name
         horizon = operator_config.spec.horizon
         historical_data = datasets.historical_data.data.reset_index()
         series_col = DataColumns.Series
         group_counts = historical_data[series_col].value_counts()
 
-        sample_count = max(self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio))
+        sample_count = max(
+            self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio)
+        )
         sampled_groups = group_counts.head(sample_count)
-        sampled_historical_data = historical_data[historical_data[series_col].isin(sampled_groups.index)]
+        sampled_historical_data = historical_data[
+            historical_data[series_col].isin(sampled_groups.index)
+        ]
 
         min_group = group_counts.idxmin()
         min_series_data = historical_data[historical_data[series_col] == min_group]
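As a sanity check on the slicing in generate_cutoffs above, here is a minimal, self-contained sketch with made-up inputs (20 daily dates, horizon=3, k=5; dates and counts are purely illustrative):

import numpy as np
import pandas as pd

k, horizon = 5, 3
sorted_dates = np.sort(pd.date_range("2024-01-01", periods=20, freq="D"))

# Training window lengths for each requested backtest.
train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(k)]
# -> [17, 14, 11, 8, 5]; only windows >= 2 * horizon (here 6) survive.
valid = [ws for ws in train_window_size if ws >= horizon * 2]  # [17, 14, 11, 8]

# Walk backwards from the end in steps of `horizon`, keeping one cutoff
# per valid window.
cut_offs = sorted_dates[-horizon - 1 : -horizon * (k + 1) : -horizon][: len(valid)]
print(cut_offs)  # 2024-01-17, 2024-01-14, 2024-01-11, 2024-01-08

Only four of the five requested backtests survive here, because the fifth training window (5 points) is shorter than 2 * horizon; this is exactly the case where the code logs its "Only ... backtests can be created" warning.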
@@ -64,99 +76,167 @@ class ModelEvaluator:
 
         cut_offs = self.generate_cutoffs(unique_dates, horizon)
         if not len(cut_offs):
-            raise InsufficientDataError("Insufficient data to evaluate multiple models. Please specify a model "
-                                        "instead of using auto-select.")
-        training_datasets = [sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date] for cut_off_date
-                             in cut_offs]
-        test_datasets = [sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]]
+            raise InsufficientDataError(
+                "Insufficient data to evaluate multiple models. Please specify a model "
+                "instead of using auto-select."
+            )
+        training_datasets = [
+            sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date]
+            for cut_off_date in cut_offs
+        ]
+        test_datasets = [
+            sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]
+        ]
         for i, current in enumerate(cut_offs[1:]):
-            test_datasets.append(sampled_historical_data[(current < sampled_historical_data[date_col]) & (
-                sampled_historical_data[date_col] <= cut_offs[i])])
+            test_datasets.append(
+                sampled_historical_data[
+                    (current < sampled_historical_data[date_col])
+                    & (sampled_historical_data[date_col] <= cut_offs[i])
+                ]
+            )
         all_additional = datasets.additional_data.data.reset_index()
-        sampled_additional_data = all_additional[all_additional[series_col].isin(sampled_groups.index)]
+        sampled_additional_data = all_additional[
+            all_additional[series_col].isin(sampled_groups.index)
+        ]
         max_historical_date = sampled_historical_data[date_col].max()
-        additional_data = [sampled_additional_data[sampled_additional_data[date_col] <= max_historical_date]]
+        additional_data = [
+            sampled_additional_data[
+                sampled_additional_data[date_col] <= max_historical_date
+            ]
+        ]
         for cut_off in cut_offs[:-1]:
-            trimmed_additional_data = sampled_additional_data[sampled_additional_data[date_col] <= cut_off]
+            trimmed_additional_data = sampled_additional_data[
+                sampled_additional_data[date_col] <= cut_off
+            ]
             additional_data.append(trimmed_additional_data)
         return cut_offs, training_datasets, additional_data, test_datasets
 
     def remove_none_values(self, obj):
         if isinstance(obj, dict):
-            return {k: self.remove_none_values(v) for k, v in obj.items() if k is not None and v is not None}
+            return {
+                k: self.remove_none_values(v)
+                for k, v in obj.items()
+                if k is not None and v is not None
+            }
         else:
             return obj
 
-    def create_operator_config(self, operator_config, backtest, model, historical_data, additional_data, test_data):
+    def create_operator_config(
+        self,
+        operator_config,
+        backtest,
+        model,
+        historical_data,
+        additional_data,
+        test_data,
+    ):
         output_dir = operator_config.spec.output_directory.url
-        output_file_path = f'{output_dir}/back_testing/{model}/{backtest}'
+        output_file_path = f"{output_dir}/back_testing/{model}/{backtest}"
         Path(output_file_path).mkdir(parents=True, exist_ok=True)
-        historical_data_url = f'{output_file_path}/historical.csv'
-        additional_data_url = f'{output_file_path}/additional.csv'
-        test_data_url = f'{output_file_path}/test.csv'
-        historical_data.to_csv(historical_data_url, index=False)
-        additional_data.to_csv(additional_data_url, index=False)
-        test_data.to_csv(test_data_url, index=False)
         backtest_op_config_draft = operator_config.to_dict()
         backtest_spec = backtest_op_config_draft["spec"]
-        backtest_spec["historical_data"]["url"] = historical_data_url
-        if backtest_spec["additional_data"]:
-            backtest_spec["additional_data"]["url"] = additional_data_url
-        backtest_spec["test_data"] = {}
-        backtest_spec["test_data"]["url"] = test_data_url
+        backtest_spec["datetime_column"]["format"] = None
+        backtest_spec.pop("test_data")
+        backtest_spec.pop("additional_data")
+        backtest_spec.pop("historical_data")
+        backtest_spec["generate_report"] = False
         backtest_spec["model"] = model
-        backtest_spec['model_kwargs'] = None
+        backtest_spec["model_kwargs"] = None
         backtest_spec["output_directory"] = {"url": output_file_path}
         backtest_spec["target_category_columns"] = [DataColumns.Series]
-        backtest_spec['generate_explanations'] = False
+        backtest_spec["generate_explanations"] = False
         cleaned_config = self.remove_none_values(backtest_op_config_draft)
 
-        backtest_op_config = ForecastOperatorConfig.from_dict(
-            obj_dict=cleaned_config)
+        backtest_op_config = ForecastOperatorConfig.from_dict(obj_dict=cleaned_config)
         return backtest_op_config
 
-    def run_all_models(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
-        cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(datasets, operator_config)
+    def run_all_models(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
+        cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(
+            datasets, operator_config
+        )
         metrics = {}
+        date_col = operator_config.spec.datetime_column.name
         for model in self.models:
             from .model.factory import ForecastOperatorModelFactory
+
             metrics[model] = {}
             for i in range(len(cut_offs)):
                 try:
-                    backtest_historical_data = train_sets[i]
-                    backtest_additional_data = additional_data[i]
-                    backtest_test_data = test_sets[i]
-                    backtest_operator_config = self.create_operator_config(operator_config, i, model,
-                                                                           backtest_historical_data,
-                                                                           backtest_additional_data,
-                                                                           backtest_test_data)
-                    datasets = ForecastDatasets(backtest_operator_config)
+                    backtest_historical_data = train_sets[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_additional_data = additional_data[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_test_data = test_sets[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_operator_config = self.create_operator_config(
+                        operator_config,
+                        i,
+                        model,
+                        backtest_historical_data,
+                        backtest_additional_data,
+                        backtest_test_data,
+                    )
+                    datasets = ForecastDatasets(
+                        backtest_operator_config,
+                        backtest_historical_data,
+                        backtest_additional_data,
+                        backtest_test_data,
+                    )
                     ForecastOperatorModelFactory.get_model(
                         backtest_operator_config, datasets
                     ).generate_report()
-                    test_metrics_filename = backtest_operator_config.spec.test_metrics_filename
+                    test_metrics_filename = (
+                        backtest_operator_config.spec.test_metrics_filename
+                    )
                     metrics_df = pd.read_csv(
-                        f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}")
-                    metrics_df["average_across_series"] = metrics_df.drop('metrics', axis=1).mean(axis=1)
-                    metrics_average_dict = dict(zip(metrics_df['metrics'].str.lower(), metrics_df['average_across_series']))
-                    metrics[model][i] = metrics_average_dict[operator_config.spec.metric]
+                        f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}"
+                    )
+                    metrics_df["average_across_series"] = metrics_df.drop(
+                        "metrics", axis=1
+                    ).mean(axis=1)
+                    metrics_average_dict = dict(
+                        zip(
+                            metrics_df["metrics"].str.lower(),
+                            metrics_df["average_across_series"],
+                        )
+                    )
+                    metrics[model][i] = metrics_average_dict[
+                        operator_config.spec.metric
+                    ]
                 except:
-                    logger.warn(f"Failed to calculate metrics for {model} and {i} backtest")
+                    logger.warning(
+                        f"Failed to calculate metrics for {model} and {i} backtest"
+                    )
         return metrics
 
-    def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
+    def find_best_model(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
         try:
             metrics = self.run_all_models(datasets, operator_config)
         except InsufficientDataError as e:
             model = SupportedModels.Prophet
-            logger.error(f"Running {model} model as auto-select failed with the following error: {e.message}")
+            logger.error(
+                f"Running {model} model as auto-select failed with the following error: {e.message}"
+            )
             return model
-        nonempty_metrics = {model: metric for model, metric in metrics.items() if metric != {}}
-        avg_backtests_metric = {model: sum(value.values()) / len(value.values())
-                                for model, value in nonempty_metrics.items()}
+        nonempty_metrics = {
+            model: metric for model, metric in metrics.items() if metric != {}
+        }
+        avg_backtests_metric = {
+            model: sum(value.values()) / len(value.values())
+            for model, value in nonempty_metrics.items()
+        }
         best_model = min(avg_backtests_metric, key=avg_backtests_metric.get)
-        logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")
-        backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis('backtest')
+        logger.info(
+            f"Among models {self.models}, {best_model} model shows better performance during backtesting."
+        )
+        backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis("backtest")
         backtest_stats["metric"] = operator_config.spec.metric
         backtest_stats.reset_index(inplace=True)
         output_dir = operator_config.spec.output_directory.url
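For clarity, the selection rule in find_best_model reduces to: drop models whose every backtest failed, average each remaining model's per-backtest scores, and take the minimum, which is appropriate for error metrics such as the default SMAPE. A toy illustration with hypothetical metric values:

# Hypothetical per-backtest scores keyed by model, as run_all_models returns.
metrics = {
    "prophet": {0: 12.1, 1: 10.4, 2: 11.0},
    "arima": {0: 14.0, 1: 13.2, 2: 12.9},
    "automlx": {},  # every backtest failed -> dropped as empty
}
nonempty = {m: v for m, v in metrics.items() if v != {}}
avg = {m: sum(v.values()) / len(v.values()) for m, v in nonempty.items()}
best_model = min(avg, key=avg.get)  # -> "prophet" (lowest average error)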
ads/opctl/operator/lowcode/forecast/operator_config.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import os
@@ -18,9 +18,11 @@ from ads.opctl.operator.lowcode.common.utils import find_output_dirname
 
 from .const import SpeedAccuracyMode, SupportedMetrics, SupportedModels
 
+
 @dataclass
 class AutoScaling(DataClassSerializable):
     """Class representing simple autoscaling policy"""
+
     minimum_instance: int = 1
     maximum_instance: int = None
     cool_down_in_seconds: int = 600
@@ -28,9 +30,11 @@ class AutoScaling(DataClassSerializable):
     scale_out_threshold: int = 80
     scaling_metric: str = "CPU_UTILIZATION"
 
+
 @dataclass(repr=True)
 class ModelDeploymentServer(DataClassSerializable):
     """Class representing model deployment server specification for whatif-analysis."""
+
     display_name: str = None
     initial_shape: str = None
     description: str = None
@@ -42,10 +46,13 @@ class ModelDeploymentServer(DataClassSerializable):
 @dataclass(repr=True)
 class WhatIfAnalysis(DataClassSerializable):
     """Class representing operator specification for whatif-analysis."""
+
     model_display_name: str = None
     compartment_id: str = None
     project_id: str = None
-    model_deployment: ModelDeploymentServer = field(default_factory=ModelDeploymentServer)
+    model_deployment: ModelDeploymentServer = field(
+        default_factory=ModelDeploymentServer
+    )
 
 
 @dataclass(repr=True)
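The field(default_factory=ModelDeploymentServer) default follows the standard dataclass rule that mutable defaults must be constructed per instance rather than shared. A minimal sketch with stand-in classes (names are illustrative, not from the package):

from dataclasses import dataclass, field

@dataclass
class Server:
    display_name: str = None

@dataclass
class Analysis:
    # default_factory builds a fresh Server for every Analysis instance
    model_deployment: Server = field(default_factory=Server)

a, b = Analysis(), Analysis()
assert a.model_deployment is not b.model_deployment  # independent defaults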
@@ -106,8 +113,11 @@ class ForecastOperatorSpec(DataClassSerializable):
     datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn)
     target_category_columns: List[str] = field(default_factory=list)
     generate_report: bool = None
+    generate_forecast_file: bool = None
     generate_metrics: bool = None
+    generate_metrics_file: bool = None
     generate_explanations: bool = None
+    generate_explanation_files: bool = None
     explanations_accuracy_mode: str = None
     horizon: int = None
     model: str = None
@@ -126,7 +136,7 @@ class ForecastOperatorSpec(DataClassSerializable):
         self.output_directory = self.output_directory or OutputDirectory(
             url=find_output_dirname(self.output_directory)
         )
-        self.generate_model_pickle = True if self.generate_model_pickle or self.what_if_analysis else False
+        self.generate_model_pickle = self.generate_model_pickle or self.what_if_analysis
         self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower()
         self.model = self.model or SupportedModels.Prophet
         self.confidence_interval_width = self.confidence_interval_width or 0.80
@@ -144,6 +154,21 @@ class ForecastOperatorSpec(DataClassSerializable):
         self.generate_metrics = (
             self.generate_metrics if self.generate_metrics is not None else True
         )
+        self.generate_metrics_file = (
+            self.generate_metrics_file
+            if self.generate_metrics_file is not None
+            else True
+        )
+        self.generate_forecast_file = (
+            self.generate_forecast_file
+            if self.generate_forecast_file is not None
+            else True
+        )
+        self.generate_explanation_files = (
+            self.generate_explanation_files
+            if self.generate_explanation_files is not None
+            else True
+        )
         # For Explanations Generation. When user doesn't specify defaults to False
         self.generate_explanations = (
             self.generate_explanations
@@ -164,6 +189,7 @@ class ForecastOperatorSpec(DataClassSerializable):
             if self.generate_model_pickle is not None
             else False
         )
+        self.report_title = self.report_title or "Forecast Report"
         self.report_theme = self.report_theme or "light"
         self.metrics_filename = self.metrics_filename or "metrics.csv"
         self.test_metrics_filename = self.test_metrics_filename or "test_metrics.csv"
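The three new generate_*_file flags all use the same opt-out pattern: fall back to True only when the flag was left unset, so an explicit False is preserved. A small sketch of that resolution logic (the resolve helper is illustrative, not part of the codebase):

def resolve(flag, default=True):
    # mirrors `x if x is not None else True` from __post_init__ above
    return flag if flag is not None else default

assert resolve(None) is True    # unspecified -> file generation enabled
assert resolve(False) is False  # explicit opt-out respected
assert resolve(True) is True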
ads/opctl/operator/lowcode/forecast/schema.yaml
@@ -379,7 +379,7 @@ spec:
         required: true
       log_id:
         type: string
-        required: true
+        required: false
       auto_scaling:
         type: dict
         required: false
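With log_id relaxed to required: false, a what-if-analysis model deployment spec should now validate without an attached log. A hypothetical spec fragment, shown as the equivalent Python dict (values invented; field names taken from the ModelDeploymentServer dataclass above):

model_deployment_spec = {
    "display_name": "forecast-whatif-md",        # hypothetical values
    "initial_shape": "VM.Standard.E4.Flex",
    "description": "Backtest what-if deployment",
    # "log_id": "ocid1.log.oc1..example",  # may now be omitted entirely
}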