oracle-ads 2.13.1rc0-py3-none-any.whl → 2.13.2rc1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +7 -1
- ads/aqua/app.py +24 -23
- ads/aqua/client/client.py +48 -11
- ads/aqua/common/entities.py +28 -1
- ads/aqua/common/enums.py +13 -7
- ads/aqua/common/utils.py +8 -13
- ads/aqua/config/container_config.py +203 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +5 -181
- ads/aqua/constants.py +0 -1
- ads/aqua/evaluation/evaluation.py +4 -4
- ads/aqua/extension/base_handler.py +4 -0
- ads/aqua/extension/model_handler.py +19 -28
- ads/aqua/finetuning/finetuning.py +2 -3
- ads/aqua/model/entities.py +2 -3
- ads/aqua/model/model.py +25 -30
- ads/aqua/modeldeployment/deployment.py +6 -14
- ads/aqua/modeldeployment/entities.py +2 -2
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/ui.py +5 -199
- ads/common/auth.py +20 -11
- ads/common/utils.py +91 -11
- ads/config.py +3 -0
- ads/llm/__init__.py +1 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +32 -23
- ads/model/artifact_downloader.py +4 -1
- ads/model/common/utils.py +15 -3
- ads/model/datascience_model.py +339 -8
- ads/model/model_metadata.py +54 -14
- ads/model/model_version_set.py +5 -3
- ads/model/service/oci_datascience_model.py +477 -5
- ads/opctl/operator/common/utils.py +16 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +3 -3
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +1 -1
- ads/opctl/operator/lowcode/anomaly/utils.py +1 -1
- ads/opctl/operator/lowcode/common/data.py +5 -2
- ads/opctl/operator/lowcode/common/transformations.py +7 -13
- ads/opctl/operator/lowcode/common/utils.py +7 -2
- ads/opctl/operator/lowcode/forecast/model/arima.py +15 -10
- ads/opctl/operator/lowcode/forecast/model/automlx.py +39 -9
- ads/opctl/operator/lowcode/forecast/model/autots.py +7 -5
- ads/opctl/operator/lowcode/forecast/model/base_model.py +135 -110
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +30 -14
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +2 -2
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +46 -32
- ads/opctl/operator/lowcode/forecast/model/prophet.py +82 -29
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +142 -62
- ads/opctl/operator/lowcode/forecast/operator_config.py +29 -3
- ads/opctl/operator/lowcode/forecast/schema.yaml +1 -1
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +108 -56
- {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info}/METADATA +15 -12
- {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info}/RECORD +57 -53
- {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info}/WHEEL +1 -1
- ads/aqua/config/evaluation/evaluation_service_model_config.py +0 -8
- {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.1rc0.dist-info → oracle_ads-2.13.2rc1.dist-info/licenses}/LICENSE.txt +0 -0
ads/opctl/operator/lowcode/forecast/model_evaluator.py

@@ -1,20 +1,21 @@
-# -*- coding: utf-8; -*-
-
 # Copyright (c) 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
-from pathlib import Path
 
 from ads.opctl import logger
 from ads.opctl.operator.lowcode.common.const import DataColumns
+from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
 from ads.opctl.operator.lowcode.forecast.const import BACKTEST_REPORT_NAME
+from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
+
 from .model.forecast_datasets import ForecastDatasets
 from .operator_config import ForecastOperatorConfig
-
-from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
+
 
 class ModelEvaluator:
     """
@@ -23,6 +24,7 @@ class ModelEvaluator:
     This class is responsible for comparing different models or frameworks based on specified evaluation
     metrics and returning the best-performing option.
     """
+
     def __init__(self, models, k=5, subsample_ratio=0.20):
         """
         Initializes the ModelEvaluator with a list of models, number of backtests and subsample ratio.
@@ -40,23 +42,33 @@ class ModelEvaluator:
 
     def generate_cutoffs(self, unique_dates, horizon):
         sorted_dates = np.sort(unique_dates)
-        train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(self.k)]
+        train_window_size = [
+            len(sorted_dates) - (i + 1) * horizon for i in range(self.k)
+        ]
         valid_train_window_size = [ws for ws in train_window_size if ws >= horizon * 2]
         if len(valid_train_window_size) < self.k:
-            logger.warn(f"Only {valid_train_window_size} backtests can be created")
-        cut_offs = sorted_dates[-horizon - 1:-horizon * (self.k + 1):-horizon][:len(valid_train_window_size)]
+            logger.warning(f"Only {valid_train_window_size} backtests can be created")
+        cut_offs = sorted_dates[-horizon - 1 : -horizon * (self.k + 1) : -horizon][
+            : len(valid_train_window_size)
+        ]
         return cut_offs
 
-    def generate_k_fold_data(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
+    def generate_k_fold_data(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
         date_col = operator_config.spec.datetime_column.name
         horizon = operator_config.spec.horizon
         historical_data = datasets.historical_data.data.reset_index()
         series_col = DataColumns.Series
         group_counts = historical_data[series_col].value_counts()
 
-        sample_count = max(self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio))
+        sample_count = max(
+            self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio)
+        )
         sampled_groups = group_counts.head(sample_count)
-        sampled_historical_data = historical_data[historical_data[series_col].isin(sampled_groups.index)]
+        sampled_historical_data = historical_data[
+            historical_data[series_col].isin(sampled_groups.index)
+        ]
 
         min_group = group_counts.idxmin()
         min_series_data = historical_data[historical_data[series_col] == min_group]
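To see what the reformatted `generate_cutoffs` above actually computes, here is a standalone sketch with toy numbers (an illustration only, not part of the package):

```python
import numpy as np

# Toy walk-through of the cutoff arithmetic in generate_cutoffs above.
# With 30 unique dates, horizon=5, k=5: each candidate backtest trains on a
# window one horizon shorter than the previous one, and windows shorter
# than 2 * horizon are dropped.
sorted_dates = np.arange(30)  # stand-in for the sorted unique dates
horizon, k = 5, 5

train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(k)]
# [25, 20, 15, 10, 5] -> 5 < 2 * horizon, so only 4 backtests survive
valid_train_window_size = [ws for ws in train_window_size if ws >= horizon * 2]

# Walk backwards from the end in steps of one horizon, keeping one cutoff
# per surviving backtest; each cutoff is the last training date of a fold.
cut_offs = sorted_dates[-horizon - 1 : -horizon * (k + 1) : -horizon][
    : len(valid_train_window_size)
]
print(cut_offs)  # [24 19 14  9]
```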
@@ -64,99 +76,167 @@ class ModelEvaluator:
 
         cut_offs = self.generate_cutoffs(unique_dates, horizon)
         if not len(cut_offs):
-            raise InsufficientDataError(
-                "Insufficient data to evaluate multiple models. Please specify a model "
-                "instead of using auto-select.")
-        training_datasets = [sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date] for cut_off_date in cut_offs]
-        test_datasets = [sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]]
+            raise InsufficientDataError(
+                "Insufficient data to evaluate multiple models. Please specify a model "
+                "instead of using auto-select."
+            )
+        training_datasets = [
+            sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date]
+            for cut_off_date in cut_offs
+        ]
+        test_datasets = [
+            sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]
+        ]
         for i, current in enumerate(cut_offs[1:]):
-            test_datasets.append(
-                sampled_historical_data[(current < sampled_historical_data[date_col]) & (sampled_historical_data[date_col] <= cut_offs[i])])
+            test_datasets.append(
+                sampled_historical_data[
+                    (current < sampled_historical_data[date_col])
+                    & (sampled_historical_data[date_col] <= cut_offs[i])
+                ]
+            )
         all_additional = datasets.additional_data.data.reset_index()
-        sampled_additional_data = all_additional[all_additional[series_col].isin(sampled_groups.index)]
+        sampled_additional_data = all_additional[
+            all_additional[series_col].isin(sampled_groups.index)
+        ]
         max_historical_date = sampled_historical_data[date_col].max()
-        additional_data = [sampled_additional_data[sampled_additional_data[date_col] <= max_historical_date]]
+        additional_data = [
+            sampled_additional_data[
+                sampled_additional_data[date_col] <= max_historical_date
+            ]
+        ]
         for cut_off in cut_offs[:-1]:
-            trimmed_additional_data = sampled_additional_data[sampled_additional_data[date_col] <= cut_off]
+            trimmed_additional_data = sampled_additional_data[
+                sampled_additional_data[date_col] <= cut_off
+            ]
             additional_data.append(trimmed_additional_data)
         return cut_offs, training_datasets, additional_data, test_datasets
 
     def remove_none_values(self, obj):
         if isinstance(obj, dict):
-            return {k: self.remove_none_values(v) for k, v in obj.items() if k is not None and v is not None}
+            return {
+                k: self.remove_none_values(v)
+                for k, v in obj.items()
+                if k is not None and v is not None
+            }
         else:
             return obj
 
-    def create_operator_config(self, operator_config, backtest, model, historical_data, additional_data, test_data):
+    def create_operator_config(
+        self,
+        operator_config,
+        backtest,
+        model,
+        historical_data,
+        additional_data,
+        test_data,
+    ):
         output_dir = operator_config.spec.output_directory.url
-        output_file_path = f'{output_dir}/back_testing/{model}/{backtest}'
+        output_file_path = f"{output_dir}/back_testing/{model}/{backtest}"
         Path(output_file_path).mkdir(parents=True, exist_ok=True)
-        historical_data_url = f'{output_file_path}/historical.csv'
-        additional_data_url = f'{output_file_path}/additional.csv'
-        test_data_url = f'{output_file_path}/test.csv'
-        historical_data.to_csv(historical_data_url, index=False)
-        additional_data.to_csv(additional_data_url, index=False)
-        test_data.to_csv(test_data_url, index=False)
         backtest_op_config_draft = operator_config.to_dict()
         backtest_spec = backtest_op_config_draft["spec"]
-        backtest_spec["…
-        …
-        …
-        backtest_spec…
-        backtest_spec["…
+        backtest_spec["datetime_column"]["format"] = None
+        backtest_spec.pop("test_data")
+        backtest_spec.pop("additional_data")
+        backtest_spec.pop("historical_data")
+        backtest_spec["generate_report"] = False
         backtest_spec["model"] = model
-        backtest_spec[…
+        backtest_spec["model_kwargs"] = None
         backtest_spec["output_directory"] = {"url": output_file_path}
         backtest_spec["target_category_columns"] = [DataColumns.Series]
-        backtest_spec[…
+        backtest_spec["generate_explanations"] = False
         cleaned_config = self.remove_none_values(backtest_op_config_draft)
 
-        backtest_op_config = ForecastOperatorConfig.from_dict(
-            obj_dict=cleaned_config)
+        backtest_op_config = ForecastOperatorConfig.from_dict(obj_dict=cleaned_config)
         return backtest_op_config
 
-    def run_all_models(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
-        cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(datasets, operator_config)
+    def run_all_models(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
+        cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(
+            datasets, operator_config
+        )
         metrics = {}
+        date_col = operator_config.spec.datetime_column.name
         for model in self.models:
             from .model.factory import ForecastOperatorModelFactory
+
             metrics[model] = {}
             for i in range(len(cut_offs)):
                 try:
-                    backtest_historical_data = train_sets[i]
-                    backtest_additional_data = additional_data[i]
-                    backtest_test_data = test_sets[i]
-                    …
-                    …
-                    …
-                    …
-                    …
+                    backtest_historical_data = train_sets[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_additional_data = additional_data[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_test_data = test_sets[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_operator_config = self.create_operator_config(
+                        operator_config,
+                        i,
+                        model,
+                        backtest_historical_data,
+                        backtest_additional_data,
+                        backtest_test_data,
+                    )
+                    datasets = ForecastDatasets(
+                        backtest_operator_config,
+                        backtest_historical_data,
+                        backtest_additional_data,
+                        backtest_test_data,
+                    )
                     ForecastOperatorModelFactory.get_model(
                         backtest_operator_config, datasets
                     ).generate_report()
-                    test_metrics_filename = backtest_operator_config.spec.test_metrics_filename
+                    test_metrics_filename = (
+                        backtest_operator_config.spec.test_metrics_filename
+                    )
                     metrics_df = pd.read_csv(
-                        f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}"
-                    )
-                    …
-                    …
+                        f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}"
+                    )
+                    metrics_df["average_across_series"] = metrics_df.drop(
+                        "metrics", axis=1
+                    ).mean(axis=1)
+                    metrics_average_dict = dict(
+                        zip(
+                            metrics_df["metrics"].str.lower(),
+                            metrics_df["average_across_series"],
+                        )
+                    )
+                    metrics[model][i] = metrics_average_dict[
+                        operator_config.spec.metric
+                    ]
                 except:
-                    logger.warn(f"Failed to calculate metrics for {model} and {i} backtest")
+                    logger.warning(
+                        f"Failed to calculate metrics for {model} and {i} backtest"
+                    )
         return metrics
 
-    def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
+    def find_best_model(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
         try:
             metrics = self.run_all_models(datasets, operator_config)
         except InsufficientDataError as e:
             model = SupportedModels.Prophet
-            logger.error(f"Running {model} model as auto-select failed with the following error: {e.message}")
+            logger.error(
+                f"Running {model} model as auto-select failed with the following error: {e.message}"
+            )
             return model
-        nonempty_metrics = {model: metric for model, metric in metrics.items() if metric != {}}
-        avg_backtests_metric = {model: sum(value.values()) / len(value.values())
-                                for model, value in nonempty_metrics.items()}
+        nonempty_metrics = {
+            model: metric for model, metric in metrics.items() if metric != {}
+        }
+        avg_backtests_metric = {
+            model: sum(value.values()) / len(value.values())
+            for model, value in nonempty_metrics.items()
+        }
         best_model = min(avg_backtests_metric, key=avg_backtests_metric.get)
-        logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")
-        backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis('backtest')
+        logger.info(
+            f"Among models {self.models}, {best_model} model shows better performance during backtesting."
+        )
+        backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis("backtest")
         backtest_stats["metric"] = operator_config.spec.metric
         backtest_stats.reset_index(inplace=True)
         output_dir = operator_config.spec.output_directory.url
ads/opctl/operator/lowcode/forecast/operator_config.py

@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import os
@@ -18,9 +18,11 @@ from ads.opctl.operator.lowcode.common.utils import find_output_dirname
 
 from .const import SpeedAccuracyMode, SupportedMetrics, SupportedModels
 
+
 @dataclass
 class AutoScaling(DataClassSerializable):
     """Class representing simple autoscaling policy"""
+
     minimum_instance: int = 1
     maximum_instance: int = None
     cool_down_in_seconds: int = 600
@@ -28,9 +30,11 @@ class AutoScaling(DataClassSerializable):
     scale_out_threshold: int = 80
     scaling_metric: str = "CPU_UTILIZATION"
 
+
 @dataclass(repr=True)
 class ModelDeploymentServer(DataClassSerializable):
     """Class representing model deployment server specification for whatif-analysis."""
+
     display_name: str = None
     initial_shape: str = None
     description: str = None
@@ -42,10 +46,13 @@ class ModelDeploymentServer(DataClassSerializable):
 @dataclass(repr=True)
 class WhatIfAnalysis(DataClassSerializable):
     """Class representing operator specification for whatif-analysis."""
+
     model_display_name: str = None
     compartment_id: str = None
     project_id: str = None
-    model_deployment: ModelDeploymentServer = field(default_factory=ModelDeploymentServer)
+    model_deployment: ModelDeploymentServer = field(
+        default_factory=ModelDeploymentServer
+    )
 
 
 @dataclass(repr=True)
@@ -106,8 +113,11 @@ class ForecastOperatorSpec(DataClassSerializable):
     datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn)
     target_category_columns: List[str] = field(default_factory=list)
     generate_report: bool = None
+    generate_forecast_file: bool = None
     generate_metrics: bool = None
+    generate_metrics_file: bool = None
     generate_explanations: bool = None
+    generate_explanation_files: bool = None
     explanations_accuracy_mode: str = None
     horizon: int = None
     model: str = None
@@ -126,7 +136,7 @@ class ForecastOperatorSpec(DataClassSerializable):
         self.output_directory = self.output_directory or OutputDirectory(
             url=find_output_dirname(self.output_directory)
         )
-        self.generate_model_pickle = …
+        self.generate_model_pickle = self.generate_model_pickle or self.what_if_analysis
         self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower()
         self.model = self.model or SupportedModels.Prophet
         self.confidence_interval_width = self.confidence_interval_width or 0.80
@@ -144,6 +154,21 @@ class ForecastOperatorSpec(DataClassSerializable):
         self.generate_metrics = (
             self.generate_metrics if self.generate_metrics is not None else True
         )
+        self.generate_metrics_file = (
+            self.generate_metrics_file
+            if self.generate_metrics_file is not None
+            else True
+        )
+        self.generate_forecast_file = (
+            self.generate_forecast_file
+            if self.generate_forecast_file is not None
+            else True
+        )
+        self.generate_explanation_files = (
+            self.generate_explanation_files
+            if self.generate_explanation_files is not None
+            else True
+        )
         # For Explanations Generation. When user doesn't specify defaults to False
         self.generate_explanations = (
             self.generate_explanations
@@ -164,6 +189,7 @@ class ForecastOperatorSpec(DataClassSerializable):
             if self.generate_model_pickle is not None
             else False
        )
+        self.report_title = self.report_title or "Forecast Report"
         self.report_theme = self.report_theme or "light"
         self.metrics_filename = self.metrics_filename or "metrics.csv"
         self.test_metrics_filename = self.test_metrics_filename or "test_metrics.csv"
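One detail worth noting in the `__post_init__` changes above: the new `generate_*_file` flags default via `x if x is not None else True` rather than `x or True`. A short sketch (not from the package) shows why the distinction matters:

```python
# Why the new flags default with "x if x is not None else True" rather than
# "x or True": with "or", a user's explicit False would be overwritten.
generate_metrics_file = False  # user explicitly disabled the metrics file

print(generate_metrics_file or True)  # True  -> the explicit False is lost
print(
    generate_metrics_file if generate_metrics_file is not None else True
)  # False -> the explicit setting is preserved
```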
|