oracle-ads 2.13.2__py3-none-any.whl → 2.13.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +3 -3
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +1 -1
- ads/opctl/operator/lowcode/anomaly/utils.py +1 -1
- ads/opctl/operator/lowcode/common/transformations.py +5 -1
- ads/opctl/operator/lowcode/common/utils.py +7 -2
- ads/opctl/operator/lowcode/forecast/model/arima.py +15 -10
- ads/opctl/operator/lowcode/forecast/model/automlx.py +31 -9
- ads/opctl/operator/lowcode/forecast/model/autots.py +7 -5
- ads/opctl/operator/lowcode/forecast/model/base_model.py +127 -101
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +14 -6
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +2 -2
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +46 -32
- ads/opctl/operator/lowcode/forecast/model/prophet.py +82 -29
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +136 -54
- ads/opctl/operator/lowcode/forecast/operator_config.py +29 -3
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +103 -58
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.2rc1.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.2rc1.dist-info}/RECORD +21 -21
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.2rc1.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.2rc1.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.2rc1.dist-info}/licenses/LICENSE.txt +0 -0
ads/opctl/operator/lowcode/forecast/model_evaluator.py

```diff
@@ -1,20 +1,21 @@
-# -*- coding: utf-8; -*-
-
 # Copyright (c) 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
-from pathlib import Path
 
 from ads.opctl import logger
 from ads.opctl.operator.lowcode.common.const import DataColumns
+from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
 from ads.opctl.operator.lowcode.forecast.const import BACKTEST_REPORT_NAME
+from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
+
 from .model.forecast_datasets import ForecastDatasets
 from .operator_config import ForecastOperatorConfig
-
-from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
+
 
 class ModelEvaluator:
     """
@@ -23,6 +24,7 @@ class ModelEvaluator:
     This class is responsible for comparing different models or frameworks based on specified evaluation
     metrics and returning the best-performing option.
    """
+
    def __init__(self, models, k=5, subsample_ratio=0.20):
        """
        Initializes the ModelEvaluator with a list of models, number of backtests and subsample ratio.
@@ -40,23 +42,33 @@ class ModelEvaluator:
 
     def generate_cutoffs(self, unique_dates, horizon):
         sorted_dates = np.sort(unique_dates)
-        train_window_size = [
+        train_window_size = [
+            len(sorted_dates) - (i + 1) * horizon for i in range(self.k)
+        ]
         valid_train_window_size = [ws for ws in train_window_size if ws >= horizon * 2]
         if len(valid_train_window_size) < self.k:
-            logger.
-        cut_offs = sorted_dates[-horizon - 1
+            logger.warning(f"Only {valid_train_window_size} backtests can be created")
+        cut_offs = sorted_dates[-horizon - 1 : -horizon * (self.k + 1) : -horizon][
+            : len(valid_train_window_size)
+        ]
         return cut_offs
 
-    def generate_k_fold_data(
+    def generate_k_fold_data(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
         date_col = operator_config.spec.datetime_column.name
         horizon = operator_config.spec.horizon
         historical_data = datasets.historical_data.data.reset_index()
         series_col = DataColumns.Series
         group_counts = historical_data[series_col].value_counts()
 
-        sample_count = max(
+        sample_count = max(
+            self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio)
+        )
         sampled_groups = group_counts.head(sample_count)
-        sampled_historical_data = historical_data[
+        sampled_historical_data = historical_data[
+            historical_data[series_col].isin(sampled_groups.index)
+        ]
 
         min_group = group_counts.idxmin()
         min_series_data = historical_data[historical_data[series_col] == min_group]
```
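The reformatted slice in `generate_cutoffs` is easier to sanity-check with concrete numbers. A minimal sketch, assuming 30 daily dates, a horizon of 5, and k = 3 (all made-up values), shows how the negative stride yields one cutoff per backtest window, each one horizon apart:

```python
import numpy as np

# Hypothetical inputs: 30 daily dates, horizon of 5, k = 3 backtests.
dates = np.arange("2024-01-01", "2024-01-31", dtype="datetime64[D]")
horizon, k = 5, 3

# The same slicing as generate_cutoffs: start one step before the final
# horizon, then walk backwards one horizon at a time, taking k cutoffs.
cut_offs = dates[-horizon - 1 : -horizon * (k + 1) : -horizon]
print(cut_offs)  # ['2024-01-25' '2024-01-20' '2024-01-15']
```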
```diff
@@ -64,32 +76,62 @@ class ModelEvaluator:
 
         cut_offs = self.generate_cutoffs(unique_dates, horizon)
         if not len(cut_offs):
-            raise InsufficientDataError(
-
-
-
-
+            raise InsufficientDataError(
+                "Insufficient data to evaluate multiple models. Please specify a model "
+                "instead of using auto-select."
+            )
+        training_datasets = [
+            sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date]
+            for cut_off_date in cut_offs
+        ]
+        test_datasets = [
+            sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]
+        ]
         for i, current in enumerate(cut_offs[1:]):
-            test_datasets.append(
-
+            test_datasets.append(
+                sampled_historical_data[
+                    (current < sampled_historical_data[date_col])
+                    & (sampled_historical_data[date_col] <= cut_offs[i])
+                ]
+            )
         all_additional = datasets.additional_data.data.reset_index()
-        sampled_additional_data = all_additional[
+        sampled_additional_data = all_additional[
+            all_additional[series_col].isin(sampled_groups.index)
+        ]
         max_historical_date = sampled_historical_data[date_col].max()
-        additional_data = [
+        additional_data = [
+            sampled_additional_data[
+                sampled_additional_data[date_col] <= max_historical_date
+            ]
+        ]
         for cut_off in cut_offs[:-1]:
-            trimmed_additional_data = sampled_additional_data[
+            trimmed_additional_data = sampled_additional_data[
+                sampled_additional_data[date_col] <= cut_off
+            ]
             additional_data.append(trimmed_additional_data)
         return cut_offs, training_datasets, additional_data, test_datasets
 
     def remove_none_values(self, obj):
         if isinstance(obj, dict):
-            return {
+            return {
+                k: self.remove_none_values(v)
+                for k, v in obj.items()
+                if k is not None and v is not None
+            }
         else:
             return obj
 
-    def create_operator_config(
+    def create_operator_config(
+        self,
+        operator_config,
+        backtest,
+        model,
+        historical_data,
+        additional_data,
+        test_data,
+    ):
         output_dir = operator_config.spec.output_directory.url
-        output_file_path = f
+        output_file_path = f"{output_dir}/back_testing/{model}/{backtest}"
         Path(output_file_path).mkdir(parents=True, exist_ok=True)
         backtest_op_config_draft = operator_config.to_dict()
         backtest_spec = backtest_op_config_draft["spec"]
```
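`remove_none_values` exists so the round-trip through `to_dict()`/`from_dict()` in `create_operator_config` does not resurrect unset fields as explicit nulls. A standalone sketch of the same recursion, written as a free function over an invented spec dict:

```python
# Standalone version of the recursive filter above: drops None keys and
# None values from nested dicts; non-dict values are returned unchanged.
def remove_none_values(obj):
    if isinstance(obj, dict):
        return {
            k: remove_none_values(v)
            for k, v in obj.items()
            if k is not None and v is not None
        }
    return obj

# Invented example spec:
spec = {"model": "prophet", "model_kwargs": None, "tuning": {"n_trials": None, "seed": 7}}
print(remove_none_values(spec))  # {'model': 'prophet', 'tuning': {'seed': 7}}
```

Note that the recursion only descends into dicts; a None nested inside a list would survive the filter.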
```diff
@@ -99,62 +141,102 @@ class ModelEvaluator:
         backtest_spec.pop("historical_data")
         backtest_spec["generate_report"] = False
         backtest_spec["model"] = model
-        backtest_spec[
+        backtest_spec["model_kwargs"] = None
         backtest_spec["output_directory"] = {"url": output_file_path}
         backtest_spec["target_category_columns"] = [DataColumns.Series]
-        backtest_spec[
+        backtest_spec["generate_explanations"] = False
         cleaned_config = self.remove_none_values(backtest_op_config_draft)
 
-        backtest_op_config = ForecastOperatorConfig.from_dict(
-            obj_dict=cleaned_config)
+        backtest_op_config = ForecastOperatorConfig.from_dict(obj_dict=cleaned_config)
         return backtest_op_config
 
-    def run_all_models(
-
+    def run_all_models(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
+        cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(
+            datasets, operator_config
+        )
         metrics = {}
         date_col = operator_config.spec.datetime_column.name
         for model in self.models:
             from .model.factory import ForecastOperatorModelFactory
+
             metrics[model] = {}
             for i in range(len(cut_offs)):
                 try:
-                    backtest_historical_data = train_sets[i].set_index(
-
-
-
-
-
-
-
-
-
-
+                    backtest_historical_data = train_sets[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_additional_data = additional_data[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_test_data = test_sets[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_operator_config = self.create_operator_config(
+                        operator_config,
+                        i,
+                        model,
+                        backtest_historical_data,
+                        backtest_additional_data,
+                        backtest_test_data,
+                    )
+                    datasets = ForecastDatasets(
+                        backtest_operator_config,
+                        backtest_historical_data,
+                        backtest_additional_data,
+                        backtest_test_data,
+                    )
                     ForecastOperatorModelFactory.get_model(
                         backtest_operator_config, datasets
                     ).generate_report()
-                    test_metrics_filename =
+                    test_metrics_filename = (
+                        backtest_operator_config.spec.test_metrics_filename
+                    )
                     metrics_df = pd.read_csv(
-                        f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}"
-
-
-
+                        f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}"
+                    )
+                    metrics_df["average_across_series"] = metrics_df.drop(
+                        "metrics", axis=1
+                    ).mean(axis=1)
+                    metrics_average_dict = dict(
+                        zip(
+                            metrics_df["metrics"].str.lower(),
+                            metrics_df["average_across_series"],
+                        )
+                    )
+                    metrics[model][i] = metrics_average_dict[
+                        operator_config.spec.metric
+                    ]
                 except:
-                    logger.
+                    logger.warning(
+                        f"Failed to calculate metrics for {model} and {i} backtest"
+                    )
         return metrics
 
-    def find_best_model(
+    def find_best_model(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
         try:
             metrics = self.run_all_models(datasets, operator_config)
         except InsufficientDataError as e:
             model = SupportedModels.Prophet
-            logger.error(
+            logger.error(
+                f"Running {model} model as auto-select failed with the following error: {e.message}"
+            )
             return model
-        nonempty_metrics = {
-
-
+        nonempty_metrics = {
+            model: metric for model, metric in metrics.items() if metric != {}
+        }
+        avg_backtests_metric = {
+            model: sum(value.values()) / len(value.values())
+            for model, value in nonempty_metrics.items()
+        }
         best_model = min(avg_backtests_metric, key=avg_backtests_metric.get)
-        logger.info(
-
+        logger.info(
+            f"Among models {self.models}, {best_model} model shows better performance during backtesting."
+        )
+        backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis("backtest")
         backtest_stats["metric"] = operator_config.spec.metric
         backtest_stats.reset_index(inplace=True)
         output_dir = operator_config.spec.output_directory.url
```
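`find_best_model` reduces the per-backtest metrics to one average per model and takes the minimum, which assumes a lower-is-better metric such as the sMAPE default. A sketch with invented numbers:

```python
# metrics[model][backtest_index], shaped like the run_all_models return value
# (invented values; lower is better for an error metric like sMAPE).
metrics = {
    "prophet": {0: 12.4, 1: 11.9, 2: 13.1},
    "arima": {0: 14.0, 1: 13.2, 2: 15.5},
    "automlx": {},  # every backtest failed, so it is filtered out below
}

nonempty_metrics = {m: v for m, v in metrics.items() if v != {}}
avg_backtests_metric = {
    m: sum(v.values()) / len(v.values()) for m, v in nonempty_metrics.items()
}
best_model = min(avg_backtests_metric, key=avg_backtests_metric.get)
print(best_model)  # prophet (average 12.47 vs. 14.23)
```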
ads/opctl/operator/lowcode/forecast/operator_config.py

```diff
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2023,
+# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import os
@@ -18,9 +18,11 @@ from ads.opctl.operator.lowcode.common.utils import find_output_dirname
 
 from .const import SpeedAccuracyMode, SupportedMetrics, SupportedModels
 
+
 @dataclass
 class AutoScaling(DataClassSerializable):
     """Class representing simple autoscaling policy"""
+
     minimum_instance: int = 1
     maximum_instance: int = None
     cool_down_in_seconds: int = 600
@@ -28,9 +30,11 @@ class AutoScaling(DataClassSerializable):
     scale_out_threshold: int = 80
     scaling_metric: str = "CPU_UTILIZATION"
 
+
 @dataclass(repr=True)
 class ModelDeploymentServer(DataClassSerializable):
     """Class representing model deployment server specification for whatif-analysis."""
+
     display_name: str = None
     initial_shape: str = None
     description: str = None
@@ -42,10 +46,13 @@ class ModelDeploymentServer(DataClassSerializable):
 @dataclass(repr=True)
 class WhatIfAnalysis(DataClassSerializable):
     """Class representing operator specification for whatif-analysis."""
+
     model_display_name: str = None
     compartment_id: str = None
     project_id: str = None
-    model_deployment: ModelDeploymentServer = field(
+    model_deployment: ModelDeploymentServer = field(
+        default_factory=ModelDeploymentServer
+    )
 
 
 @dataclass(repr=True)
@@ -106,8 +113,11 @@ class ForecastOperatorSpec(DataClassSerializable):
     datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn)
     target_category_columns: List[str] = field(default_factory=list)
     generate_report: bool = None
+    generate_forecast_file: bool = None
     generate_metrics: bool = None
+    generate_metrics_file: bool = None
     generate_explanations: bool = None
+    generate_explanation_files: bool = None
     explanations_accuracy_mode: str = None
     horizon: int = None
     model: str = None
@@ -126,7 +136,7 @@ class ForecastOperatorSpec(DataClassSerializable):
         self.output_directory = self.output_directory or OutputDirectory(
             url=find_output_dirname(self.output_directory)
         )
-        self.generate_model_pickle =
+        self.generate_model_pickle = self.generate_model_pickle or self.what_if_analysis
         self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower()
         self.model = self.model or SupportedModels.Prophet
         self.confidence_interval_width = self.confidence_interval_width or 0.80
@@ -144,6 +154,21 @@ class ForecastOperatorSpec(DataClassSerializable):
         self.generate_metrics = (
             self.generate_metrics if self.generate_metrics is not None else True
         )
+        self.generate_metrics_file = (
+            self.generate_metrics_file
+            if self.generate_metrics_file is not None
+            else True
+        )
+        self.generate_forecast_file = (
+            self.generate_forecast_file
+            if self.generate_forecast_file is not None
+            else True
+        )
+        self.generate_explanation_files = (
+            self.generate_explanation_files
+            if self.generate_explanation_files is not None
+            else True
+        )
         # For Explanations Generation. When user doesn't specify defaults to False
         self.generate_explanations = (
             self.generate_explanations
@@ -164,6 +189,7 @@ class ForecastOperatorSpec(DataClassSerializable):
             if self.generate_model_pickle is not None
             else False
         )
+        self.report_title = self.report_title or "Forecast Report"
         self.report_theme = self.report_theme or "light"
         self.metrics_filename = self.metrics_filename or "metrics.csv"
         self.test_metrics_filename = self.test_metrics_filename or "test_metrics.csv"
```
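The new `generate_*_file` flags use the `x if x is not None else True` idiom rather than `x or True` because the fields are tri-state (None means unset, True/False are explicit choices); a plain `or` would silently discard an explicit False:

```python
generate_metrics_file = False  # hypothetical explicit user setting

wrong = generate_metrics_file or True  # True: the user's False is lost
right = generate_metrics_file if generate_metrics_file is not None else True
print(wrong, right)  # True False
```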
ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py

```diff
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-import json
 # Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
@@ -8,39 +7,58 @@ import pickle
 import shutil
 import sys
 import tempfile
-import oci
 
-import pandas as pd
 import cloudpickle
+import oci
+import pandas as pd
+from oci.data_science import DataScienceClient, DataScienceClientCompositeOperations
+from oci.data_science.models import (
+    CategoryLogDetails,
+    CreateModelDeploymentDetails,
+    FixedSizeScalingPolicy,
+    InstanceConfiguration,
+    LogDetails,
+    ModelConfigurationDetails,
+    SingleModelDeploymentConfigurationDetails,
+)
 
-from ads.opctl import logger
 from ads.common.model_export_util import prepare_generic_model
+from ads.common.object_storage_details import ObjectStorageDetails
+from ads.opctl import logger
 from ads.opctl.operator.common.utils import create_log_in_log_group
-from ads.opctl.operator.lowcode.common.utils import
-
+from ads.opctl.operator.lowcode.common.utils import (
+    default_signer,
+    write_data,
+    write_simple_json,
+)
+
 from ..model.forecast_datasets import AdditionalData
 from ..operator_config import ForecastOperatorSpec
 
-from oci.data_science import DataScienceClient, DataScienceClientCompositeOperations
-
-from oci.data_science.models import ModelConfigurationDetails, InstanceConfiguration, \
-    FixedSizeScalingPolicy, CategoryLogDetails, LogDetails, \
-    SingleModelDeploymentConfigurationDetails, CreateModelDeploymentDetails
-from ads.common.object_storage_details import ObjectStorageDetails
-
 
 class ModelDeploymentManager:
-    def __init__(
+    def __init__(
+        self,
+        spec: ForecastOperatorSpec,
+        additional_data: AdditionalData,
+        previous_model_version=None,
+    ):
         self.spec = spec
         self.model_name = spec.model
         self.horizon = spec.horizon
         self.additional_data = additional_data.get_dict_by_series()
         self.model_obj = {}
         self.display_name = spec.what_if_analysis.model_display_name
-        self.project_id =
-
-
-        else os.environ.get(
+        self.project_id = (
+            spec.what_if_analysis.project_id
+            if spec.what_if_analysis.project_id
+            else os.environ.get("PROJECT_OCID")
+        )
+        self.compartment_id = (
+            spec.what_if_analysis.compartment_id
+            if spec.what_if_analysis.compartment_id
+            else os.environ.get("NB_SESSION_COMPARTMENT_OCID")
+        )
         if self.project_id is None or self.compartment_id is None:
             raise ValueError("Either project_id or compartment_id cannot be None.")
         self.path_to_artifact = f"{self.spec.output_directory.url}/artifacts/"
@@ -58,17 +76,23 @@ class ModelDeploymentManager:
         try:
             sys.path.insert(0, f"{self.path_to_artifact}")
             from score import load_model, predict
+
             _ = load_model()
 
             # Write additional data to tmp file and perform sanity check
-            with tempfile.NamedTemporaryFile(suffix=
+            with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file:
                 one_series = next(iter(self.additional_data))
-                sample_prediction_data = self.additional_data[one_series].tail(
-
+                sample_prediction_data = self.additional_data[one_series].tail(
+                    self.horizon
+                )
+                sample_prediction_data[self.spec.target_category_columns[0]] = (
+                    one_series
+                )
                 date_col_name = self.spec.datetime_column.name
                 date_col_format = self.spec.datetime_column.format
-                sample_prediction_data[date_col_name] = sample_prediction_data[
-
+                sample_prediction_data[date_col_name] = sample_prediction_data[
+                    date_col_name
+                ].dt.strftime(date_col_format)
                 sample_prediction_data.to_csv(temp_file.name, index=False)
                 input_data = {"additional_data": {"url": temp_file.name}}
                 prediction_test = predict(input_data, _)
```
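The sanity test round-trips one series' worth of additional data through a temp CSV and calls the scoring function once before anything is deployed. A minimal sketch of the same pattern, with a hypothetical `predict` standing in for the generated `score.py`:

```python
import tempfile

import pandas as pd


def predict(input_data, model):
    """Hypothetical stand-in for score.predict: load the CSV and score it."""
    df = pd.read_csv(input_data["additional_data"]["url"])
    return {"rows_scored": len(df)}


sample = pd.DataFrame({"Date": ["2024-01-01", "2024-01-02"], "promo": [0, 1]})
with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file:
    # Same shape of input the deployed endpoint receives: a URL to a CSV.
    sample.to_csv(temp_file.name, index=False)
    input_data = {"additional_data": {"url": temp_file.name}}
    print(predict(input_data, None))  # {'rows_scored': 2}
```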
```diff
@@ -86,16 +110,18 @@ class ModelDeploymentManager:
         try:
             current_dir = os.path.dirname(os.path.abspath(__file__))
             score_file = os.path.join(current_dir, "score.py")
-            destination_file = os.path.join(
+            destination_file = os.path.join(
+                self.path_to_artifact, os.path.basename(score_file)
+            )
             shutil.copy2(score_file, destination_file)
             logger.info(f"score.py copied successfully to {self.path_to_artifact}")
         except Exception as e:
-            logger.
+            logger.warning(f"Error copying file: {e}")
             raise e
 
     def save_to_catalog(self):
         """Save the model to a model catalog"""
-        with open(self.pickle_file_path,
+        with open(self.pickle_file_path, "rb") as file:
             self.model_obj = pickle.load(file)
 
         if not os.path.exists(self.path_to_artifact):
@@ -108,7 +134,8 @@ class ModelDeploymentManager:
             self.path_to_artifact,
             function_artifacts=False,
             force_overwrite=True,
-            data_science_env=True
+            data_science_env=True,
+        )
 
         self._copy_score_file()
         self._sanity_test()
@@ -124,11 +151,14 @@ class ModelDeploymentManager:
             display_name=self.display_name,
             compartment_id=self.compartment_id,
             project_id=self.project_id,
-            description=description
+            description=description,
+        )
         self.catalog_id = catalog_entry.id
 
-        logger.info(
-
+        logger.info(
+            f"Saved {self.model_name} version-v{self.model_version} to model catalog"
+            f" with model ocid : {self.catalog_id}"
+        )
 
         self.deployment_info = {"model_ocid": self.catalog_id, "series": list(series)}
 
@@ -154,19 +184,25 @@ class ModelDeploymentManager:
                                 metric_type=auto_scaling_config.scaling_metric,
                                 scale_in_configuration=oci.data_science.models.PredefinedExpressionThresholdScalingConfiguration(
                                     scaling_configuration_type="THRESHOLD",
-                                    threshold=auto_scaling_config.scale_in_threshold
+                                    threshold=auto_scaling_config.scale_in_threshold,
                                 ),
                                 scale_out_configuration=oci.data_science.models.PredefinedExpressionThresholdScalingConfiguration(
                                     scaling_configuration_type="THRESHOLD",
-                                    threshold=auto_scaling_config.scale_out_threshold
-                                )
-                            )
+                                    threshold=auto_scaling_config.scale_out_threshold,
+                                ),
+                            )
+                        ],
                         maximum_instance_count=auto_scaling_config.maximum_instance,
                         minimum_instance_count=auto_scaling_config.minimum_instance,
-                        initial_instance_count=auto_scaling_config.minimum_instance
+                        initial_instance_count=auto_scaling_config.minimum_instance,
+                    )
+                ],
                 cool_down_in_seconds=auto_scaling_config.cool_down_in_seconds,
-                is_enabled=True
-
+                is_enabled=True,
+            )
+            logger.info(
+                f"Using autoscaling {auto_scaling_config.scaling_metric} for creating MD"
+            )
         else:
             scaling_policy = FixedSizeScalingPolicy(instance_count=1)
             logger.info("Using fixed size policy for creating MD")
@@ -174,13 +210,15 @@ class ModelDeploymentManager:
         model_configuration_details_object = ModelConfigurationDetails(
             model_id=self.catalog_id,
             instance_configuration=InstanceConfiguration(
-                instance_shape_name=initial_shape
+                instance_shape_name=initial_shape
+            ),
             scaling_policy=scaling_policy,
-            bandwidth_mbps=20
+            bandwidth_mbps=20,
+        )
 
         single_model_config = SingleModelDeploymentConfigurationDetails(
-            deployment_type=
-            model_configuration_details=model_configuration_details_object
+            deployment_type="SINGLE_MODEL",
+            model_configuration_details=model_configuration_details_object,
         )
 
         log_group = self.spec.what_if_analysis.model_deployment.log_group
@@ -191,10 +229,9 @@ class ModelDeploymentManager:
             log_id = create_log_in_log_group(self.compartment_id, log_group, auth)
 
         logs_configuration_details_object = CategoryLogDetails(
-            access=LogDetails(log_group_id=log_group,
-
-
-            log_id=log_id))
+            access=LogDetails(log_group_id=log_group, log_id=log_id),
+            predict=LogDetails(log_group_id=log_group, log_id=log_id),
+        )
 
         model_deploy_configuration = CreateModelDeploymentDetails(
             display_name=name,
@@ -202,24 +239,30 @@ class ModelDeploymentManager:
             project_id=self.project_id,
             compartment_id=self.compartment_id,
             model_deployment_configuration_details=single_model_config,
-            category_log_details=logs_configuration_details_object
+            category_log_details=logs_configuration_details_object,
+        )
 
         if not self.test_mode:
             auth = oci.auth.signers.get_resource_principals_signer()
             data_science = DataScienceClient({}, signer=auth)
             data_science_composite = DataScienceClientCompositeOperations(data_science)
-            model_deployment =
-
-
-
+            model_deployment = (
+                data_science_composite.create_model_deployment_and_wait_for_state(
+                    model_deploy_configuration, wait_for_states=["SUCCEEDED", "FAILED"]
+                )
+            )
+            self.deployment_info["work_request"] = model_deployment.data.id
             logger.info(f"deployment metadata :{model_deployment.data}")
-            md = data_science.get_model_deployment(
-
+            md = data_science.get_model_deployment(
+                model_deployment_id=model_deployment.data.resources[0].identifier
+            )
+            self.deployment_info["model_deployment_ocid"] = md.data.id
+            self.deployment_info["status"] = md.data.lifecycle_state
             endpoint_url = md.data.model_deployment_url
-            self.deployment_info[
-
+            self.deployment_info["model_deployment_endpoint"] = (
+                f"{endpoint_url}/predict"
+            )
+            self.deployment_info["log_id"] = log_id
 
     def save_deployment_info(self):
         output_dir = self.spec.output_directory.url
@@ -234,7 +277,9 @@ class ModelDeploymentManager:
             storage_options=storage_options,
             index=False,
             indent=4,
-            orient="records"
+            orient="records",
+        )
+        write_simple_json(
+            self.deployment_info, os.path.join(output_dir, "deployment_info.json")
         )
-        write_simple_json(self.deployment_info, os.path.join(output_dir, "deployment_info.json"))
         logger.info(f"Saved deployment info to {output_dir}")
```
{oracle_ads-2.13.2.dist-info → oracle_ads-2.13.2rc1.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oracle_ads
-Version: 2.13.2
+Version: 2.13.2rc1
 Summary: Oracle Accelerated Data Science SDK
 Keywords: Oracle Cloud Infrastructure,OCI,Machine Learning,ML,Artificial Intelligence,AI,Data Science,Cloud,Oracle
 Author: Oracle Data Science
```