oracle-ads 2.11.6__py3-none-any.whl → 2.11.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/catalog/model.py +3 -3
- ads/catalog/notebook.py +3 -3
- ads/catalog/project.py +2 -2
- ads/catalog/summary.py +2 -4
- ads/cli.py +2 -1
- ads/common/serializer.py +1 -1
- ads/data_labeling/metadata.py +2 -2
- ads/dataset/dataset.py +3 -5
- ads/dataset/factory.py +2 -3
- ads/dataset/label_encoder.py +1 -1
- ads/dataset/sampled_dataset.py +3 -5
- ads/jobs/ads_job.py +26 -2
- ads/jobs/builders/infrastructure/dsc_job.py +20 -7
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +1 -1
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +8 -15
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +2 -1
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +2 -2
- ads/opctl/operator/lowcode/anomaly/operator_config.py +18 -1
- ads/opctl/operator/lowcode/anomaly/schema.yaml +16 -4
- ads/opctl/operator/lowcode/common/data.py +16 -2
- ads/opctl/operator/lowcode/common/transformations.py +48 -14
- ads/opctl/operator/lowcode/forecast/environment.yaml +1 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +21 -12
- ads/opctl/operator/lowcode/forecast/model/automlx.py +79 -72
- ads/opctl/operator/lowcode/forecast/model/autots.py +182 -164
- ads/opctl/operator/lowcode/forecast/model/base_model.py +59 -41
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +47 -47
- ads/opctl/operator/lowcode/forecast/model/prophet.py +48 -48
- ads/opctl/operator/lowcode/forecast/operator_config.py +18 -2
- ads/opctl/operator/lowcode/forecast/schema.yaml +20 -4
- ads/opctl/operator/lowcode/forecast/utils.py +4 -0
- ads/pipeline/ads_pipeline_step.py +11 -12
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.7.dist-info}/METADATA +4 -3
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.7.dist-info}/RECORD +37 -37
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.7.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.7.dist-info}/WHEEL +0 -0
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.7.dist-info}/entry_points.txt +0 -0
ads/opctl/operator/lowcode/common/transformations.py

```diff
@@ -58,26 +58,33 @@ class Transformations(ABC):
         clean_df = self._format_datetime_col(clean_df)
         clean_df = self._set_multi_index(clean_df)
 
-        if self.
-        [13 more removed lines not rendered in the source diff]
+        if self.preprocessing and self.preprocessing.enabled:
+            if self.name == "historical_data":
+                if self.preprocessing.steps.missing_value_imputation:
+                    try:
+                        clean_df = self._missing_value_imputation_hist(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Missing value imputation failed with {e.args}")
+                else:
+                    logger.info("Skipping missing value imputation because it is disabled")
+                if self.preprocessing.steps.outlier_treatment:
+                    try:
+                        clean_df = self._outlier_treatment(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Outlier Treatment failed with {e.args}")
+                else:
+                    logger.info("Skipping outlier treatment because it is disabled")
+            elif self.name == "additional_data":
+                clean_df = self._missing_value_imputation_add(clean_df)
+        else:
+            logger.info("Skipping all preprocessing steps because preprocessing is disabled")
         return clean_df
 
     def _remove_trailing_whitespace(self, df):
         return df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
 
     def _set_series_id_column(self, df):
+        self._target_category_columns_map = dict()
         if not self.target_category_columns:
             df[DataColumns.Series] = "Series 1"
             self.has_artificial_series = True
```
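The rewritten `run` gates each cleaning step on the new preprocessing spec (the matching `operator_config.py` and `schema.yaml` changes are listed above but not shown here). Below is a minimal sketch of the spec shape this code implies; the class and field names are assumptions, and only `enabled`, `steps.missing_value_imputation`, and `steps.outlier_treatment` are attested by the diff:

```python
# Minimal sketch (assumed names) of the preprocessing spec consumed above.
from dataclasses import dataclass, field

@dataclass
class PreprocessingSteps:  # hypothetical name
    missing_value_imputation: bool = True
    outlier_treatment: bool = True

@dataclass
class DataPreprocessor:  # hypothetical name
    enabled: bool = True
    steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)

# Disabling a single step while keeping the rest of preprocessing active:
preprocessing = DataPreprocessor(steps=PreprocessingSteps(outlier_treatment=False))
assert preprocessing.enabled and not preprocessing.steps.outlier_treatment
```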
```diff
@@ -85,6 +92,11 @@ class Transformations(ABC):
             df[DataColumns.Series] = merge_category_columns(
                 df, self.target_category_columns
             )
+            merged_values = df[DataColumns.Series].unique().tolist()
+            if self.target_category_columns:
+                for value in merged_values:
+                    self._target_category_columns_map[value] = df[df[DataColumns.Series] == value][self.target_category_columns].drop_duplicates().iloc[0].to_dict()
+
             df = df.drop(self.target_category_columns, axis=1)
         return df
 
@@ -189,3 +201,25 @@ class Transformations(ABC):
             raise DataMismatchError(
                 f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
             )
+
+    """
+    Map between merged target category column values and target category column and its value
+    If target category columns are PPG_Code, Class, Num
+    Merged target category column values are Product Category 1__A__1, Product Category 2__A__2
+    Then target_category_columns_map would be
+    {
+        "Product Category 1__A__1": {
+            "PPG_Code": "Product Category 1",
+            "Class": "A",
+            "Num": 1
+        },
+        "Product Category 2__A__2": {
+            "PPG_Code": "Product Category 2",
+            "Class": "A",
+            "Num": 2
+        },
+
+    }
+    """
+    def get_target_category_columns_map(self):
+        return self._target_category_columns_map
```
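A toy, self-contained reproduction of the new series-id bookkeeping. The `__` separator is inferred from the docstring example above; `merge_category_columns` itself is not shown in this diff:

```python
# Reproduces the mapping built in _set_series_id_column on toy data.
import pandas as pd

df = pd.DataFrame({
    "PPG_Code": ["Product Category 1", "Product Category 2"],
    "Class": ["A", "A"],
    "Num": [1, 2],
    "Sales": [10.0, 12.5],
})
target_category_columns = ["PPG_Code", "Class", "Num"]
# Stand-in for merge_category_columns: join the category values with "__".
df["Series"] = df[target_category_columns].astype(str).agg("__".join, axis=1)

target_category_columns_map = {}
for value in df["Series"].unique().tolist():
    target_category_columns_map[value] = (
        df[df["Series"] == value][target_category_columns]
        .drop_duplicates().iloc[0].to_dict()
    )
# {'Product Category 1__A__1': {'PPG_Code': 'Product Category 1', 'Class': 'A', 'Num': 1}, ...}
```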
ads/opctl/operator/lowcode/forecast/model/arima.py

```diff
@@ -29,6 +29,7 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         self.local_explanation = {}
         self.formatted_global_explanation = None
         self.formatted_local_explanation = None
+        self.constant_cols = {}
 
     def set_kwargs(self):
         # Extract the Confidence Interval Width and convert to arima's equivalent - alpha
@@ -64,6 +65,10 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         try:
             target = self.original_target_column
             self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
+            # If trend is constant, remove constant columns
+            if 'trend' not in model_kwargs or model_kwargs['trend'] == 'c':
+                self.constant_cols[s_id] = df.columns[df.nunique() == 1]
+                df = df.drop(columns=self.constant_cols[s_id])
 
             # format the dataframe for this target. Dropping NA on target[df] will remove all future data
             data = self.preprocess(df, s_id)
```
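Background on the `trend` check: `trend == 'c'` requests a constant (intercept) term, and an exogenous column that never changes is perfectly collinear with that intercept, which can break or destabilize the fit. A later hunk in this file (at line 239/245) drops the same columns again at prediction time, so training and prediction see identical regressors. A self-contained illustration of the detection:

```python
# Why constant columns are dropped: with a constant trend term the model
# already fits an intercept, so a constant exogenous column adds no signal
# and is perfectly collinear with it.
import pandas as pd

df = pd.DataFrame({
    "y": [1.0, 2.0, 3.0, 4.0],
    "promo": [0, 1, 0, 1],
    "store_open": [1, 1, 1, 1],  # constant -> collinear with the intercept
})
constant_cols = df.columns[df.nunique() == 1]
print(list(constant_cols))  # ['store_open']
df = df.drop(columns=constant_cols)
```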
```diff
@@ -74,7 +79,7 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
             X_in = data_i.drop(target, axis=1) if len(data_i.columns) > 1 else None
             X_pred = self.get_horizon(data).drop(target, axis=1)
 
-            if self.loaded_models is not None:
+            if self.loaded_models is not None and s_id in self.loaded_models:
                 model = self.loaded_models[s_id]
             else:
                 # Build and fit model
```
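The extra membership test matters when saved models are replayed against data that now contains new series: previously any non-None `loaded_models` dict was indexed directly, so an unseen `s_id` raised a `KeyError`. With the guard, unseen series fall through to the build-and-fit branch. A minimal sketch of the pattern:

```python
# Minimal sketch: fall back to fitting when a series id is missing from the
# cache of previously loaded models (names are illustrative).
loaded_models = {"series_a": "<fitted model>"}

for s_id in ["series_a", "series_b"]:
    if loaded_models is not None and s_id in loaded_models:
        model = loaded_models[s_id]
    else:
        model = f"fit new model for {s_id}"  # build-and-fit branch
```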
```diff
@@ -143,17 +148,18 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
     def _generate_report(self):
         """The method that needs to be implemented on the particular model level."""
         import datapane as dp
-        [11 removed lines not rendered in the source diff]
+        all_sections = []
+        if len(self.models) > 0:
+            sec5_text = dp.Text(f"## ARIMA Model Parameters")
+            blocks = [
+                dp.HTML(
+                    m.summary().as_html(),
+                    label=s_id,
+                )
+                for i, (s_id, m) in enumerate(self.models.items())
+            ]
+            sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+            all_sections = [sec5_text, sec5]
 
         if self.spec.generate_explanations:
             try:
```
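Two defensive changes are visible here: the ARIMA parameter section is now built only when at least one model was trained (`len(self.models) > 0`), so report generation no longer assumes every series produced a model, and `dp.Select` is only used when there is more than one block, since datapane's `Select` cannot wrap a single child block.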
```diff
@@ -239,6 +245,9 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         """
         data: ForecastDatasets.get_data_at_series(s_id)
         """
+        if series_id in self.constant_cols:
+            data = data.drop(columns=self.constant_cols[series_id])
+
         data = data.drop([target_col], axis=1)
         data[dt_column_name] = seconds_to_datetime(
             data[dt_column_name], dt_format=self.spec.datetime_column.format
```
ads/opctl/operator/lowcode/forecast/model/automlx.py

```diff
@@ -22,6 +22,7 @@ from ads.opctl.operator.lowcode.common.utils import (
     seconds_to_datetime,
     datetime_to_seconds,
 )
+from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
 
 AUTOMLX_N_ALGOS_TUNED = 4
 AUTOMLX_DEFAULT_SCORE_METRIC = "neg_sym_mean_abs_percent_error"
@@ -51,8 +52,13 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
         ] = self.spec.preprocessing or model_kwargs_cleaned.get("preprocessing", True)
         return model_kwargs_cleaned, time_budget
 
-    [2 removed lines not rendered in the source diff]
+
+    def preprocess(self, data, series_id=None): # TODO: re-use self.le for explanations
+        _, df_encoded = _label_encode_dataframe(
+            data,
+            no_encode={self.spec.datetime_column.name, self.original_target_column},
+        )
+        return df_encoded.set_index(self.spec.datetime_column.name)
 
     @runtime_dependency(
         module="automlx",
```
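`AutoMLXOperatorModel.preprocess` now label-encodes everything except the datetime and target columns before handing data to AutoMLx. The real `_label_encode_dataframe` lives in `forecast/utils.py` (changed in this release but not shown here); the sketch below only illustrates the call shape the diff attests, a helper returning the encodings plus the encoded frame:

```python
# Sketch of a label-encoding helper with the shape used above; not the
# actual implementation from ads/opctl/operator/lowcode/forecast/utils.py.
import pandas as pd

def label_encode_dataframe(df: pd.DataFrame, no_encode=frozenset()):
    df_encoded = df.copy()
    mappings = {}
    for col in df_encoded.columns:
        if col not in no_encode and df_encoded[col].dtype == "object":
            codes, uniques = pd.factorize(df_encoded[col])
            df_encoded[col] = codes
            mappings[col] = dict(enumerate(uniques))
    return mappings, df_encoded

mappings, out = label_encode_dataframe(
    pd.DataFrame({"ds": ["2024-01-01"], "y": [1.0], "color": ["red"]}),
    no_encode={"ds", "y"},
)
```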
```diff
@@ -70,17 +76,15 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
     )
     def _build_model(self) -> pd.DataFrame:
         from automlx import init
-
+        import logging
         try:
-            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
+            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}}, loglevel=logging.CRITICAL)
         except Exception as e:
             logger.info("Ray already initialized")
 
-
         full_data_dict = self.datasets.get_data_by_series()
 
         self.models = dict()
-        date_column = self.spec.datetime_column.name
         horizon = self.spec.horizon
         self.spec.confidence_interval_width = self.spec.confidence_interval_width or 0.8
         self.forecast_output = ForecastOutput(
```
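The `init` call now passes `loglevel=logging.CRITICAL`, presumably to quiet the Ray startup noise the engine emits; the surrounding try/except keeps a repeated call (when Ray is already initialized) from failing the build, and the `date_column` local, no longer needed here, is removed.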
```diff
@@ -107,7 +111,7 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
 
             logger.debug(f"Time Index Monotonic: {data_i.index.is_monotonic}")
 
-            if self.loaded_models is not None:
+            if self.loaded_models is not None and s_id in self.loaded_models:
                 model = self.loaded_models[s_id]
             else:
                 model = automlx.Pipeline(
@@ -197,82 +201,85 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
         )
         selected_models = dict()
         models = self.models
-        [13 removed lines not rendered in the source diff]
+        all_sections = []
+
+        if len(self.models) > 0:
+            for i, (s_id, m) in enumerate(models.items()):
+                selected_models[s_id] = {
+                    "series_id": s_id,
+                    "selected_model": m.selected_model_,
+                    "model_params": m.selected_model_params_,
+                }
+            selected_models_df = pd.DataFrame(
+                selected_models.items(), columns=["series_id", "best_selected_model"]
+            )
+            selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
+            selected_models_section = dp.Blocks(
+                "### Best Selected Model", dp.DataTable(selected_df)
+            )
 
-        [1 removed line not rendered in the source diff]
+            all_sections = [selected_models_text, selected_models_section]
 
         if self.spec.generate_explanations:
-            [4 removed lines not rendered in the source diff]
-            # Create a markdown text block for the global explanation section
-            global_explanation_text = dp.Text(
-                f"## Global Explanation of Models \n "
-                "The following tables provide the feature attribution for the global explainability."
-            )
-
-            # Convert the global explanation data to a DataFrame
-            global_explanation_df = pd.DataFrame(self.global_explanation)
+            try:
+                # If the key is present, call the "explain_model" method
+                self.explain_model()
 
-            [4 removed lines not rendered in the source diff]
-                self.formatted_global_explanation.rename(
-                    {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                # Create a markdown text block for the global explanation section
+                global_explanation_text = dp.Text(
+                    f"## Global Explanation of Models \n "
+                    "The following tables provide the feature attribution for the global explainability."
                 )
-            )
 
-            [2 removed lines not rendered in the source diff]
-                "### Global Explainability ",
-                dp.DataTable(self.formatted_global_explanation),
-            )
+                # Convert the global explanation data to a DataFrame
+                global_explanation_df = pd.DataFrame(self.global_explanation)
 
-            [6 removed lines not rendered in the source diff]
+                self.formatted_global_explanation = (
+                    global_explanation_df / global_explanation_df.sum(axis=0) * 100
+                )
+                self.formatted_global_explanation = (
+                    self.formatted_global_explanation.rename(
+                        {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                    )
                 )
-            self.formatted_local_explanation = aggregate_local_explanations
 
-            [4 removed lines not rendered in the source diff]
-                    label=s_id,
+                # Create a markdown section for the global explainability
+                global_explanation_section = dp.Blocks(
+                    "### Global Explainability ",
+                    dp.DataTable(self.formatted_global_explanation),
                 )
-                for s_id, local_ex_df in self.local_explanation.items()
-            ]
-            local_explanation_section = (
-                dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
-            )
 
-            [10 removed lines not rendered in the source diff]
+                aggregate_local_explanations = pd.DataFrame()
+                for s_id, local_ex_df in self.local_explanation.items():
+                    local_ex_df_copy = local_ex_df.copy()
+                    local_ex_df_copy["Series"] = s_id
+                    aggregate_local_explanations = pd.concat(
+                        [aggregate_local_explanations, local_ex_df_copy], axis=0
+                    )
+                self.formatted_local_explanation = aggregate_local_explanations
+
+                local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
+                blocks = [
+                    dp.DataTable(
+                        local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
+                        label=s_id,
+                    )
+                    for s_id, local_ex_df in self.local_explanation.items()
+                ]
+                local_explanation_section = (
+                    dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+                )
+
+                # Append the global explanation text and section to the "all_sections" list
+                all_sections = all_sections + [
+                    global_explanation_text,
+                    global_explanation_section,
+                    local_explanation_text,
+                    local_explanation_section,
+                ]
+            except Exception as e:
+                logger.warn(f"Failed to generate Explanations with error: {e}.")
+                logger.debug(f"Full Traceback: {traceback.format_exc()}")
 
         model_description = dp.Text(
             "The AutoMLx model automatically preprocesses, selects and engineers "
```
|