oracle-ads: 2.11.5 → 2.11.7 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. ads/aqua/utils.py +5 -2
  2. ads/catalog/model.py +3 -3
  3. ads/catalog/notebook.py +3 -3
  4. ads/catalog/project.py +2 -2
  5. ads/catalog/summary.py +2 -4
  6. ads/cli.py +2 -1
  7. ads/common/serializer.py +1 -1
  8. ads/data_labeling/metadata.py +2 -2
  9. ads/dataset/dataset.py +3 -5
  10. ads/dataset/factory.py +2 -3
  11. ads/dataset/label_encoder.py +1 -1
  12. ads/dataset/sampled_dataset.py +3 -5
  13. ads/jobs/ads_job.py +26 -2
  14. ads/jobs/builders/infrastructure/dsc_job.py +20 -7
  15. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +1 -1
  16. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +8 -15
  17. ads/opctl/operator/lowcode/anomaly/model/automlx.py +2 -1
  18. ads/opctl/operator/lowcode/anomaly/model/base_model.py +2 -2
  19. ads/opctl/operator/lowcode/anomaly/operator_config.py +18 -1
  20. ads/opctl/operator/lowcode/anomaly/schema.yaml +16 -4
  21. ads/opctl/operator/lowcode/common/data.py +16 -2
  22. ads/opctl/operator/lowcode/common/transformations.py +48 -14
  23. ads/opctl/operator/lowcode/forecast/environment.yaml +1 -0
  24. ads/opctl/operator/lowcode/forecast/model/arima.py +21 -12
  25. ads/opctl/operator/lowcode/forecast/model/automlx.py +79 -72
  26. ads/opctl/operator/lowcode/forecast/model/autots.py +182 -164
  27. ads/opctl/operator/lowcode/forecast/model/base_model.py +59 -41
  28. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +47 -47
  29. ads/opctl/operator/lowcode/forecast/model/prophet.py +48 -48
  30. ads/opctl/operator/lowcode/forecast/operator_config.py +18 -2
  31. ads/opctl/operator/lowcode/forecast/schema.yaml +20 -4
  32. ads/opctl/operator/lowcode/forecast/utils.py +4 -0
  33. ads/pipeline/ads_pipeline_step.py +11 -12
  34. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/METADATA +4 -3
  35. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/RECORD +38 -38
  36. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/LICENSE.txt +0 -0
  37. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/WHEEL +0 -0
  38. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/entry_points.txt +0 -0

ads/opctl/operator/lowcode/common/transformations.py
@@ -58,26 +58,33 @@ class Transformations(ABC):
         clean_df = self._format_datetime_col(clean_df)
         clean_df = self._set_multi_index(clean_df)
 
-        if self.name == "historical_data":
-            try:
-                clean_df = self._missing_value_imputation_hist(clean_df)
-            except Exception as e:
-                logger.debug(f"Missing value imputation failed with {e.args}")
-            if self.preprocessing:
-                try:
-                    clean_df = self._outlier_treatment(clean_df)
-                except Exception as e:
-                    logger.debug(f"Outlier Treatment failed with {e.args}")
-            else:
-                logger.debug("Skipping outlier treatment as preprocessing is disabled")
-        elif self.name == "additional_data":
-            clean_df = self._missing_value_imputation_add(clean_df)
+        if self.preprocessing and self.preprocessing.enabled:
+            if self.name == "historical_data":
+                if self.preprocessing.steps.missing_value_imputation:
+                    try:
+                        clean_df = self._missing_value_imputation_hist(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Missing value imputation failed with {e.args}")
+                else:
+                    logger.info("Skipping missing value imputation because it is disabled")
+                if self.preprocessing.steps.outlier_treatment:
+                    try:
+                        clean_df = self._outlier_treatment(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Outlier Treatment failed with {e.args}")
+                else:
+                    logger.info("Skipping outlier treatment because it is disabled")
+            elif self.name == "additional_data":
+                clean_df = self._missing_value_imputation_add(clean_df)
+        else:
+            logger.info("Skipping all preprocessing steps because preprocessing is disabled")
         return clean_df
 
     def _remove_trailing_whitespace(self, df):
         return df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
 
     def _set_series_id_column(self, df):
+        self._target_category_columns_map = dict()
         if not self.target_category_columns:
             df[DataColumns.Series] = "Series 1"
             self.has_artificial_series = True
@@ -85,6 +92,11 @@ class Transformations(ABC):
             df[DataColumns.Series] = merge_category_columns(
                 df, self.target_category_columns
             )
+            merged_values = df[DataColumns.Series].unique().tolist()
+            if self.target_category_columns:
+                for value in merged_values:
+                    self._target_category_columns_map[value] = df[df[DataColumns.Series] == value][self.target_category_columns].drop_duplicates().iloc[0].to_dict()
+
             df = df.drop(self.target_category_columns, axis=1)
         return df
 
@@ -189,3 +201,25 @@ class Transformations(ABC):
             raise DataMismatchError(
                 f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
             )
+
+    """
+    Map between merged target category column values and target category column and its value
+    If target category columns are PPG_Code, Class, Num
+    Merged target category column values are Product Category 1__A__1, Product Category 2__A__2
+    Then target_category_columns_map would be
+    {
+        "Product Category 1__A__1": {
+            "PPG_Code": "Product Category 1",
+            "Class": "A",
+            "Num": 1
+        },
+        "Product Category 2__A__2": {
+            "PPG_Code": "Product Category 2",
+            "Class": "A",
+            "Num": 2
+        },
+
+    }
+    """
+
+    def get_target_category_columns_map(self):
+        return self._target_category_columns_map
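
Net effect of the transformations.py changes: preprocessing steps are now gated through the operator spec (`preprocessing.enabled` plus per-step flags, matching the schema.yaml updates in files 20 and 31), and each merged series id can be mapped back to its original category-column values through the new `get_target_category_columns_map()`. A minimal standalone sketch of that mapping, assuming (as the docstring example suggests) that `merge_category_columns` joins values with "__":

import pandas as pd

df = pd.DataFrame(
    {
        "PPG_Code": ["Product Category 1", "Product Category 2"],
        "Class": ["A", "A"],
        "Num": [1, 2],
        "Sales": [10.0, 12.5],
    }
)
target_category_columns = ["PPG_Code", "Class", "Num"]

# Merge the category columns into a single series id ("__"-joined, per the docstring).
df["Series"] = df[target_category_columns].astype(str).agg("__".join, axis=1)

# Rebuild the same reverse map that get_target_category_columns_map() exposes.
target_category_columns_map = {
    value: df[df["Series"] == value][target_category_columns]
    .drop_duplicates()
    .iloc[0]
    .to_dict()
    for value in df["Series"].unique()
}
print(target_category_columns_map)
# maps "Product Category 1__A__1" back to {"PPG_Code": "Product Category 1", "Class": "A", "Num": 1}, etc.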

ads/opctl/operator/lowcode/forecast/environment.yaml
@@ -18,3 +18,4 @@ dependencies:
   - optuna==3.1.0
   - oracle-automlx==23.4.1
   - oracle-automlx[forecasting]==23.4.1
+  - fire
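
The only environment change is the new `fire` dependency. python-fire builds a command-line interface from a plain function or object; a minimal, hypothetical sketch of the kind of entry point it enables (illustrative only, not code from the package):

import fire

def run(config: str, debug: bool = False):
    # fire maps CLI flags onto these parameters, e.g.:
    #   python run.py --config=forecast.yaml --debug
    print(f"config={config} debug={debug}")

if __name__ == "__main__":
    fire.Fire(run)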

ads/opctl/operator/lowcode/forecast/model/arima.py
@@ -29,6 +29,7 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         self.local_explanation = {}
         self.formatted_global_explanation = None
         self.formatted_local_explanation = None
+        self.constant_cols = {}
 
     def set_kwargs(self):
         # Extract the Confidence Interval Width and convert to arima's equivalent - alpha
@@ -64,6 +65,10 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         try:
             target = self.original_target_column
             self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
+            # If trend is constant, remove constant columns
+            if 'trend' not in model_kwargs or model_kwargs['trend'] == 'c':
+                self.constant_cols[s_id] = df.columns[df.nunique() == 1]
+                df = df.drop(columns=self.constant_cols[s_id])
 
             # format the dataframe for this target. Dropping NA on target[df] will remove all future data
             data = self.preprocess(df, s_id)
@@ -74,7 +79,7 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
             X_in = data_i.drop(target, axis=1) if len(data_i.columns) > 1 else None
             X_pred = self.get_horizon(data).drop(target, axis=1)
 
-            if self.loaded_models is not None:
+            if self.loaded_models is not None and s_id in self.loaded_models:
                 model = self.loaded_models[s_id]
             else:
                 # Build and fit model
@@ -143,17 +148,18 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
     def _generate_report(self):
         """The method that needs to be implemented on the particular model level."""
         import datapane as dp
-
-        sec5_text = dp.Text(f"## ARIMA Model Parameters")
-        blocks = [
-            dp.HTML(
-                m.summary().as_html(),
-                label=s_id,
-            )
-            for i, (s_id, m) in enumerate(self.models.items())
-        ]
-        sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
-        all_sections = [sec5_text, sec5]
+        all_sections = []
+        if len(self.models) > 0:
+            sec5_text = dp.Text(f"## ARIMA Model Parameters")
+            blocks = [
+                dp.HTML(
+                    m.summary().as_html(),
+                    label=s_id,
+                )
+                for i, (s_id, m) in enumerate(self.models.items())
+            ]
+            sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+            all_sections = [sec5_text, sec5]
 
         if self.spec.generate_explanations:
             try:
@@ -239,6 +245,9 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         """
         data: ForecastDatasets.get_data_at_series(s_id)
         """
+        if series_id in self.constant_cols:
+            data = data.drop(columns=self.constant_cols[series_id])
+
         data = data.drop([target_col], axis=1)
         data[dt_column_name] = seconds_to_datetime(
             data[dt_column_name], dt_format=self.spec.datetime_column.format
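
The arima.py hunks share one theme: the new `constant_cols` cache. When the trend term is constant (the hunk treats a missing `trend` kwarg the same as `'c'`), an exogenous column that never varies is collinear with the intercept and can destabilize the fit, so such columns are dropped before training and dropped again from the data handed to explanations so the columns match the fitted model. The detection is a single pandas expression; a toy illustration:

import pandas as pd

df = pd.DataFrame(
    {
        "Sales": [1.0, 2.0, 3.0],
        "promo": [0, 1, 0],
        "store_open": [1, 1, 1],  # never varies: collinear with a constant trend
    }
)
constant_cols = df.columns[df.nunique() == 1]
print(list(constant_cols))  # ['store_open']
df = df.drop(columns=constant_cols)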

ads/opctl/operator/lowcode/forecast/model/automlx.py
@@ -22,6 +22,7 @@ from ads.opctl.operator.lowcode.common.utils import (
     seconds_to_datetime,
     datetime_to_seconds,
 )
+from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
 
 AUTOMLX_N_ALGOS_TUNED = 4
 AUTOMLX_DEFAULT_SCORE_METRIC = "neg_sym_mean_abs_percent_error"
@@ -51,8 +52,13 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
         ] = self.spec.preprocessing or model_kwargs_cleaned.get("preprocessing", True)
         return model_kwargs_cleaned, time_budget
 
-    def preprocess(self, data, series_id=None):
-        return data.set_index(self.spec.datetime_column.name)
+
+    def preprocess(self, data, series_id=None):  # TODO: re-use self.le for explanations
+        _, df_encoded = _label_encode_dataframe(
+            data,
+            no_encode={self.spec.datetime_column.name, self.original_target_column},
+        )
+        return df_encoded.set_index(self.spec.datetime_column.name)
 
     @runtime_dependency(
         module="automlx",
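
The rewritten `preprocess` label-encodes covariate columns before AutoMLx sees them, leaving the datetime and target columns untouched; `_label_encode_dataframe` is imported from forecast/utils.py (file 32 above), and the unpacking implies it returns an (encoder, encoded frame) pair. A rough stand-in under that assumption (the helper below is a hypothetical re-implementation, not the library function):

import pandas as pd

def label_encode_dataframe(df: pd.DataFrame, no_encode=frozenset()):
    """Integer-encode object-dtype columns, except those listed in no_encode."""
    mappings = {}
    encoded = df.copy()
    for col in df.columns:
        if col in no_encode or df[col].dtype != "object":
            continue
        codes, uniques = pd.factorize(encoded[col])
        encoded[col] = codes
        mappings[col] = dict(enumerate(uniques))
    return mappings, encoded

df = pd.DataFrame(
    {
        "ds": pd.date_range("2024-01-01", periods=3),
        "y": [1.0, 2.0, 3.0],
        "region": ["east", "west", "east"],
    }
)
_, df_encoded = label_encode_dataframe(df, no_encode={"ds", "y"})
print(df_encoded.set_index("ds"))  # "region" becomes integer codes 0/1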
@@ -70,17 +76,15 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
     )
     def _build_model(self) -> pd.DataFrame:
         from automlx import init
-        from sktime.forecasting.model_selection import temporal_train_test_split
+        import logging
         try:
-            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
+            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}}, loglevel=logging.CRITICAL)
         except Exception as e:
             logger.info("Ray already initialized")
-
 
         full_data_dict = self.datasets.get_data_by_series()
 
         self.models = dict()
-        date_column = self.spec.datetime_column.name
         horizon = self.spec.horizon
         self.spec.confidence_interval_width = self.spec.confidence_interval_width or 0.8
         self.forecast_output = ForecastOutput(
@@ -107,7 +111,7 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
 
             logger.debug(f"Time Index Monotonic: {data_i.index.is_monotonic}")
 
-            if self.loaded_models is not None:
+            if self.loaded_models is not None and s_id in self.loaded_models:
                 model = self.loaded_models[s_id]
             else:
                 model = automlx.Pipeline(
@@ -197,82 +201,85 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
         )
         selected_models = dict()
         models = self.models
-        for i, (s_id, df) in enumerate(self.full_data_dict.items()):
-            selected_models[s_id] = {
-                "series_id": s_id,
-                "selected_model": models[s_id].selected_model_,
-                "model_params": models[s_id].selected_model_params_,
-            }
-        selected_models_df = pd.DataFrame(
-            selected_models.items(), columns=["series_id", "best_selected_model"]
-        )
-        selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
-        selected_models_section = dp.Blocks(
-            "### Best Selected Model", dp.DataTable(selected_df)
-        )
+        all_sections = []
+
+        if len(self.models) > 0:
+            for i, (s_id, m) in enumerate(models.items()):
+                selected_models[s_id] = {
+                    "series_id": s_id,
+                    "selected_model": m.selected_model_,
+                    "model_params": m.selected_model_params_,
+                }
+            selected_models_df = pd.DataFrame(
+                selected_models.items(), columns=["series_id", "best_selected_model"]
+            )
+            selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
+            selected_models_section = dp.Blocks(
+                "### Best Selected Model", dp.DataTable(selected_df)
+            )
 
-        all_sections = [selected_models_text, selected_models_section]
+            all_sections = [selected_models_text, selected_models_section]
 
         if self.spec.generate_explanations:
-            # try:
-            # If the key is present, call the "explain_model" method
-            self.explain_model()
-
-            # Create a markdown text block for the global explanation section
-            global_explanation_text = dp.Text(
-                f"## Global Explanation of Models \n "
-                "The following tables provide the feature attribution for the global explainability."
-            )
-
-            # Convert the global explanation data to a DataFrame
-            global_explanation_df = pd.DataFrame(self.global_explanation)
+            try:
+                # If the key is present, call the "explain_model" method
+                self.explain_model()
 
-            self.formatted_global_explanation = (
-                global_explanation_df / global_explanation_df.sum(axis=0) * 100
-            )
-            self.formatted_global_explanation = (
-                self.formatted_global_explanation.rename(
-                    {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                # Create a markdown text block for the global explanation section
+                global_explanation_text = dp.Text(
+                    f"## Global Explanation of Models \n "
+                    "The following tables provide the feature attribution for the global explainability."
                 )
-            )
 
-            # Create a markdown section for the global explainability
-            global_explanation_section = dp.Blocks(
-                "### Global Explainability ",
-                dp.DataTable(self.formatted_global_explanation),
-            )
+                # Convert the global explanation data to a DataFrame
+                global_explanation_df = pd.DataFrame(self.global_explanation)
 
-            aggregate_local_explanations = pd.DataFrame()
-            for s_id, local_ex_df in self.local_explanation.items():
-                local_ex_df_copy = local_ex_df.copy()
-                local_ex_df_copy["Series"] = s_id
-                aggregate_local_explanations = pd.concat(
-                    [aggregate_local_explanations, local_ex_df_copy], axis=0
+                self.formatted_global_explanation = (
+                    global_explanation_df / global_explanation_df.sum(axis=0) * 100
+                )
+                self.formatted_global_explanation = (
+                    self.formatted_global_explanation.rename(
+                        {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                    )
                 )
-            self.formatted_local_explanation = aggregate_local_explanations
 
-            local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
-            blocks = [
-                dp.DataTable(
-                    local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
-                    label=s_id,
+                # Create a markdown section for the global explainability
+                global_explanation_section = dp.Blocks(
+                    "### Global Explainability ",
+                    dp.DataTable(self.formatted_global_explanation),
                 )
-                for s_id, local_ex_df in self.local_explanation.items()
-            ]
-            local_explanation_section = (
-                dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
-            )
 
-            # Append the global explanation text and section to the "all_sections" list
-            all_sections = all_sections + [
-                global_explanation_text,
-                global_explanation_section,
-                local_explanation_text,
-                local_explanation_section,
-            ]
-            # except Exception as e:
-            #     logger.warn(f"Failed to generate Explanations with error: {e}.")
-            #     logger.debug(f"Full Traceback: {traceback.format_exc()}")
+                aggregate_local_explanations = pd.DataFrame()
+                for s_id, local_ex_df in self.local_explanation.items():
+                    local_ex_df_copy = local_ex_df.copy()
+                    local_ex_df_copy["Series"] = s_id
+                    aggregate_local_explanations = pd.concat(
+                        [aggregate_local_explanations, local_ex_df_copy], axis=0
+                    )
+                self.formatted_local_explanation = aggregate_local_explanations
+
+                local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
+                blocks = [
+                    dp.DataTable(
+                        local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
+                        label=s_id,
+                    )
+                    for s_id, local_ex_df in self.local_explanation.items()
+                ]
+                local_explanation_section = (
+                    dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+                )
+
+                # Append the global explanation text and section to the "all_sections" list
+                all_sections = all_sections + [
+                    global_explanation_text,
+                    global_explanation_section,
+                    local_explanation_text,
+                    local_explanation_section,
+                ]
+            except Exception as e:
+                logger.warn(f"Failed to generate Explanations with error: {e}.")
+                logger.debug(f"Full Traceback: {traceback.format_exc()}")
 
         model_description = dp.Text(
             "The AutoMLx model automatically preprocesses, selects and engineers "