oracle-ads: 2.11.5 → 2.11.7 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. ads/aqua/utils.py +5 -2
  2. ads/catalog/model.py +3 -3
  3. ads/catalog/notebook.py +3 -3
  4. ads/catalog/project.py +2 -2
  5. ads/catalog/summary.py +2 -4
  6. ads/cli.py +2 -1
  7. ads/common/serializer.py +1 -1
  8. ads/data_labeling/metadata.py +2 -2
  9. ads/dataset/dataset.py +3 -5
  10. ads/dataset/factory.py +2 -3
  11. ads/dataset/label_encoder.py +1 -1
  12. ads/dataset/sampled_dataset.py +3 -5
  13. ads/jobs/ads_job.py +26 -2
  14. ads/jobs/builders/infrastructure/dsc_job.py +20 -7
  15. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +1 -1
  16. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +8 -15
  17. ads/opctl/operator/lowcode/anomaly/model/automlx.py +2 -1
  18. ads/opctl/operator/lowcode/anomaly/model/base_model.py +2 -2
  19. ads/opctl/operator/lowcode/anomaly/operator_config.py +18 -1
  20. ads/opctl/operator/lowcode/anomaly/schema.yaml +16 -4
  21. ads/opctl/operator/lowcode/common/data.py +16 -2
  22. ads/opctl/operator/lowcode/common/transformations.py +48 -14
  23. ads/opctl/operator/lowcode/forecast/environment.yaml +1 -0
  24. ads/opctl/operator/lowcode/forecast/model/arima.py +21 -12
  25. ads/opctl/operator/lowcode/forecast/model/automlx.py +79 -72
  26. ads/opctl/operator/lowcode/forecast/model/autots.py +182 -164
  27. ads/opctl/operator/lowcode/forecast/model/base_model.py +59 -41
  28. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +47 -47
  29. ads/opctl/operator/lowcode/forecast/model/prophet.py +48 -48
  30. ads/opctl/operator/lowcode/forecast/operator_config.py +18 -2
  31. ads/opctl/operator/lowcode/forecast/schema.yaml +20 -4
  32. ads/opctl/operator/lowcode/forecast/utils.py +4 -0
  33. ads/pipeline/ads_pipeline_step.py +11 -12
  34. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/METADATA +4 -3
  35. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/RECORD +38 -38
  36. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/LICENSE.txt +0 -0
  37. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/WHEEL +0 -0
  38. {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/entry_points.txt +0 -0

ads/opctl/operator/lowcode/common/transformations.py
@@ -58,26 +58,33 @@ class Transformations(ABC):
         clean_df = self._format_datetime_col(clean_df)
         clean_df = self._set_multi_index(clean_df)
 
-        if self.name == "historical_data":
-            try:
-                clean_df = self._missing_value_imputation_hist(clean_df)
-            except Exception as e:
-                logger.debug(f"Missing value imputation failed with {e.args}")
-            if self.preprocessing:
-                try:
-                    clean_df = self._outlier_treatment(clean_df)
-                except Exception as e:
-                    logger.debug(f"Outlier Treatment failed with {e.args}")
-            else:
-                logger.debug("Skipping outlier treatment as preprocessing is disabled")
-        elif self.name == "additional_data":
-            clean_df = self._missing_value_imputation_add(clean_df)
+        if self.preprocessing and self.preprocessing.enabled:
+            if self.name == "historical_data":
+                if self.preprocessing.steps.missing_value_imputation:
+                    try:
+                        clean_df = self._missing_value_imputation_hist(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Missing value imputation failed with {e.args}")
+                else:
+                    logger.info("Skipping missing value imputation because it is disabled")
+                if self.preprocessing.steps.outlier_treatment:
+                    try:
+                        clean_df = self._outlier_treatment(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Outlier Treatment failed with {e.args}")
+                else:
+                    logger.info("Skipping outlier treatment because it is disabled")
+            elif self.name == "additional_data":
+                clean_df = self._missing_value_imputation_add(clean_df)
+        else:
+            logger.info("Skipping all preprocessing steps because preprocessing is disabled")
         return clean_df
 
     def _remove_trailing_whitespace(self, df):
         return df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
 
     def _set_series_id_column(self, df):
+        self._target_category_columns_map = dict()
         if not self.target_category_columns:
             df[DataColumns.Series] = "Series 1"
             self.has_artificial_series = True
@@ -85,6 +92,11 @@ class Transformations(ABC):
             df[DataColumns.Series] = merge_category_columns(
                 df, self.target_category_columns
             )
+            merged_values = df[DataColumns.Series].unique().tolist()
+            if self.target_category_columns:
+                for value in merged_values:
+                    self._target_category_columns_map[value] = df[df[DataColumns.Series] == value][self.target_category_columns].drop_duplicates().iloc[0].to_dict()
+
             df = df.drop(self.target_category_columns, axis=1)
         return df
 
@@ -189,3 +201,25 @@ class Transformations(ABC):
             raise DataMismatchError(
                 f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
             )
+
+    """
+    Map between merged target category column values and target category column and its value
+    If target category columns are PPG_Code, Class, Num
+    Merged target category column values are Product Category 1__A__1, Product Category 2__A__2
+    Then target_category_columns_map would be
+    {
+        "Product Category 1__A__1": {
+            "PPG_Code": "Product Category 1",
+            "Class": "A",
+            "Num": 1
+        },
+        "Product Category 2__A__2": {
+            "PPG_Code": "Product Category 2",
+            "Class": "A",
+            "Num": 2
+        },
+
+    }
+    """
+
+    def get_target_category_columns_map(self):
+        return self._target_category_columns_map
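
Net effect of the transformations.py changes: preprocessing steps are now gated through the operator spec (`preprocessing.enabled` plus per-step flags, matching the schema.yaml updates in files 20 and 31), and each merged series id can be mapped back to its original category-column values through the new `get_target_category_columns_map()`. A minimal standalone sketch of that mapping, assuming (as the docstring example suggests) that `merge_category_columns` joins values with "__":

import pandas as pd

df = pd.DataFrame(
    {
        "PPG_Code": ["Product Category 1", "Product Category 2"],
        "Class": ["A", "A"],
        "Num": [1, 2],
        "Sales": [10.0, 12.5],
    }
)
target_category_columns = ["PPG_Code", "Class", "Num"]

# Merge the category columns into a single series id ("__"-joined, per the docstring).
df["Series"] = df[target_category_columns].astype(str).agg("__".join, axis=1)

# Rebuild the same reverse map that get_target_category_columns_map() exposes.
target_category_columns_map = {
    value: df[df["Series"] == value][target_category_columns]
    .drop_duplicates()
    .iloc[0]
    .to_dict()
    for value in df["Series"].unique()
}
print(target_category_columns_map)
# maps "Product Category 1__A__1" back to {"PPG_Code": "Product Category 1", "Class": "A", "Num": 1}, etc.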

ads/opctl/operator/lowcode/forecast/environment.yaml
@@ -18,3 +18,4 @@ dependencies:
   - optuna==3.1.0
   - oracle-automlx==23.4.1
   - oracle-automlx[forecasting]==23.4.1
+  - fire
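
The only environment change is the new `fire` dependency. python-fire builds a command-line interface from a plain function or object; a minimal, hypothetical sketch of the kind of entry point it enables (illustrative only, not code from the package):

import fire

def run(config: str, debug: bool = False):
    # fire maps CLI flags onto these parameters, e.g.:
    #   python run.py --config=forecast.yaml --debug
    print(f"config={config} debug={debug}")

if __name__ == "__main__":
    fire.Fire(run)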

ads/opctl/operator/lowcode/forecast/model/arima.py
@@ -29,6 +29,7 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         self.local_explanation = {}
         self.formatted_global_explanation = None
         self.formatted_local_explanation = None
+        self.constant_cols = {}
 
     def set_kwargs(self):
         # Extract the Confidence Interval Width and convert to arima's equivalent - alpha
@@ -64,6 +65,10 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         try:
             target = self.original_target_column
             self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
+            # If trend is constant, remove constant columns
+            if 'trend' not in model_kwargs or model_kwargs['trend'] == 'c':
+                self.constant_cols[s_id] = df.columns[df.nunique() == 1]
+                df = df.drop(columns=self.constant_cols[s_id])
 
             # format the dataframe for this target. Dropping NA on target[df] will remove all future data
             data = self.preprocess(df, s_id)
@@ -74,7 +79,7 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
             X_in = data_i.drop(target, axis=1) if len(data_i.columns) > 1 else None
             X_pred = self.get_horizon(data).drop(target, axis=1)
 
-            if self.loaded_models is not None:
+            if self.loaded_models is not None and s_id in self.loaded_models:
                 model = self.loaded_models[s_id]
             else:
                 # Build and fit model
@@ -143,17 +148,18 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
     def _generate_report(self):
         """The method that needs to be implemented on the particular model level."""
         import datapane as dp
-
-        sec5_text = dp.Text(f"## ARIMA Model Parameters")
-        blocks = [
-            dp.HTML(
-                m.summary().as_html(),
-                label=s_id,
-            )
-            for i, (s_id, m) in enumerate(self.models.items())
-        ]
-        sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
-        all_sections = [sec5_text, sec5]
+        all_sections = []
+        if len(self.models) > 0:
+            sec5_text = dp.Text(f"## ARIMA Model Parameters")
+            blocks = [
+                dp.HTML(
+                    m.summary().as_html(),
+                    label=s_id,
+                )
+                for i, (s_id, m) in enumerate(self.models.items())
+            ]
+            sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+            all_sections = [sec5_text, sec5]
 
         if self.spec.generate_explanations:
             try:
@@ -239,6 +245,9 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         """
         data: ForecastDatasets.get_data_at_series(s_id)
         """
+        if series_id in self.constant_cols:
+            data = data.drop(columns=self.constant_cols[series_id])
+
         data = data.drop([target_col], axis=1)
         data[dt_column_name] = seconds_to_datetime(
             data[dt_column_name], dt_format=self.spec.datetime_column.format
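
The arima.py hunks share one theme: the new `constant_cols` cache. When the trend term is constant (the hunk treats a missing `trend` kwarg the same as `'c'`), an exogenous column that never varies is collinear with the intercept and can destabilize the fit, so such columns are dropped before training and dropped again from the data handed to explanations so the columns match the fitted model. The detection is a single pandas expression; a toy illustration:

import pandas as pd

df = pd.DataFrame(
    {
        "Sales": [1.0, 2.0, 3.0],
        "promo": [0, 1, 0],
        "store_open": [1, 1, 1],  # never varies: collinear with a constant trend
    }
)
constant_cols = df.columns[df.nunique() == 1]
print(list(constant_cols))  # ['store_open']
df = df.drop(columns=constant_cols)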

ads/opctl/operator/lowcode/forecast/model/automlx.py
@@ -22,6 +22,7 @@ from ads.opctl.operator.lowcode.common.utils import (
     seconds_to_datetime,
     datetime_to_seconds,
 )
+from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
 
 AUTOMLX_N_ALGOS_TUNED = 4
 AUTOMLX_DEFAULT_SCORE_METRIC = "neg_sym_mean_abs_percent_error"
@@ -51,8 +52,13 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
         ] = self.spec.preprocessing or model_kwargs_cleaned.get("preprocessing", True)
         return model_kwargs_cleaned, time_budget
 
-    def preprocess(self, data, series_id=None):
-        return data.set_index(self.spec.datetime_column.name)
+
+    def preprocess(self, data, series_id=None):  # TODO: re-use self.le for explanations
+        _, df_encoded = _label_encode_dataframe(
+            data,
+            no_encode={self.spec.datetime_column.name, self.original_target_column},
+        )
+        return df_encoded.set_index(self.spec.datetime_column.name)
 
     @runtime_dependency(
         module="automlx",
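
The rewritten `preprocess` label-encodes covariate columns before AutoMLx sees them, leaving the datetime and target columns untouched; `_label_encode_dataframe` is imported from forecast/utils.py (file 32 above), and the unpacking implies it returns an (encoder, encoded frame) pair. A rough stand-in under that assumption (the helper below is a hypothetical re-implementation, not the library function):

import pandas as pd

def label_encode_dataframe(df: pd.DataFrame, no_encode=frozenset()):
    """Integer-encode object-dtype columns, except those listed in no_encode."""
    mappings = {}
    encoded = df.copy()
    for col in df.columns:
        if col in no_encode or df[col].dtype != "object":
            continue
        codes, uniques = pd.factorize(encoded[col])
        encoded[col] = codes
        mappings[col] = dict(enumerate(uniques))
    return mappings, encoded

df = pd.DataFrame(
    {
        "ds": pd.date_range("2024-01-01", periods=3),
        "y": [1.0, 2.0, 3.0],
        "region": ["east", "west", "east"],
    }
)
_, df_encoded = label_encode_dataframe(df, no_encode={"ds", "y"})
print(df_encoded.set_index("ds"))  # "region" becomes integer codes 0/1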
@@ -70,17 +76,15 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
     )
     def _build_model(self) -> pd.DataFrame:
         from automlx import init
-        from sktime.forecasting.model_selection import temporal_train_test_split
+        import logging
         try:
-            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
+            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}}, loglevel=logging.CRITICAL)
         except Exception as e:
             logger.info("Ray already initialized")
-
 
         full_data_dict = self.datasets.get_data_by_series()
 
         self.models = dict()
-        date_column = self.spec.datetime_column.name
         horizon = self.spec.horizon
         self.spec.confidence_interval_width = self.spec.confidence_interval_width or 0.8
         self.forecast_output = ForecastOutput(
@@ -107,7 +111,7 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
 
             logger.debug(f"Time Index Monotonic: {data_i.index.is_monotonic}")
 
-            if self.loaded_models is not None:
+            if self.loaded_models is not None and s_id in self.loaded_models:
                 model = self.loaded_models[s_id]
             else:
                 model = automlx.Pipeline(
@@ -197,82 +201,85 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
         )
         selected_models = dict()
         models = self.models
-        for i, (s_id, df) in enumerate(self.full_data_dict.items()):
-            selected_models[s_id] = {
-                "series_id": s_id,
-                "selected_model": models[s_id].selected_model_,
-                "model_params": models[s_id].selected_model_params_,
-            }
-        selected_models_df = pd.DataFrame(
-            selected_models.items(), columns=["series_id", "best_selected_model"]
-        )
-        selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
-        selected_models_section = dp.Blocks(
-            "### Best Selected Model", dp.DataTable(selected_df)
-        )
+        all_sections = []
+
+        if len(self.models) > 0:
+            for i, (s_id, m) in enumerate(models.items()):
+                selected_models[s_id] = {
+                    "series_id": s_id,
+                    "selected_model": m.selected_model_,
+                    "model_params": m.selected_model_params_,
+                }
+            selected_models_df = pd.DataFrame(
+                selected_models.items(), columns=["series_id", "best_selected_model"]
+            )
+            selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
+            selected_models_section = dp.Blocks(
+                "### Best Selected Model", dp.DataTable(selected_df)
+            )
 
-        all_sections = [selected_models_text, selected_models_section]
+            all_sections = [selected_models_text, selected_models_section]
 
         if self.spec.generate_explanations:
-            # try:
-            # If the key is present, call the "explain_model" method
-            self.explain_model()
-
-            # Create a markdown text block for the global explanation section
-            global_explanation_text = dp.Text(
-                f"## Global Explanation of Models \n "
-                "The following tables provide the feature attribution for the global explainability."
-            )
-
-            # Convert the global explanation data to a DataFrame
-            global_explanation_df = pd.DataFrame(self.global_explanation)
+            try:
+                # If the key is present, call the "explain_model" method
+                self.explain_model()
 
-            self.formatted_global_explanation = (
-                global_explanation_df / global_explanation_df.sum(axis=0) * 100
-            )
-            self.formatted_global_explanation = (
-                self.formatted_global_explanation.rename(
-                    {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                # Create a markdown text block for the global explanation section
+                global_explanation_text = dp.Text(
+                    f"## Global Explanation of Models \n "
+                    "The following tables provide the feature attribution for the global explainability."
                 )
-            )
 
-            # Create a markdown section for the global explainability
-            global_explanation_section = dp.Blocks(
-                "### Global Explainability ",
-                dp.DataTable(self.formatted_global_explanation),
-            )
+                # Convert the global explanation data to a DataFrame
+                global_explanation_df = pd.DataFrame(self.global_explanation)
 
-            aggregate_local_explanations = pd.DataFrame()
-            for s_id, local_ex_df in self.local_explanation.items():
-                local_ex_df_copy = local_ex_df.copy()
-                local_ex_df_copy["Series"] = s_id
-                aggregate_local_explanations = pd.concat(
-                    [aggregate_local_explanations, local_ex_df_copy], axis=0
+                self.formatted_global_explanation = (
+                    global_explanation_df / global_explanation_df.sum(axis=0) * 100
+                )
+                self.formatted_global_explanation = (
+                    self.formatted_global_explanation.rename(
+                        {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                    )
                 )
-            self.formatted_local_explanation = aggregate_local_explanations
 
-            local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
-            blocks = [
-                dp.DataTable(
-                    local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
-                    label=s_id,
+                # Create a markdown section for the global explainability
+                global_explanation_section = dp.Blocks(
+                    "### Global Explainability ",
+                    dp.DataTable(self.formatted_global_explanation),
                 )
-                for s_id, local_ex_df in self.local_explanation.items()
-            ]
-            local_explanation_section = (
-                dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
-            )
 
-            # Append the global explanation text and section to the "all_sections" list
-            all_sections = all_sections + [
-                global_explanation_text,
-                global_explanation_section,
-                local_explanation_text,
-                local_explanation_section,
-            ]
-            # except Exception as e:
-            #     logger.warn(f"Failed to generate Explanations with error: {e}.")
-            #     logger.debug(f"Full Traceback: {traceback.format_exc()}")
+                aggregate_local_explanations = pd.DataFrame()
+                for s_id, local_ex_df in self.local_explanation.items():
+                    local_ex_df_copy = local_ex_df.copy()
+                    local_ex_df_copy["Series"] = s_id
+                    aggregate_local_explanations = pd.concat(
+                        [aggregate_local_explanations, local_ex_df_copy], axis=0
+                    )
+                self.formatted_local_explanation = aggregate_local_explanations
+
+                local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
+                blocks = [
+                    dp.DataTable(
+                        local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
+                        label=s_id,
+                    )
+                    for s_id, local_ex_df in self.local_explanation.items()
+                ]
+                local_explanation_section = (
+                    dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+                )
+
+                # Append the global explanation text and section to the "all_sections" list
+                all_sections = all_sections + [
+                    global_explanation_text,
+                    global_explanation_section,
+                    local_explanation_text,
+                    local_explanation_section,
+                ]
+            except Exception as e:
+                logger.warn(f"Failed to generate Explanations with error: {e}.")
+                logger.debug(f"Full Traceback: {traceback.format_exc()}")
 
         model_description = dp.Text(
             "The AutoMLx model automatically preprocesses, selects and engineers "