oracle-ads 2.12.10rc0__py3-none-any.whl → 2.12.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ads/aqua/__init__.py +2 -1
  2. ads/aqua/app.py +30 -16
  3. ads/aqua/client/__init__.py +3 -0
  4. ads/aqua/client/client.py +799 -0
  5. ads/aqua/evaluation/evaluation.py +20 -12
  6. ads/aqua/extension/aqua_ws_msg_handler.py +14 -7
  7. ads/aqua/extension/base_handler.py +12 -9
  8. ads/aqua/extension/model_handler.py +6 -1
  9. ads/aqua/finetuning/entities.py +3 -0
  10. ads/aqua/finetuning/finetuning.py +32 -1
  11. ads/aqua/model/entities.py +2 -1
  12. ads/aqua/model/model.py +136 -76
  13. ads/aqua/modeldeployment/deployment.py +22 -10
  14. ads/cli.py +16 -8
  15. ads/opctl/operator/lowcode/common/transformations.py +38 -3
  16. ads/opctl/operator/lowcode/common/utils.py +11 -1
  17. ads/opctl/operator/lowcode/forecast/__main__.py +10 -0
  18. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +1 -1
  19. ads/opctl/operator/lowcode/forecast/operator_config.py +31 -0
  20. ads/opctl/operator/lowcode/forecast/schema.yaml +63 -0
  21. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  22. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +233 -0
  23. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +238 -0
  24. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/METADATA +3 -1
  25. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/RECORD +28 -23
  26. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/LICENSE.txt +0 -0
  27. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/WHEEL +0 -0
  28. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/entry_points.txt +0 -0
ads/opctl/operator/lowcode/common/transformations.py

@@ -15,6 +15,7 @@ from ads.opctl.operator.lowcode.common.errors import (
     InvalidParameterError,
 )
 from ads.opctl.operator.lowcode.common.utils import merge_category_columns
+from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorSpec


 class Transformations(ABC):
@@ -34,6 +35,7 @@ class Transformations(ABC):
         self.dataset_info = dataset_info
         self.target_category_columns = dataset_info.target_category_columns
         self.target_column_name = dataset_info.target_column
+        self.raw_column_names = None
         self.dt_column_name = (
             dataset_info.datetime_column.name if dataset_info.datetime_column else None
         )
@@ -60,7 +62,8 @@ class Transformations(ABC):

         """
         clean_df = self._remove_trailing_whitespace(data)
-        # clean_df = self._normalize_column_names(clean_df)
+        if isinstance(self.dataset_info, ForecastOperatorSpec):
+            clean_df = self._clean_column_names(clean_df)
         if self.name == "historical_data":
             self._check_historical_dataset(clean_df)
         clean_df = self._set_series_id_column(clean_df)
@@ -98,8 +101,36 @@ class Transformations(ABC):
     def _remove_trailing_whitespace(self, df):
         return df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

-    # def _normalize_column_names(self, df):
-    #     return df.rename(columns=lambda x: re.sub("[^A-Za-z0-9_]+", "", x))
+    def _clean_column_names(self, df):
+        """
+        Remove all whitespaces from column names in a DataFrame and store the original names.
+
+        Parameters:
+            df (pd.DataFrame): The DataFrame whose column names need to be cleaned.
+
+        Returns:
+            pd.DataFrame: The DataFrame with cleaned column names.
+        """
+
+        self.raw_column_names = {
+            col: col.replace(" ", "") for col in df.columns if " " in col
+        }
+        df.columns = [self.raw_column_names.get(col, col) for col in df.columns]
+
+        if self.target_column_name:
+            self.target_column_name = self.raw_column_names.get(
+                self.target_column_name, self.target_column_name
+            )
+        self.dt_column_name = self.raw_column_names.get(
+            self.dt_column_name, self.dt_column_name
+        )
+
+        if self.target_category_columns:
+            self.target_category_columns = [
+                self.raw_column_names.get(col, col)
+                for col in self.target_category_columns
+            ]
+        return df

     def _set_series_id_column(self, df):
         self._target_category_columns_map = {}
@@ -233,6 +264,10 @@ class Transformations(ABC):
         expected_names = [self.target_column_name, self.dt_column_name] + (
             self.target_category_columns if self.target_category_columns else []
         )
+
+        if self.raw_column_names:
+            expected_names.extend(list(self.raw_column_names.values()))
+
         if set(df.columns) != set(expected_names):
             raise DataMismatchError(
                 f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
            )

ads/opctl/operator/lowcode/common/utils.py

@@ -12,6 +12,7 @@ from typing import List, Union

 import fsspec
 import oracledb
+import json
 import pandas as pd

 from ads.common.object_storage_details import ObjectStorageDetails
@@ -125,7 +126,7 @@ def load_data(data_spec, storage_options=None, **kwargs):
     return data


-def write_data(data, filename, format, storage_options, index=False, **kwargs):
+def write_data(data, filename, format, storage_options=None, index=False, **kwargs):
     disable_print()
     if not format:
         _, format = os.path.splitext(filename)
@@ -141,6 +142,15 @@ def write_data(data, filename, format, storage_options=None, index=False, **kwargs):
     )


+def write_simple_json(data, path):
+    if ObjectStorageDetails.is_oci_path(path):
+        storage_options = default_signer()
+    else:
+        storage_options = {}
+    with fsspec.open(path, mode="w", **storage_options) as f:
+        json.dump(data, f, indent=4)
+
+
 def merge_category_columns(data, target_category_columns):
     result = data.apply(
         lambda x: "__".join([str(x[col]) for col in target_category_columns]), axis=1
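
The new `write_simple_json` resolves auth from the path itself: `oci://` targets get `default_signer()`, everything else falls through to plain fsspec. A usage sketch (both paths are hypothetical), which also benefits from `write_data`'s new `storage_options=None` default for local writes:

    from ads.opctl.operator.lowcode.common.utils import write_simple_json

    # Local target: no storage options needed.
    write_simple_json({"status": "ok"}, "/tmp/deployment_info.json")

    # Object Storage target: default_signer() supplies the auth kwargs.
    write_simple_json({"status": "ok"}, "oci://my-bucket@my-namespace/deployment_info.json")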

ads/opctl/operator/lowcode/forecast/__main__.py

@@ -17,6 +17,7 @@ from ads.opctl.operator.common.utils import _parse_input_args

 from .operator_config import ForecastOperatorConfig
 from .model.forecast_datasets import ForecastDatasets
+from .whatifserve import ModelDeploymentManager


 def operate(operator_config: ForecastOperatorConfig) -> None:
@@ -27,6 +28,15 @@ def operate(operator_config: ForecastOperatorConfig) -> None:
     ForecastOperatorModelFactory.get_model(
         operator_config, datasets
     ).generate_report()
+    # saving to model catalog
+    spec = operator_config.spec
+    if spec.what_if_analysis and datasets.additional_data:
+        mdm = ModelDeploymentManager(spec, datasets.additional_data)
+        mdm.save_to_catalog()
+        if spec.what_if_analysis.model_deployment:
+            mdm.create_deployment()
+        mdm.save_deployment_info()
+

 def verify(spec: Dict, **kwargs: Dict) -> bool:
     """Verifies the forecasting operator config."""

ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py

@@ -167,7 +167,7 @@ class ForecastDatasets:
                 self.historical_data.data,
                 self.additional_data.data,
             ],
-            axis=1,
+            axis=1
         )

     def get_data_by_series(self, include_horizon=True):

ads/opctl/operator/lowcode/forecast/operator_config.py

@@ -18,6 +18,35 @@ from ads.opctl.operator.lowcode.common.utils import find_output_dirname

 from .const import SpeedAccuracyMode, SupportedMetrics, SupportedModels

+@dataclass
+class AutoScaling(DataClassSerializable):
+    """Class representing simple autoscaling policy"""
+    minimum_instance: int = 1
+    maximum_instance: int = None
+    cool_down_in_seconds: int = 600
+    scale_in_threshold: int = 10
+    scale_out_threshold: int = 80
+    scaling_metric: str = "CPU_UTILIZATION"
+
+@dataclass(repr=True)
+class ModelDeploymentServer(DataClassSerializable):
+    """Class representing model deployment server specification for whatif-analysis."""
+    display_name: str = None
+    initial_shape: str = None
+    description: str = None
+    log_group: str = None
+    log_id: str = None
+    auto_scaling: AutoScaling = field(default_factory=AutoScaling)
+
+
+@dataclass(repr=True)
+class WhatIfAnalysis(DataClassSerializable):
+    """Class representing operator specification for whatif-analysis."""
+    model_display_name: str = None
+    compartment_id: str = None
+    project_id: str = None
+    model_deployment: ModelDeploymentServer = field(default_factory=ModelDeploymentServer)
+

 @dataclass(repr=True)
 class TestData(InputData):
@@ -90,12 +119,14 @@ class ForecastOperatorSpec(DataClassSerializable):
     confidence_interval_width: float = None
     metric: str = None
     tuning: Tuning = field(default_factory=Tuning)
+    what_if_analysis: WhatIfAnalysis = field(default_factory=WhatIfAnalysis)

     def __post_init__(self):
         """Adjusts the specification details."""
         self.output_directory = self.output_directory or OutputDirectory(
             url=find_output_dirname(self.output_directory)
         )
+        self.generate_model_pickle = True if self.generate_model_pickle or self.what_if_analysis else False
         self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower()
         self.model = self.model or SupportedModels.Prophet
         self.confidence_interval_width = self.confidence_interval_width or 0.80
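
For readers wiring this up programmatically rather than via YAML, the new dataclasses compose as plain objects. A minimal sketch (every identifier value below is a hypothetical placeholder, not a package default):

    from ads.opctl.operator.lowcode.forecast.operator_config import (
        AutoScaling,
        ModelDeploymentServer,
        WhatIfAnalysis,
    )

    what_if = WhatIfAnalysis(
        model_display_name="sales-forecast-models",
        model_deployment=ModelDeploymentServer(
            display_name="sales-forecast-md",
            initial_shape="VM.Standard.E4.Flex",
            log_group="ocid1.loggroup.oc1..example",
            log_id="ocid1.log.oc1..example",
            auto_scaling=AutoScaling(maximum_instance=2),
        ),
    )

Note that `__post_init__` forces `generate_model_pickle` on whenever `what_if_analysis` is set, since the saved pickle is what `ModelDeploymentManager.save_to_catalog` later loads.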

ads/opctl/operator/lowcode/forecast/schema.yaml

@@ -353,6 +353,69 @@ spec:
     meta:
       description: "Report file generation can be enabled using this flag. Defaults to true."

+  what_if_analysis:
+    type: dict
+    required: false
+    schema:
+      model_deployment:
+        type: dict
+        required: false
+        meta: "If model_deployment id is not specified, a new model deployment is created; otherwise, the model is linked to the specified model deployment."
+        schema:
+          id:
+            type: string
+            required: false
+          display_name:
+            type: string
+            required: false
+          initial_shape:
+            type: string
+            required: false
+          description:
+            type: string
+            required: false
+          log_group:
+            type: string
+            required: true
+          log_id:
+            type: string
+            required: true
+          auto_scaling:
+            type: dict
+            required: false
+            schema:
+              minimum_instance:
+                type: integer
+                required: true
+              maximum_instance:
+                type: integer
+                required: true
+              scale_in_threshold:
+                type: integer
+                required: true
+              scale_out_threshold:
+                type: integer
+                required: true
+              scaling_metric:
+                type: string
+                required: true
+              cool_down_in_seconds:
+                type: integer
+                required: true
+      model_display_name:
+        type: string
+        required: true
+      project_id:
+        type: string
+        required: false
+        meta: "If not provided, The project OCID from config.PROJECT_OCID is used"
+      compartment_id:
+        type: string
+        required: false
+        meta: "If not provided, The compartment OCID from config.NB_SESSION_COMPARTMENT_OCID is used."
+    meta:
+      description: "When enabled, the models are saved to the model catalog. Defaults to false."
+
   generate_metrics:
     type: boolean
     required: false
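
An illustrative operator spec fragment that exercises the new block (every value below is a hypothetical placeholder; `log_group`, `log_id`, and `model_display_name` are the required keys per the schema above):

    spec:
      what_if_analysis:
        model_display_name: sales-forecast-models
        compartment_id: ocid1.compartment.oc1..example
        project_id: ocid1.datascienceproject.oc1..example
        model_deployment:
          display_name: sales-forecast-md
          initial_shape: VM.Standard.E4.Flex
          log_group: ocid1.loggroup.oc1..example
          log_id: ocid1.log.oc1..example
          auto_scaling:
            minimum_instance: 1
            maximum_instance: 2
            scale_in_threshold: 10
            scale_out_threshold: 80
            scaling_metric: CPU_UTILIZATION
            cool_down_in_seconds: 600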

ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py (new file)

@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+
+from .deployment_manager import ModelDeploymentManager

ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py (new file)

@@ -0,0 +1,233 @@
+#!/usr/bin/env python
+import json
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+import pickle
+import shutil
+import sys
+import tempfile
+import oci
+
+import pandas as pd
+import cloudpickle
+
+from ads.opctl import logger
+from ads.common.model_export_util import prepare_generic_model
+from ads.opctl.operator.lowcode.common.utils import write_data, write_simple_json
+from ads.opctl.operator.lowcode.common.utils import default_signer
+from ..model.forecast_datasets import AdditionalData
+from ..operator_config import ForecastOperatorSpec
+
+from oci.data_science import DataScienceClient, DataScienceClientCompositeOperations
+
+from oci.data_science.models import ModelConfigurationDetails, InstanceConfiguration, \
+    FixedSizeScalingPolicy, CategoryLogDetails, LogDetails, \
+    SingleModelDeploymentConfigurationDetails, CreateModelDeploymentDetails
+from ads.common.object_storage_details import ObjectStorageDetails
+
+
+class ModelDeploymentManager:
+    def __init__(self, spec: ForecastOperatorSpec, additional_data: AdditionalData, previous_model_version=None):
+        self.spec = spec
+        self.model_name = spec.model
+        self.horizon = spec.horizon
+        self.additional_data = additional_data.get_dict_by_series()
+        self.model_obj = {}
+        self.display_name = spec.what_if_analysis.model_display_name
+        self.project_id = spec.what_if_analysis.project_id if spec.what_if_analysis.project_id \
+            else os.environ.get('PROJECT_OCID')
+        self.compartment_id = spec.what_if_analysis.compartment_id if spec.what_if_analysis.compartment_id \
+            else os.environ.get('NB_SESSION_COMPARTMENT_OCID')
+        if self.project_id is None or self.compartment_id is None:
+            raise ValueError("Either project_id or compartment_id cannot be None.")
+        self.path_to_artifact = f"{self.spec.output_directory.url}/artifacts/"
+        self.pickle_file_path = f"{self.spec.output_directory.url}/model.pkl"
+        self.model_version = previous_model_version + 1 if previous_model_version else 1
+        self.catalog_id = None
+        self.test_mode = os.environ.get("TEST_MODE", False)
+        self.deployment_info = {}
+
+    def _sanity_test(self):
+        """
+        Function perform sanity test for saved artifact
+        """
+        org_sys_path = sys.path[:]
+        try:
+            sys.path.insert(0, f"{self.path_to_artifact}")
+            from score import load_model, predict
+            _ = load_model()
+
+            # Write additional data to tmp file and perform sanity check
+            with tempfile.NamedTemporaryFile(suffix='.csv') as temp_file:
+                one_series = next(iter(self.additional_data))
+                sample_prediction_data = self.additional_data[one_series].tail(self.horizon)
+                sample_prediction_data[self.spec.target_category_columns[0]] = one_series
+                date_col_name = self.spec.datetime_column.name
+                date_col_format = self.spec.datetime_column.format
+                sample_prediction_data[date_col_name] = sample_prediction_data[date_col_name].dt.strftime(
+                    date_col_format)
+                sample_prediction_data.to_csv(temp_file.name, index=False)
+                input_data = {"additional_data": {"url": temp_file.name}}
+                prediction_test = predict(input_data, _)
+                logger.info(f"prediction test completed with result :{prediction_test}")
+        except Exception as e:
+            logger.error(f"An error occurred during the sanity test: {e}")
+            raise
+        finally:
+            sys.path = org_sys_path
+
+    def _copy_score_file(self):
+        """
+        Copies the score.py to the artifact_path.
+        """
+        try:
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            score_file = os.path.join(current_dir, "score.py")
+            destination_file = os.path.join(self.path_to_artifact, os.path.basename(score_file))
+            shutil.copy2(score_file, destination_file)
+            logger.info(f"score.py copied successfully to {self.path_to_artifact}")
+        except Exception as e:
+            logger.warn(f"Error copying file: {e}")
+            raise e
+
+    def save_to_catalog(self):
+        """Save the model to a model catalog"""
+        with open(self.pickle_file_path, 'rb') as file:
+            self.model_obj = pickle.load(file)
+
+        if not os.path.exists(self.path_to_artifact):
+            os.mkdir(self.path_to_artifact)
+
+        artifact_dict = {"spec": self.spec.to_dict(), "models": self.model_obj}
+        with open(f"{self.path_to_artifact}/models.pickle", "wb") as f:
+            cloudpickle.dump(artifact_dict, f)
+        artifact = prepare_generic_model(
+            self.path_to_artifact,
+            function_artifacts=False,
+            force_overwrite=True,
+            data_science_env=True)
+
+        self._copy_score_file()
+        self._sanity_test()
+
+        if isinstance(self.model_obj, dict):
+            series = self.model_obj.keys()
+        else:
+            series = self.additional_data.keys()
+        description = f"The object contains {len(series)} {self.model_name} models"
+
+        if not self.test_mode:
+            catalog_entry = artifact.save(
+                display_name=self.display_name,
+                compartment_id=self.compartment_id,
+                project_id=self.project_id,
+                description=description)
+            self.catalog_id = catalog_entry.id
+
+        logger.info(f"Saved {self.model_name} version-v{self.model_version} to model catalog"
+                    f" with model ocid : {self.catalog_id}")
+
+        self.deployment_info = {"model_ocid": self.catalog_id, "series": list(series)}
+
+    def create_deployment(self):
+        """Create a model deployment serving"""
+
+        # create new model deployment
+        initial_shape = self.spec.what_if_analysis.model_deployment.initial_shape
+        name = self.spec.what_if_analysis.model_deployment.display_name
+        description = self.spec.what_if_analysis.model_deployment.description
+        auto_scaling_config = self.spec.what_if_analysis.model_deployment.auto_scaling
+
+        # if auto_scaling_config is defined
+        if auto_scaling_config:
+            scaling_policy = oci.data_science.models.AutoScalingPolicy(
+                policy_type="AUTOSCALING",
+                auto_scaling_policies=[
+                    oci.data_science.models.ThresholdBasedAutoScalingPolicyDetails(
+                        auto_scaling_policy_type="THRESHOLD",
+                        rules=[
+                            oci.data_science.models.PredefinedMetricExpressionRule(
+                                metric_expression_rule_type="PREDEFINED_EXPRESSION",
+                                metric_type=auto_scaling_config.scaling_metric,
+                                scale_in_configuration=oci.data_science.models.PredefinedExpressionThresholdScalingConfiguration(
+                                    scaling_configuration_type="THRESHOLD",
+                                    threshold=auto_scaling_config.scale_in_threshold
+                                ),
+                                scale_out_configuration=oci.data_science.models.PredefinedExpressionThresholdScalingConfiguration(
+                                    scaling_configuration_type="THRESHOLD",
+                                    threshold=auto_scaling_config.scale_out_threshold
+                                )
+                            )],
+                        maximum_instance_count=auto_scaling_config.maximum_instance,
+                        minimum_instance_count=auto_scaling_config.minimum_instance,
+                        initial_instance_count=auto_scaling_config.minimum_instance)],
+                cool_down_in_seconds=auto_scaling_config.cool_down_in_seconds,
+                is_enabled=True)
+            logger.info(f"Using autoscaling {auto_scaling_config.scaling_metric} for creating MD")
+        else:
+            scaling_policy = FixedSizeScalingPolicy(instance_count=1)
+            logger.info("Using fixed size policy for creating MD")
+
+        model_configuration_details_object = ModelConfigurationDetails(
+            model_id=self.catalog_id,
+            instance_configuration=InstanceConfiguration(
+                instance_shape_name=initial_shape),
+            scaling_policy=scaling_policy,
+            bandwidth_mbps=20)
+
+        single_model_config = SingleModelDeploymentConfigurationDetails(
+            deployment_type='SINGLE_MODEL',
+            model_configuration_details=model_configuration_details_object
+        )
+
+        log_group = self.spec.what_if_analysis.model_deployment.log_group
+        log_id = self.spec.what_if_analysis.model_deployment.log_id
+
+        logs_configuration_details_object = CategoryLogDetails(
+            access=LogDetails(log_group_id=log_group,
+                              log_id=log_id),
+            predict=LogDetails(log_group_id=log_group,
+                               log_id=log_id))
+
+        model_deploy_configuration = CreateModelDeploymentDetails(
+            display_name=name,
+            description=description,
+            project_id=self.project_id,
+            compartment_id=self.compartment_id,
+            model_deployment_configuration_details=single_model_config,
+            category_log_details=logs_configuration_details_object)
+
+        if not self.test_mode:
+            auth = oci.auth.signers.get_resource_principals_signer()
+            data_science = DataScienceClient({}, signer=auth)
+            data_science_composite = DataScienceClientCompositeOperations(data_science)
+            model_deployment = data_science_composite.create_model_deployment_and_wait_for_state(
+                model_deploy_configuration,
+                wait_for_states=[
+                    "SUCCEEDED", "FAILED"])
+            self.deployment_info['work_request'] = model_deployment.data.id
+            logger.info(f"deployment metadata :{model_deployment.data}")
+            md = data_science.get_model_deployment(model_deployment_id=model_deployment.data.resources[0].identifier)
+            self.deployment_info['model_deployment_ocid'] = md.data.id
+            endpoint_url = md.data.model_deployment_url
+            self.deployment_info['model_deployment_endpoint'] = f"{endpoint_url}/predict"
+
+    def save_deployment_info(self):
+        output_dir = self.spec.output_directory.url
+        if ObjectStorageDetails.is_oci_path(output_dir):
+            storage_options = default_signer()
+        else:
+            storage_options = {}
+        write_data(
+            data=pd.DataFrame.from_dict(self.deployment_info),
+            filename=os.path.join(output_dir, "deployment_info.json"),
+            format="json",
+            storage_options=storage_options,
+            index=False,
+            indent=4,
+            orient="records"
+        )
+        write_simple_json(self.deployment_info, os.path.join(output_dir, "deployment_info.json"))
+        logger.info(f"Saved deployment info to {output_dir}")
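
Once `create_deployment` finishes, `deployment_info.json` records the `model_deployment_endpoint`, and the request payload matches what `_sanity_test` feeds to `score.predict`. A hedged sketch of calling the live endpoint (endpoint URL and CSV location are hypothetical placeholders; OCI signers double as a `requests` auth object, and resource-principal auth assumes the caller runs inside an OCI workload):

    import oci
    import requests

    # Taken from deployment_info.json after the operator run (hypothetical URL).
    endpoint = "https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.example/predict"
    # Same shape as the sanity-test input: a URL pointing at future regressors.
    payload = {"additional_data": {"url": "oci://my-bucket@my-namespace/future_regressors.csv"}}

    auth = oci.auth.signers.get_resource_principals_signer()
    response = requests.post(endpoint, json=payload, auth=auth)
    print(response.json())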