PyPI - oracle-ads - Versions diffs - 2.13.17rc0__py3-none-any.whl → 2.13.18__py3-none-any.whl - Mend

oracle-ads 2.13.17rc0__py3-none-any.whl → 2.13.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

ads/aqua/cli.py +7 -5
ads/aqua/common/entities.py +88 -29
ads/aqua/common/enums.py +7 -0
ads/aqua/common/errors.py +5 -0
ads/aqua/common/utils.py +87 -7
ads/aqua/constants.py +3 -0
ads/aqua/extension/deployment_handler.py +36 -0
ads/aqua/modeldeployment/config_loader.py +10 -0
ads/aqua/modeldeployment/constants.py +1 -0
ads/aqua/modeldeployment/deployment.py +99 -22
ads/aqua/modeldeployment/entities.py +4 -0
ads/aqua/resources/gpu_shapes_index.json +315 -26
ads/aqua/shaperecommend/__init__.py +6 -0
ads/aqua/shaperecommend/constants.py +116 -0
ads/aqua/shaperecommend/estimator.py +384 -0
ads/aqua/shaperecommend/llm_config.py +283 -0
ads/aqua/shaperecommend/recommend.py +493 -0
ads/aqua/shaperecommend/shape_report.py +233 -0
ads/aqua/version.json +1 -1
ads/cli.py +9 -1
ads/jobs/builders/infrastructure/dsc_job.py +1 -0
ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
ads/model/service/oci_datascience_model_deployment.py +46 -19
ads/opctl/operator/lowcode/common/data.py +7 -2
ads/opctl/operator/lowcode/common/transformations.py +207 -0
ads/opctl/operator/lowcode/common/utils.py +8 -0
ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
ads/opctl/operator/lowcode/forecast/const.py +2 -0
ads/opctl/operator/lowcode/forecast/errors.py +5 -0
ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
ads/pipeline/ads_pipeline.py +13 -9
{oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
{oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +43 -36
{oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
{oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
{oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0

ads/aqua/modeldeployment/deployment.py CHANGED Viewed

@@ -8,11 +8,12 @@ import re
 import shlex
 import threading
 from datetime import datetime, timedelta
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 from cachetools import TTLCache, cached
 from oci.data_science.models import ModelDeploymentShapeSummary
 from pydantic import ValidationError
+from rich.table import Table
 from ads.aqua.app import AquaApp, logger
 from ads.aqua.common.entities import (
@@ -27,6 +28,7 @@ from ads.aqua.common.utils import (
     build_pydantic_error_message,
     find_restricted_params,
     get_combined_params,
+    get_container_env_type,
     get_container_params_type,
     get_ocid_substring,
     get_params_list,
@@ -43,8 +45,11 @@ from ads.aqua.constants import (
     AQUA_MODEL_TYPE_SERVICE,
     AQUA_MULTI_MODEL_CONFIG,
     MODEL_BY_REFERENCE_OSS_PATH_KEY,
+    MODEL_GROUP,
     MODEL_NAME_DELIMITER,
+    SINGLE_MODEL_FLEX,
     UNKNOWN_DICT,
+    UNKNOWN_ENUM_VALUE,
 )
 from ads.aqua.data import AquaResourceIdentifier
 from ads.aqua.model import AquaModelApp
@@ -71,6 +76,11 @@ from ads.aqua.modeldeployment.entities import (
     CreateModelDeploymentDetails,
 )
 from ads.aqua.modeldeployment.model_group_config import ModelGroupConfig
+from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
+from ads.aqua.shaperecommend.shape_report import (
+    RequestRecommend,
+    ShapeRecommendationReport,
+)
 from ads.common.object_storage_details import ObjectStorageDetails
 from ads.common.utils import UNKNOWN, get_log_links
 from ads.common.work_request import DataScienceWorkRequest
@@ -199,7 +209,7 @@ class AquaDeploymentApp(AquaApp):
         if create_deployment_details.instance_shape.lower() not in available_shapes:
             raise AquaValueError(
                 f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' "
-                f"is not available in the {self.region} region. Please choose another shape to deploy the model."
+                f"is not supported in the {self.region} region. Please choose another shape to deploy the model."
             )
         # Get container config
@@ -381,6 +391,7 @@ class AquaDeploymentApp(AquaApp):
             Tags.AQUA_SERVICE_MODEL_TAG,
             Tags.AQUA_FINE_TUNED_MODEL_TAG,
             Tags.AQUA_TAG,
+            Tags.BASE_MODEL_CUSTOM,
         ]:
             if tag in aqua_model.freeform_tags:
                 tags[tag] = aqua_model.freeform_tags[tag]
@@ -515,6 +526,7 @@ class AquaDeploymentApp(AquaApp):
         # validate user provided params
         user_params = env_var.get("PARAMS", UNKNOWN)
         if user_params:
             # todo: remove this check in the future version, logic to be moved to container_index
             if (
@@ -540,6 +552,18 @@ class AquaDeploymentApp(AquaApp):
         deployment_params = get_combined_params(config_params, user_params)
         params = f"{params} {deployment_params}".strip()
+        if create_deployment_details.model_name:
+            # Replace existing --served-model-name argument if present, otherwise add it
+            if "--served-model-name" in params:
+                params = re.sub(
+                    r"--served-model-name\s+\S+",
+                    f"--served-model-name {create_deployment_details.model_name}",
+                    params,
+                )
+            else:
+                params += f" --served-model-name {create_deployment_details.model_name}"
         if params:
             env_var.update({"PARAMS": params})
         env_vars = container_spec.env_vars if container_spec else []
@@ -862,21 +886,26 @@ class AquaDeploymentApp(AquaApp):
             if oci_aqua:
                 # skipping the AQUA model deployments that are created from model group
-                # TODO: remove this checker after AQUA deployment is integrated with model group
-                aqua_model_id = model_deployment.freeform_tags.get(
-                    Tags.AQUA_MODEL_ID_TAG, UNKNOWN
-                )
                 if (
-                    "datasciencemodelgroup" in aqua_model_id
-                    or model_deployment.model_deployment_configuration_details.deployment_type
-                    == "UNKNOWN_ENUM_VALUE"
+                    model_deployment.model_deployment_configuration_details.deployment_type
+                    in [UNKNOWN_ENUM_VALUE, MODEL_GROUP, SINGLE_MODEL_FLEX]
                 ):
                     continue
-                results.append(
-                    AquaDeployment.from_oci_model_deployment(
-                        model_deployment, self.region
+                try:
+                    results.append(
+                        AquaDeployment.from_oci_model_deployment(
+                            model_deployment, self.region
+                        )
                     )
-                )
+                except Exception as e:
+                    logger.error(
+                        f"There was an issue processing the list of model deployments . Error: {str(e)}",
+                        exc_info=True,
+                    )
+                    raise AquaRuntimeError(
+                        f"There was an issue processing the list of model deployments . Error: {str(e)}"
+                    ) from e
                 # log telemetry if MD is in active or failed state
                 deployment_id = model_deployment.id
                 state = model_deployment.lifecycle_state.upper()
@@ -1042,6 +1071,7 @@ class AquaDeploymentApp(AquaApp):
         config = self.get_config_from_metadata(
             model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
         ).config
         if config:
             logger.info(
                 f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
@@ -1126,7 +1156,7 @@ class AquaDeploymentApp(AquaApp):
         model_id: str,
         instance_shape: str,
         gpu_count: int = None,
-    ) -> List[str]:
+    ) -> Dict:
         """Gets the default params set in the deployment configs for the given model and instance shape.
         Parameters
@@ -1148,6 +1178,7 @@ class AquaDeploymentApp(AquaApp):
         """
         default_params = []
+        default_envs = {}
         config_params = {}
         model = DataScienceModel.from_id(model_id)
         try:
@@ -1157,19 +1188,15 @@ class AquaDeploymentApp(AquaApp):
         except ValueError:
             container_type_key = UNKNOWN
             logger.debug(
-                f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
+                f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
+                f"custom metadata field for model {model_id}."
             )
-        if (
-            container_type_key
-            and container_type_key in InferenceContainerTypeFamily.values()
-        ):
+        if container_type_key:
             deployment_config = self.get_deployment_config(model_id)
             instance_shape_config = deployment_config.configuration.get(
                 instance_shape, ConfigurationItem()
             )
             if instance_shape_config.multi_model_deployment and gpu_count:
                 gpu_params = instance_shape_config.multi_model_deployment
@@ -1178,12 +1205,18 @@ class AquaDeploymentApp(AquaApp):
                         config_params = gpu_config.parameters.get(
                             get_container_params_type(container_type_key), UNKNOWN
                         )
+                        default_envs = instance_shape_config.env.get(
+                            get_container_env_type(container_type_key), {}
+                        )
                         break
             else:
                 config_params = instance_shape_config.parameters.get(
                     get_container_params_type(container_type_key), UNKNOWN
                 )
+                default_envs = instance_shape_config.env.get(
+                    get_container_env_type(container_type_key), {}
+                )
             if config_params:
                 params_list = get_params_list(config_params)
@@ -1196,7 +1229,7 @@ class AquaDeploymentApp(AquaApp):
                     if params.split()[0] not in restricted_params_set:
                         default_params.append(params)
-        return default_params
+        return {"data": default_params, "env": default_envs}
     def validate_deployment_params(
         self,
@@ -1243,6 +1276,50 @@ class AquaDeploymentApp(AquaApp):
             )
         return {"valid": True}
+    def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
+        """
+        For the CLI (set generate_table = True), generates the table (in rich diff) with valid
+        GPU deployment shapes for the provided model and configuration.
+        For the API (set generate_table = False), generates the JSON with valid
+        GPU deployment shapes for the provided model and configuration.
+        Validates if recommendations are generated, calls method to construct the rich diff
+        table with the recommendation data.
+        Parameters
+        ----------
+        model_ocid : str
+        OCID of the model to recommend feasible compute shapes.
+        Returns
+        -------
+        Table (generate_table = True)
+            A table format for the recommendation report with compatible deployment shapes
+            or troubleshooting info citing the largest shapes if no shape is suitable.
+        ShapeRecommendationReport (generate_table = False)
+            A recommendation report with compatible deployment shapes, or troubleshooting info
+            citing the largest shapes if no shape is suitable.
+        Raises
+        ------
+        AquaValueError
+            If model type is unsupported by tool (no recommendation report generated)
+        """
+        try:
+            request = RequestRecommend(**kwargs)
+        except ValidationError as e:
+            custom_error = build_pydantic_error_message(e)
+            raise AquaValueError(  # noqa: B904
+                f"Failed to request shape recommendation due to invalid input parameters: {custom_error}"
+            )
+        shape_recommend = AquaShapeRecommend()
+        shape_recommend_report = shape_recommend.which_shapes(request)
+        return shape_recommend_report
     @telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
     @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
     def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:

ads/aqua/modeldeployment/entities.py CHANGED Viewed

@@ -233,6 +233,10 @@ class CreateModelDeploymentDetails(BaseModel):
         None, description="The description of the deployment."
     )
     model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
+    model_name: Optional[str] = Field(
+        None, description="The model name specified by user to deploy."
+    )
     models: Optional[List[AquaMultiModelRef]] = Field(
         None, description="List of models for multimodel deployment."
     )

ads/aqua/resources/gpu_shapes_index.json CHANGED Viewed

@@ -1,94 +1,383 @@
 {
   "shapes": {
     "BM.GPU.A10.4": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 50,
+        "performance": 50
+      }
     },
     "BM.GPU.A100-V2.8": {
+      "cpu_count": 128,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "A100"
+      "gpu_type": "A100",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 80,
+        "performance": 70
+      }
+    },
+    "BM.GPU.B200.8": {
+      "cpu_count": 128,
+      "cpu_memory_in_gbs": 4096,
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 1440,
+      "gpu_type": "B200",
+      "quantization": [
+        "fp4",
+        "fp8",
+        "fp16",
+        "bf16",
+        "tf32",
+        "int8",
+        "fp64"
+      ],
+      "ranking": {
+        "cost": 120,
+        "performance": 130
+      }
     },
     "BM.GPU.B4.8": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_type": "A100",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 70,
+        "performance": 60
+      }
+    },
+    "BM.GPU.GB200.4": {
+      "cpu_count": 144,
+      "cpu_memory_in_gbs": 1024,
+      "gpu_count": 4,
+      "gpu_memory_in_gbs": 768,
+      "gpu_type": "GB200",
+      "quantization": [
+        "fp4",
+        "fp8",
+        "fp6",
+        "int8",
+        "fp16",
+        "bf16",
+        "tf32",
+        "fp64"
+      ],
+      "ranking": {
+        "cost": 110,
+        "performance": 120
+      }
     },
     "BM.GPU.H100.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "H100"
+      "gpu_type": "H100",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 100,
+        "performance": 100
+      }
     },
     "BM.GPU.H200.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 3072,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 1128,
-      "gpu_type": "H200"
+      "gpu_type": "H200",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 100,
+        "performance": 110
+      }
     },
     "BM.GPU.L40S-NC.4": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
+      "gpu_type": "L40S",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "BM.GPU.L40S.4": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
+      "gpu_type": "L40S",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "BM.GPU.MI300X.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 1536,
-      "gpu_type": "MI300X"
+      "gpu_type": "MI300X",
+      "quantization": [
+        "fp8",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 90,
+        "performance": 90
+      }
     },
     "BM.GPU2.2": {
+      "cpu_count": 28,
+      "cpu_memory_in_gbs": 192,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
-      "gpu_type": "P100"
-    },
-    "BM.GPU3.8": {
-      "gpu_count": 8,
-      "gpu_memory_in_gbs": 128,
-      "gpu_type": "V100"
+      "gpu_type": "P100",
+      "quantization": [
+        "fp16"
+      ],
+      "ranking": {
+        "cost": 30,
+        "performance": 20
+      }
     },
     "BM.GPU4.8": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_type": "A100",
+      "quantization": [
+        "int8",
+        "fp16",
+        "bf16",
+        "tf32"
+      ],
+      "ranking": {
+        "cost": 57,
+        "performance": 65
+      }
     },
     "VM.GPU.A10.1": {
+      "cpu_count": 15,
+      "cpu_memory_in_gbs": 240,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 24,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 20,
+        "performance": 30
+      }
     },
     "VM.GPU.A10.2": {
+      "cpu_count": 30,
+      "cpu_memory_in_gbs": 480,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 48,
-      "gpu_type": "A10"
-    },
-    "VM.GPU.A10.4": {
-      "gpu_count": 4,
-      "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 40,
+        "performance": 40
+      }
     },
     "VM.GPU2.1": {
+      "cpu_count": 12,
+      "cpu_memory_in_gbs": 72,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "P100"
+      "gpu_type": "P100",
+      "quantization": [
+        "fp16"
+      ],
+      "ranking": {
+        "cost": 10,
+        "performance": 10
+      }
     },
     "VM.GPU3.1": {
+      "cpu_count": 6,
+      "cpu_memory_in_gbs": 90,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 35,
+        "performance": 10
+      }
     },
     "VM.GPU3.2": {
+      "cpu_count": 12,
+      "cpu_memory_in_gbs": 180,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 45,
+        "performance": 20
+      }
     },
     "VM.GPU3.4": {
+      "cpu_count": 24,
+      "cpu_memory_in_gbs": 360,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 64,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 55,
+        "performance": 45
+      }
+    },
+    "VM.GPU3.8": {
+      "cpu_count": 24,
+      "cpu_memory_in_gbs": 768,
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 128,
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 56,
+        "performance": 46
+      }
     }
   }
 }

ads/aqua/shaperecommend/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
+__all__ = ["AquaShapeRecommend"]

oracle-ads 2.13.17rc0__py3-none-any.whl → 2.13.18__py3-none-any.whl

oracle-ads 2.13.17rc0__py3-none-any.whl → 2.13.18__py3-none-any.whl