PyPI - oracle-ads - Versions diffs - 2.13.17__py3-none-any.whl → 2.13.18__py3-none-any.whl - Mend

oracle-ads 2.13.17__py3-none-any.whl → 2.13.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

ads/aqua/cli.py +7 -5
ads/aqua/common/entities.py +88 -29
ads/aqua/common/enums.py +6 -0
ads/aqua/common/errors.py +5 -0
ads/aqua/common/utils.py +49 -7
ads/aqua/constants.py +3 -0
ads/aqua/extension/deployment_handler.py +36 -0
ads/aqua/modeldeployment/constants.py +1 -0
ads/aqua/modeldeployment/deployment.py +83 -12
ads/aqua/modeldeployment/entities.py +3 -0
ads/aqua/resources/gpu_shapes_index.json +315 -26
ads/aqua/shaperecommend/__init__.py +6 -0
ads/aqua/shaperecommend/constants.py +116 -0
ads/aqua/shaperecommend/estimator.py +384 -0
ads/aqua/shaperecommend/llm_config.py +283 -0
ads/aqua/shaperecommend/recommend.py +493 -0
ads/aqua/shaperecommend/shape_report.py +233 -0
ads/aqua/version.json +1 -1
ads/cli.py +9 -1
ads/jobs/builders/infrastructure/dsc_job.py +1 -0
ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
ads/model/service/oci_datascience_model_deployment.py +46 -19
ads/opctl/operator/lowcode/common/data.py +7 -2
ads/opctl/operator/lowcode/common/transformations.py +207 -0
ads/opctl/operator/lowcode/common/utils.py +8 -0
ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
ads/opctl/operator/lowcode/forecast/const.py +2 -0
ads/opctl/operator/lowcode/forecast/errors.py +5 -0
ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
ads/pipeline/ads_pipeline.py +13 -9
{oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
{oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +42 -35
{oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
{oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
{oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0

ads/aqua/resources/gpu_shapes_index.json CHANGED Viewed

@@ -1,94 +1,383 @@
 {
   "shapes": {
     "BM.GPU.A10.4": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 50,
+        "performance": 50
+      }
     },
     "BM.GPU.A100-V2.8": {
+      "cpu_count": 128,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "A100"
+      "gpu_type": "A100",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 80,
+        "performance": 70
+      }
+    },
+    "BM.GPU.B200.8": {
+      "cpu_count": 128,
+      "cpu_memory_in_gbs": 4096,
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 1440,
+      "gpu_type": "B200",
+      "quantization": [
+        "fp4",
+        "fp8",
+        "fp16",
+        "bf16",
+        "tf32",
+        "int8",
+        "fp64"
+      ],
+      "ranking": {
+        "cost": 120,
+        "performance": 130
+      }
     },
     "BM.GPU.B4.8": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_type": "A100",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 70,
+        "performance": 60
+      }
+    },
+    "BM.GPU.GB200.4": {
+      "cpu_count": 144,
+      "cpu_memory_in_gbs": 1024,
+      "gpu_count": 4,
+      "gpu_memory_in_gbs": 768,
+      "gpu_type": "GB200",
+      "quantization": [
+        "fp4",
+        "fp8",
+        "fp6",
+        "int8",
+        "fp16",
+        "bf16",
+        "tf32",
+        "fp64"
+      ],
+      "ranking": {
+        "cost": 110,
+        "performance": 120
+      }
     },
     "BM.GPU.H100.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "H100"
+      "gpu_type": "H100",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 100,
+        "performance": 100
+      }
     },
     "BM.GPU.H200.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 3072,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 1128,
-      "gpu_type": "H200"
+      "gpu_type": "H200",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 100,
+        "performance": 110
+      }
     },
     "BM.GPU.L40S-NC.4": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
+      "gpu_type": "L40S",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "BM.GPU.L40S.4": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
+      "gpu_type": "L40S",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "BM.GPU.MI300X.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 1536,
-      "gpu_type": "MI300X"
+      "gpu_type": "MI300X",
+      "quantization": [
+        "fp8",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 90,
+        "performance": 90
+      }
     },
     "BM.GPU2.2": {
+      "cpu_count": 28,
+      "cpu_memory_in_gbs": 192,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
-      "gpu_type": "P100"
-    },
-    "BM.GPU3.8": {
-      "gpu_count": 8,
-      "gpu_memory_in_gbs": 128,
-      "gpu_type": "V100"
+      "gpu_type": "P100",
+      "quantization": [
+        "fp16"
+      ],
+      "ranking": {
+        "cost": 30,
+        "performance": 20
+      }
     },
     "BM.GPU4.8": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_type": "A100",
+      "quantization": [
+        "int8",
+        "fp16",
+        "bf16",
+        "tf32"
+      ],
+      "ranking": {
+        "cost": 57,
+        "performance": 65
+      }
     },
     "VM.GPU.A10.1": {
+      "cpu_count": 15,
+      "cpu_memory_in_gbs": 240,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 24,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 20,
+        "performance": 30
+      }
     },
     "VM.GPU.A10.2": {
+      "cpu_count": 30,
+      "cpu_memory_in_gbs": 480,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 48,
-      "gpu_type": "A10"
-    },
-    "VM.GPU.A10.4": {
-      "gpu_count": 4,
-      "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 40,
+        "performance": 40
+      }
     },
     "VM.GPU2.1": {
+      "cpu_count": 12,
+      "cpu_memory_in_gbs": 72,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "P100"
+      "gpu_type": "P100",
+      "quantization": [
+        "fp16"
+      ],
+      "ranking": {
+        "cost": 10,
+        "performance": 10
+      }
     },
     "VM.GPU3.1": {
+      "cpu_count": 6,
+      "cpu_memory_in_gbs": 90,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 35,
+        "performance": 10
+      }
     },
     "VM.GPU3.2": {
+      "cpu_count": 12,
+      "cpu_memory_in_gbs": 180,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 45,
+        "performance": 20
+      }
     },
     "VM.GPU3.4": {
+      "cpu_count": 24,
+      "cpu_memory_in_gbs": 360,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 64,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 55,
+        "performance": 45
+      }
+    },
+    "VM.GPU3.8": {
+      "cpu_count": 24,
+      "cpu_memory_in_gbs": 768,
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 128,
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 56,
+        "performance": 46
+      }
     }
   }
 }

ads/aqua/shaperecommend/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
+__all__ = ["AquaShapeRecommend"]

ads/aqua/shaperecommend/constants.py ADDED Viewed

@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+"""
+aqua.shaperecommend.constants
+~~~~~~~~~~~~~~
+This module contains constants used in Aqua GPU Recommendation for Models.
+LLAMA_REQUIRED_FIELDS refer to fields necessary for calculating model memory for GQA Architecture Models
+MOE_REQUIRED_FIELDS refer to fields necessary for Mixture of Experts (MoE) Architecture Models
+NEXT_QUANT suggests the next quantization level based on the current quantization (if applied) or the model weights (if no quantization yet)
+"""
+LLAMA_REQUIRED_FIELDS = [
+    "num_hidden_layers",
+    "hidden_size",
+    "num_attention_heads",
+    "num_key_value_heads",
+    "head_dim",
+    "intermediate_size",
+    "vocab_size",
+]
+MOE_REQUIRED_FIELDS = LLAMA_REQUIRED_FIELDS + ["num_local_experts", "intermediate_size"]
+NEXT_QUANT = {
+    "float32": ["8bit", "4bit"],
+    "bfloat16": ["8bit", "4bit"],
+    "float16": ["8bit", "4bit"],
+    "int8": ["4bit"],
+    "fp8": ["4bit"],
+    "8bit": ["4bit"],
+    "int4": ["No smaller quantization available"],
+    "4bit": ["No smaller quantization available"],
+}
+TEXT_GENERATION = "text_generation"
+SAFETENSORS = "safetensors"
+QUANT_METHODS = [
+    "aqlm",
+    "awq",
+    "deepspeedfp",
+    "tpu_int8",
+    "fp8",
+    "ptpc_fp8",
+    "fbgemm_fp8",
+    "modelopt",
+    "modelopt_fp4",
+    "marlin",
+    "bitblas",
+    "gguf",
+    "gptq_marlin_24",
+    "gptq_marlin",
+    "gptq_bitblas",
+    "awq_marlin",
+    "gptq",
+    "compressed-tensors",
+    "bitsandbytes",
+    "qqq",
+    "hqq",
+    "experts_int8",
+    "neuron_quant",
+    "ipex",
+    "quark",
+    "moe_wna16",
+    "torchao",
+    "auto-round",
+    "rtn",
+    "inc",
+    "mxfp4",
+]
+IN_FLIGHT_QUANTIZATION = {"4bit"}  # vLLM only supports 4bit in-flight-quantization
+TROUBLESHOOT_MSG = "The selected model is too large to fit on standard GPU shapes with the current configuration.\nAs troubleshooting, we have suggested the two largest available GPU shapes using the smallest quantization level ('4bit') to maximize chances of fitting the model. "
+VLLM_PARAMS = {
+    "max_model_len": "--max-model-len",
+    "in_flight_quant": "--quantization bitsandbytes --load-format bitsandbytes",
+}
+DEFAULT_WEIGHT_SIZE = "float32"
+BITS_AND_BYTES_8BIT = "8bit"
+BITS_AND_BYTES_4BIT = "4bit"
+BITSANDBYTES = "bitsandbytes"
+QUANT_MAPPING = {
+    "float32": 4,
+    "bfloat16": 2,
+    "float16": 2,
+    "fp16": 2,
+    "half": 2,
+    "int8": 1,
+    "fp8": 1,
+    "8bit": 1,
+    "4bit": 0.5,
+    "int4": 0.5,
+}
+SHAPE_MAP = {
+    "NVIDIA_GPU": "GPU",
+    "AMD_ROME": "CPU",
+    "GENERIC": "CPU",
+    "LEGACY": "CPU",
+    "ARM": "CPU",
+    "UNKNOWN_ENUM_VALUE": "N/A",
+}

oracle-ads 2.13.17__py3-none-any.whl → 2.13.18__py3-none-any.whl

oracle-ads 2.13.17__py3-none-any.whl → 2.13.18__py3-none-any.whl