oracle-ads 2.13.18rc0 → 2.13.19 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. ads/aqua/cli.py +7 -5
  2. ads/aqua/common/entities.py +195 -48
  3. ads/aqua/common/enums.py +6 -0
  4. ads/aqua/common/errors.py +5 -0
  5. ads/aqua/common/utils.py +157 -66
  6. ads/aqua/constants.py +3 -0
  7. ads/aqua/extension/deployment_handler.py +36 -0
  8. ads/aqua/modeldeployment/constants.py +1 -0
  9. ads/aqua/modeldeployment/deployment.py +95 -14
  10. ads/aqua/modeldeployment/entities.py +3 -0
  11. ads/aqua/modeldeployment/model_group_config.py +3 -3
  12. ads/aqua/resources/gpu_shapes_index.json +315 -26
  13. ads/aqua/shaperecommend/__init__.py +6 -0
  14. ads/aqua/shaperecommend/constants.py +116 -0
  15. ads/aqua/shaperecommend/estimator.py +384 -0
  16. ads/aqua/shaperecommend/llm_config.py +283 -0
  17. ads/aqua/shaperecommend/recommend.py +493 -0
  18. ads/aqua/shaperecommend/shape_report.py +233 -0
  19. ads/aqua/version.json +1 -1
  20. ads/cli.py +9 -1
  21. ads/jobs/builders/infrastructure/dsc_job.py +1 -0
  22. ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
  23. ads/model/service/oci_datascience_model_deployment.py +46 -19
  24. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +4 -3
  25. ads/pipeline/ads_pipeline.py +13 -9
  26. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/METADATA +1 -1
  27. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/RECORD +30 -24
  28. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/WHEEL +0 -0
  29. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/entry_points.txt +0 -0
  30. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/resources/gpu_shapes_index.json
@@ -1,94 +1,383 @@
  {
  "shapes": {
  "BM.GPU.A10.4": {
+ "cpu_count": 64,
+ "cpu_memory_in_gbs": 1024,
  "gpu_count": 4,
  "gpu_memory_in_gbs": 96,
- "gpu_type": "A10"
+ "gpu_type": "A10",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 50,
+ "performance": 50
+ }
  },
  "BM.GPU.A100-V2.8": {
+ "cpu_count": 128,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 640,
- "gpu_type": "A100"
+ "gpu_type": "A100",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 80,
+ "performance": 70
+ }
+ },
+ "BM.GPU.B200.8": {
+ "cpu_count": 128,
+ "cpu_memory_in_gbs": 4096,
+ "gpu_count": 8,
+ "gpu_memory_in_gbs": 1440,
+ "gpu_type": "B200",
+ "quantization": [
+ "fp4",
+ "fp8",
+ "fp16",
+ "bf16",
+ "tf32",
+ "int8",
+ "fp64"
+ ],
+ "ranking": {
+ "cost": 120,
+ "performance": 130
+ }
  },
  "BM.GPU.B4.8": {
+ "cpu_count": 64,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 320,
- "gpu_type": "A100"
+ "gpu_type": "A100",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 70,
+ "performance": 60
+ }
+ },
+ "BM.GPU.GB200.4": {
+ "cpu_count": 144,
+ "cpu_memory_in_gbs": 1024,
+ "gpu_count": 4,
+ "gpu_memory_in_gbs": 768,
+ "gpu_type": "GB200",
+ "quantization": [
+ "fp4",
+ "fp8",
+ "fp6",
+ "int8",
+ "fp16",
+ "bf16",
+ "tf32",
+ "fp64"
+ ],
+ "ranking": {
+ "cost": 110,
+ "performance": 120
+ }
  },
  "BM.GPU.H100.8": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 640,
- "gpu_type": "H100"
+ "gpu_type": "H100",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "fp8",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 100,
+ "performance": 100
+ }
  },
  "BM.GPU.H200.8": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 3072,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 1128,
- "gpu_type": "H200"
+ "gpu_type": "H200",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "fp8",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 100,
+ "performance": 110
+ }
  },
  "BM.GPU.L40S-NC.4": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 1024,
  "gpu_count": 4,
  "gpu_memory_in_gbs": 192,
- "gpu_type": "L40S"
+ "gpu_type": "L40S",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "fp8",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 60,
+ "performance": 80
+ }
  },
  "BM.GPU.L40S.4": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 1024,
  "gpu_count": 4,
  "gpu_memory_in_gbs": 192,
- "gpu_type": "L40S"
+ "gpu_type": "L40S",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "fp8",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 60,
+ "performance": 80
+ }
  },
  "BM.GPU.MI300X.8": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 1536,
- "gpu_type": "MI300X"
+ "gpu_type": "MI300X",
+ "quantization": [
+ "fp8",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 90,
+ "performance": 90
+ }
  },
  "BM.GPU2.2": {
+ "cpu_count": 28,
+ "cpu_memory_in_gbs": 192,
  "gpu_count": 2,
  "gpu_memory_in_gbs": 32,
- "gpu_type": "P100"
- },
- "BM.GPU3.8": {
- "gpu_count": 8,
- "gpu_memory_in_gbs": 128,
- "gpu_type": "V100"
+ "gpu_type": "P100",
+ "quantization": [
+ "fp16"
+ ],
+ "ranking": {
+ "cost": 30,
+ "performance": 20
+ }
  },
  "BM.GPU4.8": {
+ "cpu_count": 64,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 320,
- "gpu_type": "A100"
+ "gpu_type": "A100",
+ "quantization": [
+ "int8",
+ "fp16",
+ "bf16",
+ "tf32"
+ ],
+ "ranking": {
+ "cost": 57,
+ "performance": 65
+ }
  },
  "VM.GPU.A10.1": {
+ "cpu_count": 15,
+ "cpu_memory_in_gbs": 240,
  "gpu_count": 1,
  "gpu_memory_in_gbs": 24,
- "gpu_type": "A10"
+ "gpu_type": "A10",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 20,
+ "performance": 30
+ }
  },
  "VM.GPU.A10.2": {
+ "cpu_count": 30,
+ "cpu_memory_in_gbs": 480,
  "gpu_count": 2,
  "gpu_memory_in_gbs": 48,
- "gpu_type": "A10"
- },
- "VM.GPU.A10.4": {
- "gpu_count": 4,
- "gpu_memory_in_gbs": 96,
- "gpu_type": "A10"
+ "gpu_type": "A10",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 40,
+ "performance": 40
+ }
  },
  "VM.GPU2.1": {
+ "cpu_count": 12,
+ "cpu_memory_in_gbs": 72,
  "gpu_count": 1,
  "gpu_memory_in_gbs": 16,
- "gpu_type": "P100"
+ "gpu_type": "P100",
+ "quantization": [
+ "fp16"
+ ],
+ "ranking": {
+ "cost": 10,
+ "performance": 10
+ }
  },
  "VM.GPU3.1": {
+ "cpu_count": 6,
+ "cpu_memory_in_gbs": 90,
  "gpu_count": 1,
  "gpu_memory_in_gbs": 16,
- "gpu_type": "V100"
+ "gpu_type": "V100",
+ "quantization": [
+ "gptq",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 35,
+ "performance": 10
+ }
  },
  "VM.GPU3.2": {
+ "cpu_count": 12,
+ "cpu_memory_in_gbs": 180,
  "gpu_count": 2,
  "gpu_memory_in_gbs": 32,
- "gpu_type": "V100"
+ "gpu_type": "V100",
+ "quantization": [
+ "gptq",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 45,
+ "performance": 20
+ }
  },
  "VM.GPU3.4": {
+ "cpu_count": 24,
+ "cpu_memory_in_gbs": 360,
  "gpu_count": 4,
  "gpu_memory_in_gbs": 64,
- "gpu_type": "V100"
+ "gpu_type": "V100",
+ "quantization": [
+ "gptq",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 55,
+ "performance": 45
+ }
+ },
+ "VM.GPU3.8": {
+ "cpu_count": 24,
+ "cpu_memory_in_gbs": 768,
+ "gpu_count": 8,
+ "gpu_memory_in_gbs": 128,
+ "gpu_type": "V100",
+ "quantization": [
+ "gptq",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 56,
+ "performance": 46
+ }
  }
  }
  }
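Each shape entry in the updated index now carries CPU core count, CPU memory, the quantization formats the GPU family supports, and a relative cost/performance ranking. Below is a minimal sketch of how such an index could be queried; the candidate_shapes helper and the direct json.load of the resource path are illustrative assumptions, not the way oracle-ads itself consumes the file.

import json

# Minimal sketch: read the shapes index and list shapes that can hold a given
# model, cheapest (by the relative "cost" ranking) first. The path mirrors the
# package layout shown above; adjust it to wherever the file lives locally.
with open("ads/aqua/resources/gpu_shapes_index.json") as f:
    shapes = json.load(f)["shapes"]

def candidate_shapes(required_gpu_memory_gb: float, quant: str = "fp8"):
    # Keep shapes with enough total GPU memory that also support the requested
    # quantization format, then sort by the index's cost ranking (lower first).
    fits = [
        (name, spec)
        for name, spec in shapes.items()
        if spec["gpu_memory_in_gbs"] >= required_gpu_memory_gb
        and quant in spec.get("quantization", [])
    ]
    return sorted(fits, key=lambda item: item[1]["ranking"]["cost"])

for name, spec in candidate_shapes(300, quant="fp8"):
    print(name, spec["gpu_count"], "GPUs,", spec["gpu_memory_in_gbs"], "GB")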
ads/aqua/shaperecommend/__init__.py
@@ -0,0 +1,6 @@
+ #!/usr/bin/env python
+ # Copyright (c) 2025 Oracle and/or its affiliates.
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+ from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
+
+ __all__ = ["AquaShapeRecommend"]
ads/aqua/shaperecommend/constants.py
@@ -0,0 +1,116 @@
+ #!/usr/bin/env python
+ # Copyright (c) 2024, 2025 Oracle and/or its affiliates.
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+ """
+ aqua.shaperecommend.constants
+ ~~~~~~~~~~~~~~
+
+ This module contains constants used in Aqua GPU Recommendation for Models.
+
+ LLAMA_REQUIRED_FIELDS refer to fields necessary for calculating model memory for GQA Architecture Models
+
+ MOE_REQUIRED_FIELDS refer to fields necessary for Mixture of Experts (MoE) Architecture Models
+
+ NEXT_QUANT suggests the next quantization level based on the current quantization (if applied) or the model weights (if no quantization yet)
+ """
+
+ LLAMA_REQUIRED_FIELDS = [
+ "num_hidden_layers",
+ "hidden_size",
+ "num_attention_heads",
+ "num_key_value_heads",
+ "head_dim",
+ "intermediate_size",
+ "vocab_size",
+ ]
+
+ MOE_REQUIRED_FIELDS = LLAMA_REQUIRED_FIELDS + ["num_local_experts", "intermediate_size"]
+
+ NEXT_QUANT = {
+ "float32": ["8bit", "4bit"],
+ "bfloat16": ["8bit", "4bit"],
+ "float16": ["8bit", "4bit"],
+ "int8": ["4bit"],
+ "fp8": ["4bit"],
+ "8bit": ["4bit"],
+ "int4": ["No smaller quantization available"],
+ "4bit": ["No smaller quantization available"],
+ }
+
+
+ TEXT_GENERATION = "text_generation"
+ SAFETENSORS = "safetensors"
+
+ QUANT_METHODS = [
+ "aqlm",
+ "awq",
+ "deepspeedfp",
+ "tpu_int8",
+ "fp8",
+ "ptpc_fp8",
+ "fbgemm_fp8",
+ "modelopt",
+ "modelopt_fp4",
+ "marlin",
+ "bitblas",
+ "gguf",
+ "gptq_marlin_24",
+ "gptq_marlin",
+ "gptq_bitblas",
+ "awq_marlin",
+ "gptq",
+ "compressed-tensors",
+ "bitsandbytes",
+ "qqq",
+ "hqq",
+ "experts_int8",
+ "neuron_quant",
+ "ipex",
+ "quark",
+ "moe_wna16",
+ "torchao",
+ "auto-round",
+ "rtn",
+ "inc",
+ "mxfp4",
+ ]
+
+ IN_FLIGHT_QUANTIZATION = {"4bit"} # vLLM only supports 4bit in-flight-quantization
+
+ TROUBLESHOOT_MSG = "The selected model is too large to fit on standard GPU shapes with the current configuration.\nAs troubleshooting, we have suggested the two largest available GPU shapes using the smallest quantization level ('4bit') to maximize chances of fitting the model. "
+
+ VLLM_PARAMS = {
+ "max_model_len": "--max-model-len",
+ "in_flight_quant": "--quantization bitsandbytes --load-format bitsandbytes",
+ }
+
+ DEFAULT_WEIGHT_SIZE = "float32"
+
+ BITS_AND_BYTES_8BIT = "8bit"
+ BITS_AND_BYTES_4BIT = "4bit"
+
+ BITSANDBYTES = "bitsandbytes"
+
+
+ QUANT_MAPPING = {
+ "float32": 4,
+ "bfloat16": 2,
+ "float16": 2,
+ "fp16": 2,
+ "half": 2,
+ "int8": 1,
+ "fp8": 1,
+ "8bit": 1,
+ "4bit": 0.5,
+ "int4": 0.5,
+ }
+
+ SHAPE_MAP = {
+ "NVIDIA_GPU": "GPU",
+ "AMD_ROME": "CPU",
+ "GENERIC": "CPU",
+ "LEGACY": "CPU",
+ "ARM": "CPU",
+ "UNKNOWN_ENUM_VALUE": "N/A",
+ }
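QUANT_MAPPING is effectively a bytes-per-parameter table, so a weights-only memory estimate is just the parameter count times the mapped byte width, and NEXT_QUANT names the quantization levels to fall back to when that estimate does not fit a shape. A minimal sketch under those assumptions follows; the weight_memory_gb helper is illustrative only, and the shipped estimator.py additionally models KV-cache and runtime overheads, which this omits.

from ads.aqua.shaperecommend.constants import NEXT_QUANT, QUANT_MAPPING

def weight_memory_gb(num_params: float, weight_dtype: str = "float32") -> float:
    """Rough GB needed just to hold the model weights (params x bytes per param)."""
    return num_params * QUANT_MAPPING[weight_dtype] / 1e9

# A 70B-parameter model in bfloat16 is roughly 140 GB of weights alone, so
# NEXT_QUANT["bfloat16"] suggests retrying at "8bit" or "4bit".
print(round(weight_memory_gb(70e9, "bfloat16")), NEXT_QUANT["bfloat16"])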