oracle-ads 2.13.17rc0__py3-none-any.whl → 2.13.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. ads/aqua/cli.py +7 -5
  2. ads/aqua/common/entities.py +88 -29
  3. ads/aqua/common/enums.py +7 -0
  4. ads/aqua/common/errors.py +5 -0
  5. ads/aqua/common/utils.py +87 -7
  6. ads/aqua/constants.py +3 -0
  7. ads/aqua/extension/deployment_handler.py +36 -0
  8. ads/aqua/modeldeployment/config_loader.py +10 -0
  9. ads/aqua/modeldeployment/constants.py +1 -0
  10. ads/aqua/modeldeployment/deployment.py +99 -22
  11. ads/aqua/modeldeployment/entities.py +4 -0
  12. ads/aqua/resources/gpu_shapes_index.json +315 -26
  13. ads/aqua/shaperecommend/__init__.py +6 -0
  14. ads/aqua/shaperecommend/constants.py +116 -0
  15. ads/aqua/shaperecommend/estimator.py +384 -0
  16. ads/aqua/shaperecommend/llm_config.py +283 -0
  17. ads/aqua/shaperecommend/recommend.py +493 -0
  18. ads/aqua/shaperecommend/shape_report.py +233 -0
  19. ads/aqua/version.json +1 -1
  20. ads/cli.py +9 -1
  21. ads/jobs/builders/infrastructure/dsc_job.py +1 -0
  22. ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
  23. ads/model/service/oci_datascience_model_deployment.py +46 -19
  24. ads/opctl/operator/lowcode/common/data.py +7 -2
  25. ads/opctl/operator/lowcode/common/transformations.py +207 -0
  26. ads/opctl/operator/lowcode/common/utils.py +8 -0
  27. ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
  28. ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
  29. ads/opctl/operator/lowcode/forecast/const.py +2 -0
  30. ads/opctl/operator/lowcode/forecast/errors.py +5 -0
  31. ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
  32. ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
  33. ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
  34. ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
  35. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
  36. ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
  37. ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
  38. ads/pipeline/ads_pipeline.py +13 -9
  39. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
  40. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +43 -36
  41. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
  42. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
  43. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/cli.py CHANGED
@@ -96,18 +96,20 @@ class AquaCommand:
                 "If you intend to chain a function call to the result, please separate the "
                 "flag and the subsequent function call with separator `-`."
             )
-
+
     @staticmethod
     def install():
         """Install ADS Aqua Extension from wheel file. Set enviroment variable `AQUA_EXTENSTION_PATH` to change the wheel file path.
 
-        Return
+        Return
         ------
         int:
             Installatation status.
         """
         import subprocess
 
-        wheel_file_path = os.environ.get("AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl")
-        status = subprocess.run(f"pip install {wheel_file_path}",shell=True)
-        return status.check_returncode
+        wheel_file_path = os.environ.get(
+            "AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl"
+        )
+        status = subprocess.run(f"pip install {wheel_file_path}", shell=True, check=False)
+        return status.check_returncode
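A minimal sketch of driving the reworked install() from Python, assuming only what the diff shows (the staticmethod, the `AQUA_EXTENSTION_PATH` variable, and the default wheel glob); the wheel path below is a hypothetical example:

    import os

    from ads.aqua.cli import AquaCommand

    # Hypothetical wheel location; the default glob is
    # "/ads/extension/adsjupyterlab_aqua_extension*.whl" as shown above.
    os.environ["AQUA_EXTENSTION_PATH"] = "/tmp/adsjupyterlab_aqua_extension-1.0.0-py3-none-any.whl"

    # Shells out to `pip install <wheel>` with shell=True and check=False,
    # then returns status.check_returncode, as in the new code above.
    AquaCommand.install()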
ads/aqua/common/entities.py CHANGED
@@ -47,19 +47,76 @@ class ModelConfigResult(BaseModel):
         protected_namespaces = ()
 
 
-class GPUSpecs(Serializable):
+class ComputeRank(Serializable):
     """
-    Represents the GPU specifications for a compute instance.
+    Represents the cost and performance rankings for a specific compute shape.
+    These rankings help compare different shapes based on their relative pricing
+    and computational capabilities.
     """
 
-    gpu_memory_in_gbs: Optional[int] = Field(
-        default=None, description="The amount of GPU memory available (in GB)."
+    cost: Optional[int] = Field(
+        None,
+        description=(
+            "Relative cost ranking of the compute shape. "
+            "Value ranges from 10 (most cost-effective) to 100 (most expensive). "
+            "Lower values indicate cheaper compute options."
+        ),
+    )
+
+    performance: Optional[int] = Field(
+        None,
+        description=(
+            "Relative performance ranking of the compute shape. "
+            "Value ranges from 10 (lowest performance) to 110 (highest performance). "
+            "Higher values indicate better compute performance."
+        ),
     )
+
+
+class GPUSpecs(Serializable):
+    """
+    Represents the specifications and capabilities of a GPU-enabled compute shape.
+    Includes details about GPU and CPU resources, supported quantization formats, and
+    relative rankings for cost and performance.
+    """
+
     gpu_count: Optional[int] = Field(
-        default=None, description="The number of GPUs available."
+        default=None,
+        description="Number of physical GPUs available on the compute shape.",
+    )
+
+    gpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total GPU memory available in gigabytes (GB)."
     )
+
     gpu_type: Optional[str] = Field(
-        default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
+        default=None,
+        description="Type of GPU and architecture. Example: 'H100', 'GB200'.",
+    )
+
+    quantization: Optional[List[str]] = Field(
+        default_factory=list,
+        description=(
+            "List of supported quantization formats for the GPU. "
+            "Examples: 'fp16', 'int8', 'bitsandbytes', 'bf16', 'fp4', etc."
+        ),
+    )
+
+    cpu_count: Optional[int] = Field(
+        default=None, description="Number of CPU cores available on the shape."
+    )
+
+    cpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total CPU memory available in gigabytes (GB)."
+    )
+
+    ranking: Optional[ComputeRank] = Field(
+        default=None,
+        description=(
+            "Relative cost and performance rankings of this shape. "
+            "Cost is ranked from 10 (least expensive) to 100+ (most expensive), "
+            "and performance from 10 (lowest) to 100+ (highest)."
+        ),
     )
 
 
@@ -80,46 +137,49 @@ class GPUShapesIndex(Serializable):
 
 class ComputeShapeSummary(Serializable):
     """
-    Represents the specifications of a compute instance shape,
-    including CPU, memory, and optional GPU characteristics.
+    Represents a compute shape's specification including CPU, memory, and (if applicable) GPU configuration.
     """
 
+    available: Optional[bool] = Field(
+        default=False,
+        description="True if the shape is available in the user's tenancy/region.",
+    )
+
     core_count: Optional[int] = Field(
-        default=None,
-        description="Total number of CPU cores available for the compute shape.",
+        default=None, description="Number of vCPUs available for the compute shape."
     )
+
     memory_in_gbs: Optional[int] = Field(
-        default=None,
-        description="Amount of memory (in GB) available for the compute shape.",
+        default=None, description="Total CPU memory available for the shape (in GB)."
     )
+
     name: Optional[str] = Field(
-        default=None,
-        description="Full name of the compute shape, e.g., 'VM.GPU.A10.2'.",
+        default=None, description="Name of the compute shape, e.g., 'VM.GPU.A10.2'."
     )
+
     shape_series: Optional[str] = Field(
         default=None,
-        description="Shape family or series, e.g., 'GPU', 'Standard', etc.",
+        description="Series or family of the shape, e.g., 'GPU', 'Standard'.",
     )
+
     gpu_specs: Optional[GPUSpecs] = Field(
-        default=None,
-        description="Optional GPU specifications associated with the shape.",
+        default=None, description="GPU configuration for the shape, if applicable."
     )
 
     @model_validator(mode="after")
     @classmethod
-    def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
+    def populate_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
         """
-        Validates and populates GPU specifications if the shape_series indicates a GPU-based shape.
+        Attempts to populate GPU specs if the shape is GPU-based and no GPU specs are explicitly set.
 
-        - If the shape_series contains "GPU", the validator first checks if the shape name exists
-          in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data.
-        - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name
-          using a regex pattern (looking for a number following a dot at the end of the name).
-
-        The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm
+        Logic:
+        - If `shape_series` includes 'GPU' and `gpu_specs` is None:
+            - Tries to parse the shape name to extract GPU count (e.g., from 'VM.GPU.A10.2').
+            - Fallback is based on suffix numeric group (e.g., '.2' gpu_count=2).
+        - If extraction fails, logs debug-level error but does not raise.
 
         Returns:
-            ComputeShapeSummary: The updated instance with gpu_specs populated if applicable.
+            ComputeShapeSummary: The updated model instance.
         """
         try:
             if (
@@ -128,16 +188,15 @@ class ComputeShapeSummary(Serializable):
                 and model.name
                 and not model.gpu_specs
             ):
-                # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2)
                 match = re.search(r"\.(\d+)$", model.name)
                 if match:
                     gpu_count = int(match.group(1))
                     model.gpu_specs = GPUSpecs(gpu_count=gpu_count)
         except Exception as err:
             logger.debug(
-                f"Error occurred in attempt to extract GPU specification for the f{model.name}. "
-                f"Details: {err}"
+                f"[populate_gpu_specs] Failed to auto-populate GPU specs for shape '{model.name}': {err}"
             )
+
         return model
 
 
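A minimal sketch of the validator's documented fallback, based only on the logic shown above; the shape names and series values are illustrative:

    from ads.aqua.common.entities import ComputeShapeSummary

    # The trailing ".2" is picked up by re.search(r"\.(\d+)$", name) in
    # populate_gpu_specs, so gpu_specs.gpu_count is filled in automatically.
    gpu_shape = ComputeShapeSummary(name="VM.GPU.A10.2", shape_series="GPU")
    print(gpu_shape.gpu_specs.gpu_count)  # -> 2

    # Non-GPU series (or names without a numeric suffix) leave gpu_specs unset.
    cpu_shape = ComputeShapeSummary(name="VM.Standard.E4.Flex", shape_series="Standard")
    print(cpu_shape.gpu_specs)  # -> None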
ads/aqua/common/enums.py CHANGED
@@ -58,6 +58,7 @@ class InferenceContainerTypeFamily(ExtendedEnum):
     AQUA_VLLM_LLAMA4_CONTAINER_FAMILY = "odsc-vllm-serving-llama4"
     AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving"
     AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving"
+    AQUA_VLLM_OPENAI_CONTAINER_FAMILY = "odsc-vllm-serving-openai"
 
 
 class CustomInferenceContainerTypeFamily(ExtendedEnum):
@@ -122,6 +123,12 @@ class Platform(ExtendedEnum):
 # - Key: The preferred container family to use when multiple compatible families are selected.
 # - Value: A list of all compatible families (including the preferred one).
 CONTAINER_FAMILY_COMPATIBILITY: Dict[str, List[str]] = {
+    InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY: [
+        InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
+    ],
     InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY: [
         InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
         InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
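A minimal sketch of reading the compatibility map's convention (preferred family as the key, all compatible families as the value), assuming the module-level names shown above are importable as-is:

    from ads.aqua.common.enums import (
        CONTAINER_FAMILY_COMPATIBILITY,
        InferenceContainerTypeFamily,
    )

    preferred = InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY
    candidate = InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY

    # The key is the preferred family; the value lists every family it is
    # compatible with (including itself), so membership is the whole check.
    print(candidate in CONTAINER_FAMILY_COMPATIBILITY.get(preferred, [preferred]))  # True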
ads/aqua/common/errors.py CHANGED
@@ -55,6 +55,11 @@ class AquaValueError(AquaError, ValueError):
     def __init__(self, reason, status=403, service_payload=None):
         super().__init__(reason, status, service_payload)
 
+class AquaRecommendationError(AquaError):
+    """Exception raised for models incompatible with shape recommendation tool."""
+
+    def __init__(self, reason, status=400, service_payload=None):
+        super().__init__(reason, status, service_payload)
 
 class AquaFileNotFoundError(AquaError, FileNotFoundError):
     """Exception raised for missing target file."""
ads/aqua/common/utils.py CHANGED
@@ -997,6 +997,44 @@ def get_container_params_type(container_type_name: str) -> str:
     return UNKNOWN
 
 
+def get_container_env_type(container_type_name: Optional[str]) -> str:
+    """
+    Determine the container environment type based on the container type name.
+
+    This function matches the provided container type name against the known
+    values of `InferenceContainerType`. The check is case-insensitive and
+    allows for partial matches so that changes in container naming conventions
+    (e.g., prefixes or suffixes) will still be matched correctly.
+
+    Examples:
+        >>> get_container_env_type("odsc-vllm-serving")
+        'vllm'
+        >>> get_container_env_type("ODSC-TGI-Serving")
+        'tgi'
+        >>> get_container_env_type("custom-unknown-container")
+        'UNKNOWN'
+
+    Args:
+        container_type_name (Optional[str]):
+            The deployment container type name (e.g., "odsc-vllm-serving").
+
+    Returns:
+        str:
+            - A matching `InferenceContainerType` value string (e.g., "VLLM", "TGI", "LLAMA-CPP").
+            - `"UNKNOWN"` if no match is found or the input is empty/None.
+    """
+    if not container_type_name:
+        return UNKNOWN
+
+    needle = container_type_name.strip().casefold()
+
+    for container_type in InferenceContainerType.values():
+        if container_type and container_type.casefold() in needle:
+            return container_type.upper()
+
+    return UNKNOWN
+
+
 def get_restricted_params_by_container(container_type_name: str) -> set:
     """The utility function accepts the deployment container type name and returns a set of restricted params
     for that container.
@@ -1229,10 +1267,10 @@ def load_gpu_shapes_index(
     auth: Optional[Dict[str, Any]] = None,
 ) -> GPUShapesIndex:
     """
-    Load the GPU shapes index, preferring the OS bucket copy over the local one.
+    Load the GPU shapes index, merging based on freshness.
 
-    Attempts to read `gpu_shapes_index.json` from OCI Object Storage first;
-    if that succeeds, those entries will override the local defaults.
+    Compares last-modified timestamps of local and remote files,
+    merging the shapes from the fresher file on top of the older one.
 
     Parameters
     ----------
@@ -1253,7 +1291,9 @@ def load_gpu_shapes_index(
     file_name = "gpu_shapes_index.json"
 
     # Try remote load
-    remote_data: Dict[str, Any] = {}
+    local_data, remote_data = {}, {}
+    local_mtime, remote_mtime = None, None
+
     if CONDA_BUCKET_NS:
         try:
             auth = auth or authutil.default_signer()
@@ -1263,8 +1303,24 @@ def load_gpu_shapes_index(
             logger.debug(
                 "Loading GPU shapes index from Object Storage: %s", storage_path
             )
-            with fsspec.open(storage_path, mode="r", **auth) as f:
+
+            fs = fsspec.filesystem("oci", **auth)
+            with fs.open(storage_path, mode="r") as f:
                 remote_data = json.load(f)
+
+            remote_info = fs.info(storage_path)
+            remote_mtime_str = remote_info.get("timeModified", None)
+            if remote_mtime_str:
+                # Convert OCI timestamp (e.g., 'Mon, 04 Aug 2025 06:37:13 GMT') to epoch time
+                remote_mtime = datetime.strptime(
+                    remote_mtime_str, "%a, %d %b %Y %H:%M:%S %Z"
+                ).timestamp()
+
+                logger.debug(
+                    "Remote GPU shapes last-modified time: %s",
+                    datetime.fromtimestamp(remote_mtime).strftime("%Y-%m-%d %H:%M:%S"),
+                )
+
             logger.debug(
                 "Loaded %d shapes from Object Storage",
                 len(remote_data.get("shapes", {})),
@@ -1273,12 +1329,19 @@ def load_gpu_shapes_index(
         logger.debug("Remote load failed (%s); falling back to local", ex)
 
     # Load local copy
-    local_data: Dict[str, Any] = {}
     local_path = os.path.join(os.path.dirname(__file__), "../resources", file_name)
     try:
         logger.debug("Loading GPU shapes index from local file: %s", local_path)
         with open(local_path) as f:
             local_data = json.load(f)
+
+        local_mtime = os.path.getmtime(local_path)
+
+        logger.debug(
+            "Local GPU shapes last-modified time: %s",
+            datetime.fromtimestamp(local_mtime).strftime("%Y-%m-%d %H:%M:%S"),
+        )
+
         logger.debug(
             "Loaded %d shapes from local file", len(local_data.get("shapes", {}))
         )
@@ -1288,7 +1351,24 @@ def load_gpu_shapes_index(
     # Merge: remote shapes override local
     local_shapes = local_data.get("shapes", {})
     remote_shapes = remote_data.get("shapes", {})
-    merged_shapes = {**local_shapes, **remote_shapes}
+    merged_shapes = {}
+
+    if local_mtime and remote_mtime:
+        if remote_mtime >= local_mtime:
+            logger.debug("Remote data is fresher or equal; merging remote over local.")
+            merged_shapes = {**local_shapes, **remote_shapes}
+        else:
+            logger.debug("Local data is fresher; merging local over remote.")
+            merged_shapes = {**remote_shapes, **local_shapes}
+    elif remote_shapes:
+        logger.debug("Only remote shapes available.")
+        merged_shapes = remote_shapes
+    elif local_shapes:
+        logger.debug("Only local shapes available.")
+        merged_shapes = local_shapes
+    else:
+        logger.error("No GPU shapes data found in either source.")
+        merged_shapes = {}
 
     return GPUShapesIndex(shapes=merged_shapes)
 
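The merge rule above is plain dict-spread precedence: the fresher source is spread last, so its entries win on key conflicts while shapes unique to either copy are kept. A self-contained sketch with illustrative entries, no OCI calls:

    # Illustrative shape entries; real entries come from gpu_shapes_index.json.
    local_shapes = {"VM.GPU.A10.2": {"gpu_count": 2}, "BM.GPU4.8": {"gpu_count": 8}}
    remote_shapes = {"VM.GPU.A10.2": {"gpu_count": 2, "gpu_memory_in_gbs": 48}}

    remote_is_fresher = True  # i.e. remote_mtime >= local_mtime
    if remote_is_fresher:
        merged = {**local_shapes, **remote_shapes}  # remote wins on conflicts
    else:
        merged = {**remote_shapes, **local_shapes}  # local wins on conflicts

    print(merged["VM.GPU.A10.2"])  # the fresher (remote) entry
    print(sorted(merged))          # both shape names are preserved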
ads/aqua/constants.py CHANGED
@@ -56,6 +56,9 @@ SUPPORTED_FILE_FORMATS = ["jsonl"]
 MODEL_BY_REFERENCE_OSS_PATH_KEY = "artifact_location"
 
 AQUA_CHAT_TEMPLATE_METADATA_KEY = "chat_template"
+UNKNOWN_ENUM_VALUE = "UNKNOWN_ENUM_VALUE"
+MODEL_GROUP = "MODEL_GROUP"
+SINGLE_MODEL_FLEX = "SINGLE_MODEL_FLEX"
 
 CONSOLE_LINK_RESOURCE_TYPE_MAPPING = {
     "datasciencemodel": "models",
ads/aqua/extension/deployment_handler.py CHANGED
@@ -57,6 +57,15 @@ class AquaDeploymentHandler(AquaAPIhandler):
             return self.get_deployment_config(
                 model_id=id.split(",") if "," in id else id
             )
+        elif paths.startswith("aqua/deployments/recommend_shapes"):
+            if not id or not isinstance(id, str):
+                raise HTTPError(
+                    400,
+                    f"Invalid request format for {self.request.path}. "
+                    "Expected a single model OCID specified as --model_id",
+                )
+            id = id.replace(" ", "")
+            return self.get_recommend_shape(model_id=id)
         elif paths.startswith("aqua/deployments/shapes"):
             return self.list_shapes()
         elif paths.startswith("aqua/deployments"):
@@ -161,6 +170,32 @@ class AquaDeploymentHandler(AquaAPIhandler):
 
         return self.finish(deployment_config)
 
+    def get_recommend_shape(self, model_id: str):
+        """
+        Retrieves the valid shape and deployment parameter configuration for one Aqua Model.
+
+        Parameters
+        ----------
+        model_id : str
+            A single model ID (str).
+
+        Returns
+        -------
+        None
+            The function sends the ShapeRecommendReport (generate_table = False) or Rich Diff Table (generate_table = True)
+        """
+        app = AquaDeploymentApp()
+
+        compartment_id = self.get_argument("compartment_id", default=COMPARTMENT_OCID)
+
+        recommend_report = app.recommend_shape(
+            model_id=model_id,
+            compartment_id=compartment_id,
+            generate_table=False,
+        )
+
+        return self.finish(recommend_report)
+
     def list_shapes(self):
         """
         Lists the valid model deployment shapes.
@@ -408,6 +443,7 @@ __handlers__ = [
     ("deployments/?([^/]*)/params", AquaDeploymentParamsHandler),
     ("deployments/config/?([^/]*)", AquaDeploymentHandler),
     ("deployments/shapes/?([^/]*)", AquaDeploymentHandler),
+    ("deployments/recommend_shapes/?([^/]*)", AquaDeploymentHandler),
     ("deployments/?([^/]*)", AquaDeploymentHandler),
     ("deployments/?([^/]*)/activate", AquaDeploymentHandler),
     ("deployments/?([^/]*)/deactivate", AquaDeploymentHandler),
ads/aqua/modeldeployment/config_loader.py CHANGED
@@ -88,6 +88,7 @@ class MultiModelConfig(Serializable):
         gpu_count (int, optional): Number of GPUs count to this model of this shape.
         parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
             configure the behavior of a particular GPU shape.
+        env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
     """
 
     gpu_count: Optional[int] = Field(
@@ -97,6 +98,10 @@ class MultiModelConfig(Serializable):
         default_factory=dict,
         description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
     )
+    env: Optional[Dict[str, Dict[str, str]]] = Field(
+        default_factory=dict,
+        description="Environment variables grouped by namespace",
+    )
 
     class Config:
         extra = "allow"
@@ -130,6 +135,7 @@ class ConfigurationItem(Serializable):
             configure the behavior of a particular GPU shape.
         multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
         shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
+        env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
     """
 
     parameters: Optional[Dict[str, str]] = Field(
@@ -143,6 +149,10 @@ class ConfigurationItem(Serializable):
         default_factory=DeploymentShapeInfo,
         description="The shape information to this model for specific shape",
     )
+    env: Optional[Dict[str, Dict[str, str]]] = Field(
+        default_factory=dict,
+        description="Environment variables grouped by namespace",
+    )
 
     class Config:
         extra = "allow"
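The new env field groups environment variables under a namespace key, as the docstrings above describe; a minimal sketch of constructing a configuration entry with it (parameter and variable values are illustrative):

    from ads.aqua.modeldeployment.config_loader import MultiModelConfig

    # Environment variables are grouped by namespace, mirroring the docstring
    # example ("VLLM": {"VAR": "VAL"}).
    config = MultiModelConfig(
        gpu_count=2,
        parameters={"VLLM_PARAMS": "--max-model-len 4096"},
        env={"VLLM": {"VLLM_ATTENTION_BACKEND": "FLASH_ATTN"}},
    )
    print(config.env["VLLM"])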
ads/aqua/modeldeployment/constants.py CHANGED
@@ -11,3 +11,4 @@ This module contains constants used in Aqua Model Deployment.
 
 DEFAULT_WAIT_TIME = 12000
 DEFAULT_POLL_INTERVAL = 10
+