oracle-ads 2.13.11__py3-none-any.whl → 2.13.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/app.py +13 -7
- ads/aqua/cli.py +15 -0
- ads/aqua/common/entities.py +31 -5
- ads/aqua/common/utils.py +35 -0
- ads/aqua/evaluation/evaluation.py +5 -4
- ads/aqua/extension/model_handler.py +1 -1
- ads/aqua/model/enums.py +19 -1
- ads/aqua/model/model.py +45 -36
- ads/aqua/model/utils.py +1 -2
- ads/aqua/modeldeployment/config_loader.py +815 -0
- ads/aqua/modeldeployment/constants.py +4 -1
- ads/aqua/modeldeployment/deployment.py +100 -124
- ads/aqua/modeldeployment/entities.py +4 -178
- ads/aqua/modeldeployment/model_group_config.py +240 -0
- ads/aqua/modeldeployment/utils.py +0 -539
- ads/common/work_request.py +39 -38
- ads/jobs/builders/infrastructure/dsc_job.py +121 -24
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +71 -24
- ads/jobs/builders/runtimes/base.py +7 -5
- ads/jobs/builders/runtimes/pytorch_runtime.py +6 -8
- ads/jobs/templates/driver_pytorch.py +486 -172
- ads/jobs/templates/driver_utils.py +27 -11
- ads/model/service/oci_datascience_model_deployment.py +6 -11
- ads/telemetry/client.py +4 -4
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.12.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.12.dist-info}/RECORD +29 -27
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.12.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.12.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.12.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,815 @@
#!/usr/bin/env python
# Copyright (c) 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/


import copy
import itertools
import math
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional

from pydantic import Field

from ads.aqua import logger
from ads.aqua.app import AquaApp
from ads.aqua.common.entities import ComputeShapeSummary, ModelConfigResult
from ads.aqua.common.enums import Tags
from ads.aqua.config.utils.serializer import Serializable
from ads.aqua.finetuning.constants import FineTuneCustomMetadata
from ads.aqua.model.constants import AquaModelMetadataKeys
from ads.config import AQUA_MODEL_DEPLOYMENT_CONFIG


class ShapeInfoConfig(Serializable):
    """Describes the memory and CPU allocated to this model for a specific shape.

    Attributes:
        memory_in_gbs (float, optional): The amount of memory (in GBs) allocated to this model for the shape.
        ocpu (float, optional): The number of OCPUs allocated to this model for the shape.
    """

    memory_in_gbs: Optional[float] = Field(
        None,
        description="The amount of memory (in GBs) allocated to this model for the shape.",
    )
    ocpu: Optional[float] = Field(
        None,
        description="The number of OCPUs allocated to this model for the shape.",
    )

    class Config:
        extra = "allow"


class DeploymentShapeInfo(Serializable):
    """Describes the shape information for this model for a specific shape.

    Attributes:
        configs (List[ShapeInfoConfig], optional): A list of memory and OCPU details for this model on the shape.
        type (str, optional): The type of the shape.
    """

    configs: Optional[List[ShapeInfoConfig]] = Field(
        default_factory=list,
        description="A list of memory and OCPU details for this model on the shape.",
    )
    type: Optional[str] = Field(
        default_factory=str, description="The type of the shape."
    )

    class Config:
        extra = "allow"


class GPUModelAllocation(Serializable):
    """Describes how many GPUs are allocated to a particular model.

    Attributes:
        ocid (str, optional): The unique identifier of the model.
        gpu_count (int, optional): Number of GPUs allocated to this model.
    """

    ocid: Optional[str] = Field(
        default_factory=str, description="The unique model OCID."
    )
    gpu_count: Optional[int] = Field(
        default_factory=int, description="The number of GPUs allocated to the model."
    )

    class Config:
        extra = "allow"


class MultiModelConfig(Serializable):
    """Describes the GPU count and parameters of a specific shape for multi-model deployment.

    Attributes:
        gpu_count (int, optional): Number of GPUs allocated to this model on this shape.
        parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
            configure the behavior of a particular GPU shape.
    """

    gpu_count: Optional[int] = Field(
        default_factory=int, description="The number of GPUs allocated to the model."
    )
    parameters: Optional[Dict[str, str]] = Field(
        default_factory=dict,
        description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
    )

    class Config:
        extra = "allow"


class GPUShapeAllocation(Serializable):
    """
    Allocation details for a specific GPU shape.

    Attributes:
        models (List[GPUModelAllocation], optional): List of model GPU allocations for this shape.
        total_gpus_available (int, optional): The total number of GPUs available for this shape.
    """

    models: Optional[List[GPUModelAllocation]] = Field(
        default_factory=list, description="List of model allocations for this shape."
    )
    total_gpus_available: Optional[int] = Field(
        default_factory=int, description="Total GPUs available for this shape."
    )

    class Config:
        extra = "allow"


class ConfigurationItem(Serializable):
    """Holds key-value parameter pairs for a specific GPU or CPU shape.

    Attributes:
        parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
            configure the behavior of a particular GPU shape.
        multi_model_deployment (List[MultiModelConfig], optional): A list of multi-model configuration details.
        shape_info (DeploymentShapeInfo, optional): The shape information for this model for a specific CPU shape.
    """

    parameters: Optional[Dict[str, str]] = Field(
        default_factory=dict,
        description="Key-value pairs for shape parameters.",
    )
    multi_model_deployment: Optional[List[MultiModelConfig]] = Field(
        default_factory=list, description="A list of multi-model configuration details."
    )
    shape_info: Optional[DeploymentShapeInfo] = Field(
        default_factory=DeploymentShapeInfo,
        description="The shape information for this model for a specific shape.",
    )

    class Config:
        extra = "allow"


class AquaDeploymentConfig(Serializable):
    """Represents a model's supported shape list and detailed configuration.

    Attributes:
        shape (List[str], optional): A list of shape names (e.g., BM.GPU.A10.4).
        configuration (Dict[str, ConfigurationItem], optional): Maps each shape to its configuration details.
    """

    shape: Optional[List[str]] = Field(
        default_factory=list, description="List of supported shapes for the model."
    )
    configuration: Optional[Dict[str, ConfigurationItem]] = Field(
        default_factory=dict, description="Configuration details keyed by shape."
    )

    class Config:
        extra = "allow"


class ModelDeploymentConfigSummary(Serializable):
    """Top-level configuration model for OCI-based deployments.

    Attributes:
        deployment_config (Dict[str, AquaDeploymentConfig], optional): Deployment configurations
            keyed by model OCID.
        gpu_allocation (Dict[str, GPUShapeAllocation], optional): GPU allocations keyed by GPU shape.
        error_message (str, optional): Error message if GPU allocation is not possible.
    """

    deployment_config: Optional[Dict[str, AquaDeploymentConfig]] = Field(
        default_factory=dict,
        description=(
            "Deployment configuration details for each model, including supported shapes "
            "and shape-specific parameters."
        ),
    )
    gpu_allocation: Optional[Dict[str, GPUShapeAllocation]] = Field(
        default_factory=dict,
        description=(
            "Details on how GPUs are allocated per shape, including the total "
            "GPUs available for each shape."
        ),
    )
    error_message: Optional[str] = Field(
        default=None, description="Error message if GPU allocation is not possible."
    )

    class Config:
        extra = "allow"
        protected_namespaces = ()
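

# Example (illustrative): parsing a raw deployment config dict into the models
# above; assumes `Serializable` supports pydantic-style keyword construction,
# as its use of `pydantic.Field` suggests.
#
#   raw = {
#       "shape": ["BM.GPU.A10.4"],
#       "configuration": {
#           "BM.GPU.A10.4": {
#               "parameters": {"VLLM_PARAMS": "--max-model-len 4096"},
#               "multi_model_deployment": [{"gpu_count": 2}],
#           }
#       },
#   }
#   config = AquaDeploymentConfig(**raw)
#   assert config.configuration["BM.GPU.A10.4"].multi_model_deployment[0].gpu_count == 2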


class MultiModelDeploymentConfigLoader:
    """
    Processes multiple model deployment configurations to determine compatible GPU shapes
    and calculate optimal GPU allocations.
    """

    MAX_WORKERS = 10  # Number of workers for asynchronous model detail loading

    def __init__(self, deployment_app: AquaApp):
        """
        Initializes the processor with a reference to the `AquaDeploymentApp` to fetch model configurations.

        Parameters
        ----------
        deployment_app : AquaDeploymentApp
            An instance of AquaDeploymentApp used to fetch model deployment configurations.
        """
        self.deployment_app = deployment_app

    def load(
        self,
        shapes: List[ComputeShapeSummary],
        model_ids: List[str],
        primary_model_id: Optional[str] = None,
    ) -> ModelDeploymentConfigSummary:
        """
        Retrieves deployment configurations for a single model or multiple models
        and calculates compatible GPU allocations.

        Parameters
        ----------
        shapes : List[ComputeShapeSummary]
            Model deployment available shapes.
        model_ids : List[str]
            A list of OCIDs for the Aqua models.
        primary_model_id : Optional[str], optional
            The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
            Otherwise, GPUs are evenly allocated.

        Returns
        -------
        ModelDeploymentConfigSummary
            A summary of the deployment configurations and GPU allocations. If GPU allocation
            cannot be determined, an appropriate error message is included in the summary.
        """
        return self._load_multi_model_deployment_configuration(
            shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id
        )
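
    # Usage sketch (illustrative; `app` and `shapes` are assumed here -- an
    # authenticated AquaApp instance and a pre-fetched list of ComputeShapeSummary):
    #
    #   loader = MultiModelDeploymentConfigLoader(deployment_app=app)
    #   summary = loader.load(shapes=shapes, model_ids=["ocid1...model_a", "ocid1...model_b"])
    #   if summary.error_message:
    #       raise ValueError(summary.error_message)
    #   print(summary.gpu_allocation)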

    def _load_multi_model_deployment_configuration(
        self,
        shapes: List[ComputeShapeSummary],
        model_ids: List[str],
        primary_model_id: Optional[str] = None,
    ) -> ModelDeploymentConfigSummary:
        """
        Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.

        Parameters
        ----------
        shapes : List[ComputeShapeSummary]
            Model deployment available shapes.
        model_ids : List[str]
            A list of OCIDs for the Aqua models.
        primary_model_id : Optional[str], optional
            The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
            Otherwise, GPUs are evenly allocated.

        Returns
        -------
        ModelDeploymentConfigSummary
            A summary of the deployment configurations and GPU allocations. If GPU allocation
            cannot be determined, an appropriate error message is included in the summary.
        """
        model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu(
            shapes=shapes, model_ids=model_ids
        )

        # Identify common deployment shapes among all models.
        common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu)
        logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}")

        # If all models' shape configs are empty, use the default deployment shapes instead.
        common_shapes = (
            available_shapes
            if empty_configs
            else [
                shape_name
                for shape_name in common_shapes
                if shape_name.upper() in available_shapes
            ]
        )
        logger.debug(f"Available Common Shapes: {common_shapes}")

        if not common_shapes:
            summary.error_message = (
                "The selected models do not share any available common deployment shapes. "
                "Please ensure that all chosen models are compatible for multi-model deployment."
            )
            logger.debug(
                f"No common deployment shapes found among selected models: {model_ids}"
            )
            return summary

        # Compute GPU allocations based on the common shapes and optionally prioritize a primary model.
        gpu_allocation = self._compute_gpu_allocation(
            shapes=shapes,
            common_shapes=common_shapes,
            model_shape_gpu=model_shape_gpu,
            primary_model_id=primary_model_id,
        )

        logger.debug(f"GPU Allocation: {gpu_allocation}")

        if not gpu_allocation:
            summary.error_message = (
                "The selected models do not have a valid GPU allocation based on their current configurations. "
                "Please select a different model group. If you are deploying custom models that lack AQUA service configuration, "
                "refer to the deployment guidelines here: "
                "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models"
            )

            logger.debug(
                f"GPU allocation computation failed for selected models: {model_ids}"
            )

            return summary

        summary.gpu_allocation = gpu_allocation
        return summary

    def _fetch_model_shape_gpu(
        self, shapes: List[ComputeShapeSummary], model_ids: List[str]
    ):
        """Fetches a dict of model shapes and GPU counts plus the list of available shapes, and builds a `ModelDeploymentConfigSummary` instance."""
        # Fetch deployment configurations concurrently.
        logger.debug(f"Loading model deployment configuration for models: {model_ids}")
        deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)

        logger.debug(f"Loaded config: {deployment_configs}")
        model_shape_gpu, deployment = self._extract_model_shape_gpu(
            deployment_configs=deployment_configs, shapes=shapes
        )

        # Initialize the summary result with the deployment configurations.
        summary = ModelDeploymentConfigSummary(deployment_config=deployment)

        # Filter out unavailable shapes.
        available_shapes = [item.name.upper() for item in shapes]
        logger.debug(f"Service Available Shapes: {available_shapes}")

        return model_shape_gpu, available_shapes, summary

    def _fetch_deployment_configs_concurrently(
        self, model_ids: List[str]
    ) -> Dict[str, AquaDeploymentConfig]:
        """Fetches deployment configurations in parallel using ThreadPoolExecutor."""
        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
            results = executor.map(
                self._fetch_deployment_config_from_metadata_and_oss,
                model_ids,
            )

        return {
            model_id: AquaDeploymentConfig(**config.config)
            for model_id, config in zip(model_ids, results)
        }
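
    # Note: `executor.map` yields results in input order, so `zip(model_ids, results)`
    # above pairs each OCID with its own config. A minimal sketch of the same pattern:
    #
    #   from concurrent.futures import ThreadPoolExecutor
    #   with ThreadPoolExecutor(max_workers=2) as ex:
    #       out = list(ex.map(str.upper, ["a", "b", "c"]))
    #   assert out == ["A", "B", "C"]  # order matches the input, not completion time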

    def _fetch_deployment_config_from_metadata_and_oss(
        self, model_id: str
    ) -> ModelConfigResult:
        """
        Retrieves the deployment configuration for a model.

        The method first attempts to retrieve the configuration directly from the model itself
        via metadata or object storage. If not found and the model is identified as a fine-tuned
        model, it then attempts to retrieve the configuration from the associated base model.

        Sources are checked in the following order:
        1. Model metadata
        2. Object Storage
        3. (If fine-tuned) Base model metadata
        4. (If fine-tuned) Base model Object Storage

        Parameters
        ----------
        model_id : str
            OCID of the model in the Model Catalog.

        Returns
        -------
        ModelConfigResult
            A result object containing the deployment configuration and model details.
            If no config is found, `config` will be an empty dictionary.
        """
        # Try to get config from the model itself
        logger.info(
            "Attempting to retrieve config for model '%s' from metadata.", model_id
        )
        config = self.deployment_app.get_config_from_metadata(
            model_id=model_id,
            metadata_key=AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION,
        )
        if config and config.config:
            logger.info(
                "Successfully retrieved deployment config from metadata for model '%s'.",
                model_id,
            )
            return config

        logger.info(
            "Config not found in metadata. Trying Object Storage for model '%s'.",
            model_id,
        )
        config = self.deployment_app.get_config(
            model_id=model_id,
            config_file_name=AQUA_MODEL_DEPLOYMENT_CONFIG,
        )
        if config and config.config:
            logger.info(
                "Successfully retrieved deployment config from Object Storage for model '%s'.",
                model_id,
            )
            return config

        # If it's a fine-tuned model, try base model
        oci_model = self.deployment_app.ds_client.get_model(model_id).data
        is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in oci_model.freeform_tags
        if not is_fine_tuned_model:
            logger.warning("No deployment config found for model '%s'.", model_id)
            return ModelConfigResult(config={}, model_details=oci_model)

        logger.info(
            "Model '%s' is a fine-tuned model. Attempting to retrieve base model ID.",
            model_id,
        )
        base_model_id = next(
            (
                item.value
                for item in oci_model.custom_metadata_list
                if item.key == FineTuneCustomMetadata.FINE_TUNE_SOURCE
            ),
            None,
        )

        if not base_model_id:
            logger.warning(
                "Base model reference not found in custom metadata for fine-tuned model '%s'.",
                model_id,
            )
            return ModelConfigResult(config={}, model_details=oci_model)

        logger.info(
            "Using base model '%s' to retrieve deployment config.", base_model_id
        )

        config = self.deployment_app.get_config_from_metadata(
            model_id=base_model_id,
            metadata_key=AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION,
        )
        if config and config.config:
            logger.info(
                "Successfully retrieved deployment config from base model metadata '%s'.",
                base_model_id,
            )
            return config

        config = self.deployment_app.get_config(
            model_id=base_model_id,
            config_file_name=AQUA_MODEL_DEPLOYMENT_CONFIG,
        )
        if config and config.config:
            logger.info(
                "Successfully retrieved deployment config from base model Object Storage '%s'.",
                base_model_id,
            )
            return config

        logger.warning(
            "Deployment configuration could not be found for model '%s' or its base model '%s'.",
            model_id,
            base_model_id,
        )
        return ModelConfigResult(config={}, model_details=oci_model)
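
    # Design note: the four lookups above could equivalently be expressed as a
    # table-driven loop; a hedged sketch (hypothetical helper, not in this file):
    #
    #   def _first_config(self, model_id):
    #       lookups = (
    #           lambda: self.deployment_app.get_config_from_metadata(
    #               model_id=model_id,
    #               metadata_key=AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION,
    #           ),
    #           lambda: self.deployment_app.get_config(
    #               model_id=model_id,
    #               config_file_name=AQUA_MODEL_DEPLOYMENT_CONFIG,
    #           ),
    #       )
    #       return next((c for f in lookups if (c := f()) and c.config), None)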

    def _extract_model_shape_gpu(
        self,
        deployment_configs: Dict[str, AquaDeploymentConfig],
        shapes: List[ComputeShapeSummary],
    ):
        """Extracts shape and GPU count details from deployment configurations.

        Supported shapes for multi-model deployment are collected from the `configuration` entry in the deployment config.
        Supported shapes for single-model deployment are collected from the `shape` entry in the deployment config.
        """
        model_shape_gpu = {}
        deployment = {}
        is_single_model = len(deployment_configs) == 1

        for model_id, config in deployment_configs.items():
            # For multi-model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
            # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2.
            # Our current configuration does not support this flexibility.
            # For single-model deployment, we use `config.shape` to find the available shapes.
            multi_deployment_shape = (
                list(set(config.configuration.keys()).union(set(config.shape or [])))
                if is_single_model
                else list(config.configuration.keys())
            )

            shape_total_gpus_available_map = {
                deployment_shape.name.upper(): deployment_shape.gpu_specs.gpu_count
                or None
                for deployment_shape in shapes
                if deployment_shape and deployment_shape.gpu_specs
            }

            model_shape_gpu[model_id] = {
                shape.upper(): [
                    item.gpu_count
                    for item in config.configuration.get(
                        shape,
                        ConfigurationItem(
                            multi_model_deployment=(
                                [
                                    MultiModelConfig(
                                        gpu_count=shape_total_gpus_available_map.get(
                                            shape.upper()
                                        )
                                    )
                                ]
                                if is_single_model
                                else []
                            )
                        ),
                    ).multi_model_deployment
                ]
                for shape in multi_deployment_shape
            }

            # For single-model deployments: if the shape is listed in the `shape` section of the config,
            # we include the maximum available GPU count for that shape in the allocation consideration.
            if is_single_model:
                for shape in model_shape_gpu[model_id]:
                    shape_total_gpu_count = shape_total_gpus_available_map.get(
                        shape.upper()
                    )
                    if (
                        shape in config.shape
                        and shape_total_gpu_count
                        and shape_total_gpu_count
                        not in model_shape_gpu[model_id][shape]
                    ):
                        model_shape_gpu[model_id][shape].append(shape_total_gpu_count)

            deployment[model_id] = {
                "shape": [shape.upper() for shape in config.shape],
                "configuration": {
                    shape.upper(): config.configuration.get(shape, ConfigurationItem())
                    for shape in multi_deployment_shape
                },
            }

        return model_shape_gpu, deployment
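
    # Shape of the two returned structures (illustrative values, hypothetical OCIDs):
    #
    #   model_shape_gpu = {
    #       "ocid1...model_a": {"BM.GPU.A10.4": [1, 2], "BM.GPU.H100.8": [1, 2, 4, 8]},
    #   }
    #   deployment = {
    #       "ocid1...model_a": {
    #           "shape": ["BM.GPU.A10.4"],
    #           "configuration": {"BM.GPU.A10.4": ConfigurationItem(...)},
    #       },
    #   }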

    def _get_common_shapes(
        self, model_shape_gpu: Dict[str, Dict[str, List[int]]]
    ) -> tuple:
        """Finds common shapes across all models."""
        common_shapes_set = []
        empty_configs = True
        for shapes in model_shape_gpu.values():
            if shapes:
                common_shapes_set.append(set(shapes.keys()))
                empty_configs = False
        if not common_shapes_set:
            return [], empty_configs
        return list(set.intersection(*(common_shapes_set))), empty_configs

    def _compute_gpu_allocation(
        self,
        shapes: List[ComputeShapeSummary],
        common_shapes: List[str],
        model_shape_gpu: Dict[str, Dict[str, List[int]]],
        primary_model_id: Optional[str],
    ) -> Dict[str, GPUShapeAllocation]:
        """Computes GPU allocation for common shapes."""

        gpu_allocation = {}

        for common_shape in common_shapes:
            total_gpus_available = 0

            # Search for the shape in the available shapes list.
            shape_summary = next(
                (shape for shape in shapes if shape.name.upper() == common_shape),
                None,
            )
            if shape_summary and shape_summary.gpu_specs:
                total_gpus_available = shape_summary.gpu_specs.gpu_count

            # Generate a list of possible GPU counts from `total_gpus_available` for custom models
            # without a multi-model deployment config.
            # model_gpu = {
            #     model: (
            #         shape_gpu[common_shape]
            #         if shape_gpu.get(common_shape, UNKNOWN)
            #         else self._generate_gpu_list(total_gpus_available)
            #     )
            #     for model, shape_gpu in model_shape_gpu.items()
            # }

            model_gpu = {
                model: (shape_gpu.get(common_shape, []) or [])
                for model, shape_gpu in model_shape_gpu.items()
            }

            is_compatible, combination = self._verify_compatibility(
                total_gpus_available=total_gpus_available,
                model_gpu_dict=model_gpu,
                primary_model_id=primary_model_id,
            )

            if is_compatible:
                gpu_allocation[common_shape] = GPUShapeAllocation(
                    models=combination, total_gpus_available=total_gpus_available
                )

        return gpu_allocation

    @staticmethod
    def _generate_gpu_list(total_gpus_available: int) -> list[int]:
        """Generates a list of the powers of 2 that are smaller than or equal to `total_gpus_available`.

        Example
        -------
        input: 8
        output: [1,2,4,8]

        Parameters
        ----------
        total_gpus_available : int
            Total GPUs available.

        Returns
        -------
        list
            A list of powers of 2.
        """
        if total_gpus_available < 1:
            return []
        return [2**i for i in range(int(math.log2(total_gpus_available)) + 1)]
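
    # For a non-power-of-2 budget, the list is truncated at the largest power of 2
    # that still fits, e.g. (illustrative):
    #
    #   MultiModelDeploymentConfigLoader._generate_gpu_list(6)  # -> [1, 2, 4]
    #   MultiModelDeploymentConfigLoader._generate_gpu_list(0)  # -> []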

    def _verify_compatibility(
        self,
        total_gpus_available: int,
        model_gpu_dict: Dict,
        primary_model_id: str = None,
    ) -> tuple:
        """Calculates the GPU allocations for all compatible shapes.

        If no primary Aqua model id is provided, the GPU count for each compatible shape is evenly allocated.
        If one is provided, the GPU count for each compatible shape is prioritized for the primary model.

        Example
        -------

        Case 1:
        There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below:

        A - BM.GPU.H100.8 - 1, 2, 4, 8
        B - BM.GPU.H100.8 - 1, 2, 4, 8
        C - BM.GPU.H100.8 - 1, 2, 4, 8

        If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2]
        If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count.

        Case 2:
        There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below:

        A - BM.GPU.H100.8 - 1
        B - BM.GPU.H100.8 - 1, 2, 4
        C - BM.GPU.H100.8 - 1, 2, 4

        If no primary model is provided, the gpu allocation for A, B, C could be [1, 1, 2] or [1, 2, 1]
        If C is the primary model, the gpu allocation is [1, 1, 2] as C always gets the maximum gpu count.

        Parameters
        ----------
        total_gpus_available: int
            The total number of GPUs available for the shape.
        model_gpu_dict: Dict
            A dict of Aqua models and their GPU counts.
        primary_model_id: str
            The OCID of the primary Aqua model.

        Returns
        -------
        tuple:
            A tuple of the compatibility flag and the GPU count allocation result.
        """
        model_gpu_dict_copy = copy.deepcopy(model_gpu_dict)
        # Minimal GPU count needed to satisfy all models.
        minimal_gpus_needed = len(model_gpu_dict)
        if primary_model_id and minimal_gpus_needed > 1:
            primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id))
            primary_model_gpu_list.reverse()
            combinations = self.get_combinations(model_gpu_dict_copy)
            for gpu_count in primary_model_gpu_list:
                current_gpus_available = total_gpus_available
                while (
                    current_gpus_available >= minimal_gpus_needed
                    # or current_gpus_available == 1
                ):
                    for combination in combinations:
                        if (
                            len(combination) == len(model_gpu_dict_copy)
                            and sum(combination.values())
                            == current_gpus_available - gpu_count
                        ):
                            combination[primary_model_id] = gpu_count
                            return (
                                True,
                                [
                                    GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
                                    for ocid, gpu_count in combination.items()
                                ],
                            )

                    current_gpus_available -= 1
                    # current_gpus_available = (
                    #     1 if current_gpus_available == 0 else current_gpus_available
                    # )
        else:
            combinations = self.get_combinations(model_gpu_dict_copy)
            current_gpus_available = total_gpus_available
            while (
                current_gpus_available >= minimal_gpus_needed
                # or current_gpus_available == 1
            ):
                minimal_difference = float("inf")  # positive infinity
                optimal_combination = []
                for combination in combinations:
                    if (
                        len(combination) == len(model_gpu_dict_copy)
                        and sum(combination.values()) == current_gpus_available
                    ):
                        difference = max(combination.values()) - min(
                            combination.values()
                        )
                        if difference < minimal_difference:
                            minimal_difference = difference
                            optimal_combination = combination

                    # Found a perfectly even combination; no need to continue.
                    if minimal_difference == 0:
                        break

                if optimal_combination:
                    return (
                        True,
                        [
                            GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
                            for ocid, gpu_count in optimal_combination.items()
                        ],
                    )

                current_gpus_available -= 1
                # current_gpus_available = (
                #     1 if current_gpus_available == 0 else current_gpus_available
                # )

        return (False, [])
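
    # Note on the search order above: both branches try to fill the full GPU budget
    # first and then decrement it, so the chosen allocation may leave GPUs idle.
    # E.g. (illustrative) with total_gpus_available=8, gpu lists A=[1], B=[1, 2, 4],
    # C=[1, 2, 4], and no primary model, no combination sums to 8, so the search
    # settles at 7 with {A: 1, B: 2, C: 4}, leaving one GPU unused.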

    @staticmethod
    def get_combinations(input_dict: dict):
        """Finds all unique combinations within the input dict.

        The input is a dict of {model: [gpu_count]} on a specific shape, and this method
        returns a list of all unique combinations of GPU allocations across the models.

        For example:

        input: {'model_a': [2, 4], 'model_b': [1, 2, 4], 'model_c': [1, 2, 8]}
        output:
        [
            {'model_a': 2, 'model_b': 1, 'model_c': 1},
            {'model_a': 2, 'model_b': 1, 'model_c': 2},
            {'model_a': 2, 'model_b': 1, 'model_c': 8},
            {'model_a': 2, 'model_b': 2, 'model_c': 1},
            {'model_a': 2, 'model_b': 2, 'model_c': 2},
            {'model_a': 2, 'model_b': 2, 'model_c': 8},
            {'model_a': 2, 'model_b': 4, 'model_c': 1},
            {'model_a': 2, 'model_b': 4, 'model_c': 2},
            {'model_a': 2, 'model_b': 4, 'model_c': 8},
            {'model_a': 4, 'model_b': 1, 'model_c': 1},
            {'model_a': 4, 'model_b': 1, 'model_c': 2},
            {'model_a': 4, 'model_b': 1, 'model_c': 8},
            {'model_a': 4, 'model_b': 2, 'model_c': 1},
            {'model_a': 4, 'model_b': 2, 'model_c': 2},
            {'model_a': 4, 'model_b': 2, 'model_c': 8},
            {'model_a': 4, 'model_b': 4, 'model_c': 1},
            {'model_a': 4, 'model_b': 4, 'model_c': 2},
            {'model_a': 4, 'model_b': 4, 'model_c': 8}
        ]

        Parameters
        ----------
        input_dict: dict
            A dict of {model: [gpu_count]} on a specific shape.

        Returns
        -------
        list:
            A list of all unique combinations of GPU allocations across the models.
        """
        keys, values = zip(*input_dict.items())
        return [dict(zip(keys, v)) for v in itertools.product(*values)]