oracle-ads 2.13.11__py3-none-any.whl → 2.13.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. ads/aqua/app.py +73 -15
  2. ads/aqua/cli.py +17 -0
  3. ads/aqua/client/client.py +38 -21
  4. ads/aqua/client/openai_client.py +20 -10
  5. ads/aqua/common/entities.py +78 -12
  6. ads/aqua/common/utils.py +35 -0
  7. ads/aqua/constants.py +2 -0
  8. ads/aqua/evaluation/evaluation.py +5 -4
  9. ads/aqua/extension/common_handler.py +47 -2
  10. ads/aqua/extension/model_handler.py +51 -9
  11. ads/aqua/model/constants.py +1 -0
  12. ads/aqua/model/enums.py +19 -1
  13. ads/aqua/model/model.py +119 -51
  14. ads/aqua/model/utils.py +1 -2
  15. ads/aqua/modeldeployment/config_loader.py +815 -0
  16. ads/aqua/modeldeployment/constants.py +4 -1
  17. ads/aqua/modeldeployment/deployment.py +178 -129
  18. ads/aqua/modeldeployment/entities.py +150 -178
  19. ads/aqua/modeldeployment/model_group_config.py +233 -0
  20. ads/aqua/modeldeployment/utils.py +0 -539
  21. ads/aqua/verify_policies/__init__.py +8 -0
  22. ads/aqua/verify_policies/constants.py +13 -0
  23. ads/aqua/verify_policies/entities.py +29 -0
  24. ads/aqua/verify_policies/messages.py +101 -0
  25. ads/aqua/verify_policies/utils.py +432 -0
  26. ads/aqua/verify_policies/verify.py +345 -0
  27. ads/aqua/version.json +3 -0
  28. ads/common/oci_logging.py +4 -7
  29. ads/common/work_request.py +39 -38
  30. ads/jobs/builders/infrastructure/dsc_job.py +121 -24
  31. ads/jobs/builders/infrastructure/dsc_job_runtime.py +71 -24
  32. ads/jobs/builders/runtimes/base.py +7 -5
  33. ads/jobs/builders/runtimes/pytorch_runtime.py +6 -8
  34. ads/jobs/templates/driver_pytorch.py +486 -172
  35. ads/jobs/templates/driver_utils.py +27 -11
  36. ads/model/deployment/model_deployment.py +51 -38
  37. ads/model/service/oci_datascience_model_deployment.py +6 -11
  38. ads/telemetry/client.py +4 -4
  39. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/METADATA +2 -1
  40. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/RECORD +43 -34
  41. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/WHEEL +0 -0
  42. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/entry_points.txt +0 -0
  43. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/modeldeployment/config_loader.py (new file)
@@ -0,0 +1,815 @@
+ #!/usr/bin/env python
+ # Copyright (c) 2025 Oracle and/or its affiliates.
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+
+ import copy
+ import itertools
+ import math
+ from concurrent.futures import ThreadPoolExecutor
+ from typing import Dict, List, Optional
+
+ from pydantic import Field
+
+ from ads.aqua import logger
+ from ads.aqua.app import AquaApp
+ from ads.aqua.common.entities import ComputeShapeSummary, ModelConfigResult
+ from ads.aqua.common.enums import Tags
+ from ads.aqua.config.utils.serializer import Serializable
+ from ads.aqua.finetuning.constants import FineTuneCustomMetadata
+ from ads.aqua.model.constants import AquaModelMetadataKeys
+ from ads.config import AQUA_MODEL_DEPLOYMENT_CONFIG
+
+
+ class ShapeInfoConfig(Serializable):
+     """Describes the memory and CPU allocation for this model on a specific shape.
+
+     Attributes:
+         memory_in_gbs (float, optional): The amount of memory, in GBs, allocated to this model on the shape.
+         ocpu (float, optional): The number of OCPUs allocated to this model on the shape.
+     """
+
+     memory_in_gbs: Optional[float] = Field(
+         None,
+         description="The amount of memory, in GBs, allocated to this model on the shape.",
+     )
+     ocpu: Optional[float] = Field(
+         None,
+         description="The number of OCPUs allocated to this model on the shape.",
+     )
+
+     class Config:
+         extra = "allow"
+
+
+ class DeploymentShapeInfo(Serializable):
+     """Describes the shape information for this model on a specific shape.
+
+     Attributes:
+         configs (List[ShapeInfoConfig], optional): A list of memory and CPU allocation details for this model on the shape.
+         type (str, optional): The type of the shape.
+     """
+
+     configs: Optional[List[ShapeInfoConfig]] = Field(
+         default_factory=list,
+         description="A list of memory and CPU allocation details for this model on the shape.",
+     )
+     type: Optional[str] = Field(
+         default_factory=str, description="The type of the shape."
+     )
+
+     class Config:
+         extra = "allow"
+
+
+ class GPUModelAllocation(Serializable):
+     """Describes how many GPUs are allocated to a particular model.
+
+     Attributes:
+         ocid (str, optional): The unique identifier of the model.
+         gpu_count (int, optional): Number of GPUs allocated to this model.
+     """
+
+     ocid: Optional[str] = Field(
+         default_factory=str, description="The unique model OCID."
+     )
+     gpu_count: Optional[int] = Field(
+         default_factory=int, description="The number of GPUs allocated to the model."
+     )
+
+     class Config:
+         extra = "allow"
+
+
+ class MultiModelConfig(Serializable):
+     """Describes the GPU count and shape parameters for a specific shape in a multi-model deployment.
+
+     Attributes:
+         gpu_count (int, optional): Number of GPUs allocated to this model on this shape.
+         parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
+             configure the behavior of a particular GPU shape.
+     """
+
+     gpu_count: Optional[int] = Field(
+         default_factory=int, description="The number of GPUs allocated to the model."
+     )
+     parameters: Optional[Dict[str, str]] = Field(
+         default_factory=dict,
+         description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
+     )
+
+     class Config:
+         extra = "allow"
+
+
+ class GPUShapeAllocation(Serializable):
+     """
+     Allocation details for a specific GPU shape.
+
+     Attributes:
+         models (List[GPUModelAllocation], optional): List of model GPU allocations for this shape.
+         total_gpus_available (int, optional): The total number of GPUs available for this shape.
+     """
+
+     models: Optional[List[GPUModelAllocation]] = Field(
+         default_factory=list, description="List of model allocations for this shape."
+     )
+     total_gpus_available: Optional[int] = Field(
+         default_factory=int, description="Total GPUs available for this shape."
+     )
+
+     class Config:
+         extra = "allow"
+
+
+ class ConfigurationItem(Serializable):
+     """Holds key-value parameter pairs for a specific GPU or CPU shape.
+
+     Attributes:
+         parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
+             configure the behavior of a particular GPU shape.
+         multi_model_deployment (List[MultiModelConfig], optional): A list of multi-model configuration details.
+         shape_info (DeploymentShapeInfo, optional): The shape information for this model on a specific CPU shape.
+     """
+
+     parameters: Optional[Dict[str, str]] = Field(
+         default_factory=dict,
+         description="Key-value pairs for shape parameters.",
+     )
+     multi_model_deployment: Optional[List[MultiModelConfig]] = Field(
+         default_factory=list, description="A list of multi-model configuration details."
+     )
+     shape_info: Optional[DeploymentShapeInfo] = Field(
+         default_factory=DeploymentShapeInfo,
+         description="The shape information for this model on a specific shape.",
+     )
+
+     class Config:
+         extra = "allow"
+
+
+ class AquaDeploymentConfig(Serializable):
+     """Represents a model's shape list and detailed configuration for multi-model deployment.
+
+     Attributes:
+         shape (List[str], optional): A list of shape names (e.g., BM.GPU.A10.4).
+         configuration (Dict[str, ConfigurationItem], optional): Maps each shape to its configuration details.
+     """
+
+     shape: Optional[List[str]] = Field(
+         default_factory=list, description="List of supported shapes for the model."
+     )
+     configuration: Optional[Dict[str, ConfigurationItem]] = Field(
+         default_factory=dict, description="Configuration details keyed by shape."
+     )
+
+     class Config:
+         extra = "allow"
+
+
+ class ModelDeploymentConfigSummary(Serializable):
+     """Top-level configuration model for OCI-based deployments.
+
+     Attributes:
+         deployment_config (Dict[str, AquaDeploymentConfig], optional): Deployment configurations
+             keyed by model OCID.
+         gpu_allocation (Dict[str, GPUShapeAllocation], optional): GPU allocations keyed by GPU shape.
+         error_message (str, optional): Error message if GPU allocation is not possible.
+     """
+
+     deployment_config: Optional[Dict[str, AquaDeploymentConfig]] = Field(
+         default_factory=dict,
+         description=(
+             "Deployment configuration details for each model, including supported shapes "
+             "and shape-specific parameters."
+         ),
+     )
+     gpu_allocation: Optional[Dict[str, GPUShapeAllocation]] = Field(
+         default_factory=dict,
+         description=(
+             "Details on how GPUs are allocated per shape, including the total "
+             "GPUs available for each shape."
+         ),
+     )
+     error_message: Optional[str] = Field(
+         default=None, description="Error message if GPU allocation is not possible."
+     )
+
+     class Config:
+         extra = "allow"
+         protected_namespaces = ()
+
+
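For illustration only (not part of the diff): these Serializable models are Pydantic-based, so a raw deployment-config dictionary can be parsed directly into typed objects. A minimal sketch, with made-up shape name and parameter values:

    from ads.aqua.modeldeployment.config_loader import AquaDeploymentConfig

    raw_config = {
        "shape": ["BM.GPU.A10.4"],
        "configuration": {
            "BM.GPU.A10.4": {
                "parameters": {"VLLM_PARAMS": "--max-model-len 4096"},  # sample value
                "multi_model_deployment": [{"gpu_count": 2}],
            }
        },
    }
    config = AquaDeploymentConfig(**raw_config)
    assert config.configuration["BM.GPU.A10.4"].multi_model_deployment[0].gpu_count == 2
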
+ class MultiModelDeploymentConfigLoader:
+     """
+     Processes multiple model deployment configurations to determine compatible GPU shapes
+     and calculate optimal GPU allocations.
+     """
+
+     MAX_WORKERS = 10  # Number of workers for concurrent loading of model details
+
+     def __init__(self, deployment_app: AquaApp):
+         """
+         Initializes the loader with an `AquaApp` instance used to fetch model
+         deployment configurations.
+
+         Parameters
+         ----------
+         deployment_app : AquaApp
+             An instance of AquaApp used to fetch model deployment configurations.
+         """
+         self.deployment_app = deployment_app
+
+     def load(
+         self,
+         shapes: List[ComputeShapeSummary],
+         model_ids: List[str],
+         primary_model_id: Optional[str] = None,
+     ) -> ModelDeploymentConfigSummary:
+         """
+         Retrieves deployment configurations for one or more models and calculates compatible GPU allocations.
+
+         Parameters
+         ----------
+         shapes : List[ComputeShapeSummary]
+             Available model deployment shapes.
+         model_ids : List[str]
+             A list of OCIDs for the Aqua models.
+         primary_model_id : Optional[str], optional
+             The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
+             Otherwise, GPUs are evenly allocated.
+
+         Returns
+         -------
+         ModelDeploymentConfigSummary
+             A summary of the deployment configurations and GPU allocations. If GPU allocation
+             cannot be determined, an appropriate error message is included in the summary.
+         """
+         return self._load_multi_model_deployment_configuration(
+             shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id
+         )
+
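For illustration only (not part of the diff): a minimal sketch of how `load` might be invoked. The `app` and `available_shapes` variables and the model OCIDs below are hypothetical stand-ins:

    from ads.aqua.modeldeployment.config_loader import MultiModelDeploymentConfigLoader

    # `app` is assumed to be an already-constructed AquaApp (or subclass) instance;
    # `available_shapes` a List[ComputeShapeSummary] of the shapes offered in the region.
    loader = MultiModelDeploymentConfigLoader(deployment_app=app)
    summary = loader.load(
        shapes=available_shapes,
        model_ids=[
            "ocid1.datasciencemodel.oc1..<model_a>",  # hypothetical OCIDs
            "ocid1.datasciencemodel.oc1..<model_b>",
        ],
        primary_model_id="ocid1.datasciencemodel.oc1..<model_a>",
    )
    if summary.error_message:
        print(summary.error_message)
    else:
        print(summary.gpu_allocation)  # Dict[str, GPUShapeAllocation]
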
+     def _load_multi_model_deployment_configuration(
+         self,
+         shapes: List[ComputeShapeSummary],
+         model_ids: List[str],
+         primary_model_id: Optional[str] = None,
+     ) -> ModelDeploymentConfigSummary:
+         """
+         Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.
+
+         Parameters
+         ----------
+         shapes : List[ComputeShapeSummary]
+             Available model deployment shapes.
+         model_ids : List[str]
+             A list of OCIDs for the Aqua models.
+         primary_model_id : Optional[str], optional
+             The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
+             Otherwise, GPUs are evenly allocated.
+
+         Returns
+         -------
+         ModelDeploymentConfigSummary
+             A summary of the deployment configurations and GPU allocations. If GPU allocation
+             cannot be determined, an appropriate error message is included in the summary.
+         """
+         model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu(
+             shapes=shapes, model_ids=model_ids
+         )
+
+         # Identify common deployment shapes among all models.
+         common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu)
+         logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}")
+
+         # If all models' shape configs are empty, use the default deployment shapes instead.
+         common_shapes = (
+             available_shapes
+             if empty_configs
+             else [
+                 shape_name
+                 for shape_name in common_shapes
+                 if shape_name.upper() in available_shapes
+             ]
+         )
+         logger.debug(f"Available Common Shapes: {common_shapes}")
+
+         if not common_shapes:
+             summary.error_message = (
+                 "The selected models do not share any available common deployment shapes. "
+                 "Please ensure that all chosen models are compatible for multi-model deployment."
+             )
+             logger.debug(
+                 f"No common deployment shapes found among selected models: {model_ids}"
+             )
+             return summary
+
+         # Compute GPU allocations based on the common shapes and optionally prioritize a primary model.
+         gpu_allocation = self._compute_gpu_allocation(
+             shapes=shapes,
+             common_shapes=common_shapes,
+             model_shape_gpu=model_shape_gpu,
+             primary_model_id=primary_model_id,
+         )
+
+         logger.debug(f"GPU Allocation: {gpu_allocation}")
+
+         if not gpu_allocation:
+             summary.error_message = (
+                 "The selected models do not have a valid GPU allocation based on their current configurations. "
+                 "Please select a different model group. If you are deploying custom models that lack AQUA service configuration, "
+                 "refer to the deployment guidelines here: "
+                 "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models"
+             )
+
+             logger.debug(
+                 f"GPU allocation computation failed for selected models: {model_ids}"
+             )
+
+             return summary
+
+         summary.gpu_allocation = gpu_allocation
+         return summary
+
+     def _fetch_model_shape_gpu(
+         self, shapes: List[ComputeShapeSummary], model_ids: List[str]
+     ):
+         """Fetches the model shape/GPU details and the list of available shapes, and builds a `ModelDeploymentConfigSummary` instance."""
+         # Fetch deployment configurations concurrently.
+         logger.debug(f"Loading model deployment configuration for models: {model_ids}")
+         deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)
+
+         logger.debug(f"Loaded config: {deployment_configs}")
+         model_shape_gpu, deployment = self._extract_model_shape_gpu(
+             deployment_configs=deployment_configs, shapes=shapes
+         )
+
+         # Initialize the summary result with the deployment configurations.
+         summary = ModelDeploymentConfigSummary(deployment_config=deployment)
+
+         # Collect the available shape names, used later to filter out unavailable shapes.
+         available_shapes = [item.name.upper() for item in shapes]
+         logger.debug(f"Service Available Shapes: {available_shapes}")
+
+         return model_shape_gpu, available_shapes, summary
+
+     def _fetch_deployment_configs_concurrently(
+         self, model_ids: List[str]
+     ) -> Dict[str, AquaDeploymentConfig]:
+         """Fetches deployment configurations in parallel using ThreadPoolExecutor."""
+         with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
+             results = executor.map(
+                 self._fetch_deployment_config_from_metadata_and_oss,
+                 model_ids,
+             )
+
+         return {
+             model_id: AquaDeploymentConfig(**config.config)
+             for model_id, config in zip(model_ids, results)
+         }
+
+     def _fetch_deployment_config_from_metadata_and_oss(
+         self, model_id: str
+     ) -> ModelConfigResult:
+         """
+         Retrieves the deployment configuration for a model.
+
+         The method first attempts to retrieve the configuration directly from the model itself
+         via metadata or object storage. If not found and the model is identified as a fine-tuned
+         model, it then attempts to retrieve the configuration from the associated base model.
+
+         Sources are checked in the following order:
+         1. Model metadata
+         2. Object Storage
+         3. (If fine-tuned) Base model metadata
+         4. (If fine-tuned) Base model Object Storage
+
+         Parameters
+         ----------
+         model_id : str
+             OCID of the model in the Model Catalog.
+
+         Returns
+         -------
+         ModelConfigResult
+             A result object containing the deployment configuration and model details.
+             If no config is found, `config` will be an empty dictionary.
+         """
+         # Try to get config from the model itself
+         logger.info(
+             "Attempting to retrieve config for model '%s' from metadata.", model_id
+         )
+         config = self.deployment_app.get_config_from_metadata(
+             model_id=model_id,
+             metadata_key=AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION,
+         )
+         if config and config.config:
+             logger.info(
+                 "Successfully retrieved deployment config from metadata for model '%s'.",
+                 model_id,
+             )
+             return config
+
+         logger.info(
+             "Config not found in metadata. Trying Object Storage for model '%s'.",
+             model_id,
+         )
+         config = self.deployment_app.get_config(
+             model_id=model_id,
+             config_file_name=AQUA_MODEL_DEPLOYMENT_CONFIG,
+         )
+         if config and config.config:
+             logger.info(
+                 "Successfully retrieved deployment config from Object Storage for model '%s'.",
+                 model_id,
+             )
+             return config
+
+         # If it's a fine-tuned model, try base model
+         oci_model = self.deployment_app.ds_client.get_model(model_id).data
+         is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in oci_model.freeform_tags
+         if not is_fine_tuned_model:
+             logger.warning("No deployment config found for model '%s'.", model_id)
+             return ModelConfigResult(config={}, model_details=oci_model)
+
+         logger.info(
+             "Model '%s' is a fine-tuned model. Attempting to retrieve base model ID.",
+             model_id,
+         )
+         base_model_id = next(
+             (
+                 item.value
+                 for item in oci_model.custom_metadata_list
+                 if item.key == FineTuneCustomMetadata.FINE_TUNE_SOURCE
+             ),
+             None,
+         )
+
+         if not base_model_id:
+             logger.warning(
+                 "Base model reference not found in custom metadata for fine-tuned model '%s'.",
+                 model_id,
+             )
+             return ModelConfigResult(config={}, model_details=oci_model)
+
+         logger.info(
+             "Using base model '%s' to retrieve deployment config.", base_model_id
+         )
+
+         config = self.deployment_app.get_config_from_metadata(
+             model_id=base_model_id,
+             metadata_key=AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION,
+         )
+         if config and config.config:
+             logger.info(
+                 "Successfully retrieved deployment config from base model metadata '%s'.",
+                 base_model_id,
+             )
+             return config
+
+         config = self.deployment_app.get_config(
+             model_id=base_model_id,
+             config_file_name=AQUA_MODEL_DEPLOYMENT_CONFIG,
+         )
+         if config and config.config:
+             logger.info(
+                 "Successfully retrieved deployment config from base model Object Storage '%s'.",
+                 base_model_id,
+             )
+             return config
+
+         logger.warning(
+             "Deployment configuration could not be found for model '%s' or its base model '%s'.",
+             model_id,
+             base_model_id,
+         )
+         return ModelConfigResult(config={}, model_details=oci_model)
+
+     def _extract_model_shape_gpu(
+         self,
+         deployment_configs: Dict[str, AquaDeploymentConfig],
+         shapes: List[ComputeShapeSummary],
+     ):
+         """Extracts shape and GPU count details from deployment configurations.
+
+         Supported shapes for multi-model deployment are collected from the `configuration` entry in the deployment config.
+         Supported shapes for single-model deployment are collected from the `shape` entry in the deployment config.
+         """
+         model_shape_gpu = {}
+         deployment = {}
+         is_single_model = len(deployment_configs) == 1
+
+         for model_id, config in deployment_configs.items():
+             # For multi-model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
+             # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2.
+             # Our current configuration does not support this flexibility.
+             # For single-model deployment, we use `config.shape` to find the available shapes.
+             multi_deployment_shape = (
+                 list(set(config.configuration.keys()).union(set(config.shape or [])))
+                 if is_single_model
+                 else list(config.configuration.keys())
+             )
+
+             shape_total_gpus_available_map = {
+                 deployment_shape.name.upper(): deployment_shape.gpu_specs.gpu_count
+                 or None
+                 for deployment_shape in shapes
+                 if deployment_shape and deployment_shape.gpu_specs
+             }
+
+             model_shape_gpu[model_id] = {
+                 shape.upper(): [
+                     item.gpu_count
+                     for item in config.configuration.get(
+                         shape,
+                         ConfigurationItem(
+                             multi_model_deployment=(
+                                 [
+                                     MultiModelConfig(
+                                         gpu_count=shape_total_gpus_available_map.get(
+                                             shape.upper()
+                                         )
+                                     )
+                                 ]
+                                 if is_single_model
+                                 else []
+                             )
+                         ),
+                     ).multi_model_deployment
+                 ]
+                 for shape in multi_deployment_shape
+             }
+
+             # For single-model deployments: if the shape is listed in the `shape` section of the config,
+             # we include the maximum available GPU count for that shape in the allocation consideration.
+             if is_single_model:
+                 for shape in model_shape_gpu[model_id]:
+                     shape_total_gpu_count = shape_total_gpus_available_map.get(
+                         shape.upper()
+                     )
+                     if (
+                         shape in config.shape
+                         and shape_total_gpu_count
+                         and shape_total_gpu_count
+                         not in model_shape_gpu[model_id][shape]
+                     ):
+                         model_shape_gpu[model_id][shape].append(shape_total_gpu_count)
+
+             deployment[model_id] = {
+                 "shape": [shape.upper() for shape in config.shape],
+                 "configuration": {
+                     shape.upper(): config.configuration.get(shape, ConfigurationItem())
+                     for shape in multi_deployment_shape
+                 },
+             }
+
+         return model_shape_gpu, deployment
+
+     def _get_common_shapes(
+         self, model_shape_gpu: Dict[str, Dict[str, List[int]]]
+     ) -> tuple:
+         """Finds common shapes across all models."""
+         common_shapes_set = []
+         empty_configs = True
+         for shapes in model_shape_gpu.values():
+             if shapes:
+                 common_shapes_set.append(set(shapes.keys()))
+                 empty_configs = False
+         if not common_shapes_set:
+             return [], empty_configs
+         return list(set.intersection(*common_shapes_set)), empty_configs
+
+     def _compute_gpu_allocation(
+         self,
+         shapes: List[ComputeShapeSummary],
+         common_shapes: List[str],
+         model_shape_gpu: Dict[str, Dict[str, List[int]]],
+         primary_model_id: Optional[str],
+     ) -> Dict[str, GPUShapeAllocation]:
+         """Computes GPU allocation for common shapes."""
+
+         gpu_allocation = {}
+
+         for common_shape in common_shapes:
+             total_gpus_available = 0
+
+             # search the shape in the available shapes list
+             shape_summary = next(
+                 (shape for shape in shapes if shape.name.upper() == common_shape),
+                 None,
+             )
+             if shape_summary and shape_summary.gpu_specs:
+                 total_gpus_available = shape_summary.gpu_specs.gpu_count
+
+             # generate a list of possible gpu count from `total_gpus_available` for custom models
+             # without multi model deployment config
+             # model_gpu = {
+             #     model: (
+             #         shape_gpu[common_shape]
+             #         if shape_gpu.get(common_shape, UNKNOWN)
+             #         else self._generate_gpu_list(total_gpus_available)
+             #     )
+             #     for model, shape_gpu in model_shape_gpu.items()
+             # }
+
+             model_gpu = {
+                 model: (shape_gpu.get(common_shape, []) or [])
+                 for model, shape_gpu in model_shape_gpu.items()
+             }
+
+             is_compatible, combination = self._verify_compatibility(
+                 total_gpus_available=total_gpus_available,
+                 model_gpu_dict=model_gpu,
+                 primary_model_id=primary_model_id,
+             )
+
+             if is_compatible:
+                 gpu_allocation[common_shape] = GPUShapeAllocation(
+                     models=combination, total_gpus_available=total_gpus_available
+                 )
+
+         return gpu_allocation
+
+     @staticmethod
+     def _generate_gpu_list(total_gpus_available: int) -> list[int]:
+         """Generates the list of powers of 2 that are less than or equal to `total_gpus_available`.
+
+         Example
+         -------
+         input: 8
+         output: [1, 2, 4, 8]
+
+         Parameters
+         ----------
+         total_gpus_available : int
+             Total GPUs available.
+
+         Returns
+         -------
+         list
+             A list of powers of 2.
+         """
+         if total_gpus_available < 1:
+             return []
+         return [2**i for i in range(int(math.log2(total_gpus_available)) + 1)]
+
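For illustration only (not part of the diff): a quick check of `_generate_gpu_list` matching its docstring example:

    assert MultiModelDeploymentConfigLoader._generate_gpu_list(8) == [1, 2, 4, 8]
    assert MultiModelDeploymentConfigLoader._generate_gpu_list(0) == []
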
+     def _verify_compatibility(
+         self,
+         total_gpus_available: int,
+         model_gpu_dict: Dict,
+         primary_model_id: str = None,
+     ) -> tuple:
+         """Calculates the GPU allocations for all compatible shapes.
+
+         If no primary Aqua model ID is provided, the GPU count for each compatible shape is evenly allocated.
+         If one is provided, the GPU count for each compatible shape is prioritized for the primary model.
+
+         Example
+         -------
+
+         Case 1:
+         There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has GPU counts as below:
+
+         A - BM.GPU.H100.8 - 1, 2, 4, 8
+         B - BM.GPU.H100.8 - 1, 2, 4, 8
+         C - BM.GPU.H100.8 - 1, 2, 4, 8
+
+         If no primary model is provided, the GPU allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2].
+         If B is the primary model, the GPU allocation is [2, 4, 2], as B always gets the maximum GPU count.
+
+         Case 2:
+         There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has GPU counts as below:
+
+         A - BM.GPU.H100.8 - 1
+         B - BM.GPU.H100.8 - 1, 2, 4
+         C - BM.GPU.H100.8 - 1, 2, 4
+
+         If no primary model is provided, the GPU allocation for A, B, C could be [1, 1, 2] or [1, 2, 1].
+         If C is the primary model, the GPU allocation is [1, 1, 2], as C always gets the maximum GPU count.
+
+         Parameters
+         ----------
+         total_gpus_available: int
+             The total number of GPUs available on the shape.
+         model_gpu_dict: Dict
+             A dict of Aqua models and their GPU counts.
+         primary_model_id: str
+             The OCID of the primary Aqua model.
+
+         Returns
+         -------
+         tuple:
+             A tuple of (is_compatible, gpu_count_allocations).
+         """
+         model_gpu_dict_copy = copy.deepcopy(model_gpu_dict)
+         # minimal gpu count needed to satisfy all models
+         minimal_gpus_needed = len(model_gpu_dict)
+         if primary_model_id and minimal_gpus_needed > 1:
+             primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id))
+             primary_model_gpu_list.reverse()
+             combinations = self.get_combinations(model_gpu_dict_copy)
+             for gpu_count in primary_model_gpu_list:
+                 current_gpus_available = total_gpus_available
+                 while (
+                     current_gpus_available >= minimal_gpus_needed
+                     # or current_gpus_available == 1
+                 ):
+                     for combination in combinations:
+                         if (
+                             len(combination) == len(model_gpu_dict_copy)
+                             and sum(combination.values())
+                             == current_gpus_available - gpu_count
+                         ):
+                             combination[primary_model_id] = gpu_count
+                             return (
+                                 True,
+                                 [
+                                     GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
+                                     for ocid, gpu_count in combination.items()
+                                 ],
+                             )
+
+                     current_gpus_available -= 1
+                     # current_gpus_available = (
+                     #     1 if current_gpus_available == 0 else current_gpus_available
+                     # )
+         else:
+             combinations = self.get_combinations(model_gpu_dict_copy)
+             current_gpus_available = total_gpus_available
+             while (
+                 current_gpus_available >= minimal_gpus_needed
+                 # or current_gpus_available == 1
+             ):
+                 minimal_difference = float("inf")  # gets the positive infinity
+                 optimal_combination = []
+                 for combination in combinations:
+                     if (
+                         len(combination) == len(model_gpu_dict_copy)
+                         and sum(combination.values()) == current_gpus_available
+                     ):
+                         difference = max(combination.values()) - min(
+                             combination.values()
+                         )
+                         if difference < minimal_difference:
+                             minimal_difference = difference
+                             optimal_combination = combination
+
+                         # found the optimal combination, no need to continue
+                         if minimal_difference == 0:
+                             break
+
+                 if optimal_combination:
+                     return (
+                         True,
+                         [
+                             GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
+                             for ocid, gpu_count in optimal_combination.items()
+                         ],
+                     )
+
+                 current_gpus_available -= 1
+                 # current_gpus_available = (
+                 #     1 if current_gpus_available == 0 else current_gpus_available
+                 # )
+
+         return (False, [])
+
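For illustration only (not part of the diff): `_verify_compatibility` never touches `self.deployment_app`, so its behavior can be sketched in isolation (passing `deployment_app=None` is an assumption for this sketch, not a supported usage). This reproduces Case 1 from the docstring, with placeholder model names:

    loader = MultiModelDeploymentConfigLoader(deployment_app=None)  # app unused by this method
    ok, allocation = loader._verify_compatibility(
        total_gpus_available=8,
        model_gpu_dict={"A": [1, 2, 4, 8], "B": [1, 2, 4, 8], "C": [1, 2, 4, 8]},
        primary_model_id="B",
    )
    # Per the docstring, the primary model "B" gets the largest feasible count (4),
    # and the allocation consumes all 8 GPUs.
    assert ok and sum(item.gpu_count for item in allocation) == 8
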
+     @staticmethod
+     def get_combinations(input_dict: dict):
+         """Finds all unique combinations within the input dict.
+
+         The input is a dict of {model: [gpu_count]} on a specific shape, and this method
+         returns a list of all unique combinations of GPU allocations across the models.
+
+         For example:
+
+         input: {'model_a': [2, 4], 'model_b': [1, 2, 4], 'model_c': [1, 2, 8]}
+         output:
+         [
+             {'model_a': 2, 'model_b': 1, 'model_c': 1},
+             {'model_a': 2, 'model_b': 1, 'model_c': 2},
+             {'model_a': 2, 'model_b': 1, 'model_c': 8},
+             {'model_a': 2, 'model_b': 2, 'model_c': 1},
+             {'model_a': 2, 'model_b': 2, 'model_c': 2},
+             {'model_a': 2, 'model_b': 2, 'model_c': 8},
+             {'model_a': 2, 'model_b': 4, 'model_c': 1},
+             {'model_a': 2, 'model_b': 4, 'model_c': 2},
+             {'model_a': 2, 'model_b': 4, 'model_c': 8},
+             {'model_a': 4, 'model_b': 1, 'model_c': 1},
+             {'model_a': 4, 'model_b': 1, 'model_c': 2},
+             {'model_a': 4, 'model_b': 1, 'model_c': 8},
+             {'model_a': 4, 'model_b': 2, 'model_c': 1},
+             {'model_a': 4, 'model_b': 2, 'model_c': 2},
+             {'model_a': 4, 'model_b': 2, 'model_c': 8},
+             {'model_a': 4, 'model_b': 4, 'model_c': 1},
+             {'model_a': 4, 'model_b': 4, 'model_c': 2},
+             {'model_a': 4, 'model_b': 4, 'model_c': 8}
+         ]
+
+         Parameters
+         ----------
+         input_dict: dict
+             A dict of {model: [gpu_count]} on a specific shape.
+
+         Returns
+         -------
+         list:
+             A list of all unique combinations of GPU allocations across the models.
+         """
+         keys, values = zip(*input_dict.items())
+         return [dict(zip(keys, v)) for v in itertools.product(*values)]
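For illustration only (not part of the diff): `get_combinations` is a static method, so it can be exercised directly; this smaller input follows the pattern of the docstring example:

    combos = MultiModelDeploymentConfigLoader.get_combinations(
        {"model_a": [1, 2], "model_b": [1]}
    )
    assert combos == [
        {"model_a": 1, "model_b": 1},
        {"model_a": 2, "model_b": 1},
    ]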