oracle-ads 2.13.11__py3-none-any.whl → 2.13.12__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
@@ -2,542 +2,3 @@
 # Copyright (c) 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 """AQUA model deployment utils"""
-
-import copy
-import itertools
-import logging
-import math
-from concurrent.futures import ThreadPoolExecutor
-from typing import Dict, List, Optional
-
-from ads.aqua.app import AquaApp
-from ads.aqua.common.entities import ComputeShapeSummary, ModelConfigResult
-from ads.aqua.model.constants import AquaModelMetadataKeys
-from ads.aqua.modeldeployment.entities import (
-    AquaDeploymentConfig,
-    ConfigurationItem,
-    GPUModelAllocation,
-    GPUShapeAllocation,
-    ModelDeploymentConfigSummary,
-    MultiModelConfig,
-)
-from ads.config import AQUA_MODEL_DEPLOYMENT_CONFIG
-
-logger = logging.getLogger("ads.aqua")
-
-
-class MultiModelDeploymentConfigLoader:
-    """
-    Processes multiple model deployment configurations to determine compatible GPU shapes
-    and calculate optimal GPU allocations.
-    """
-
-    MAX_WORKERS = 10  # Number of workers for asynchronous models detail loading
-
-    def __init__(self, deployment_app: AquaApp):
-        """
-        Initializes the processor with a reference to the `AquaDeploymentApp` to fetch model configurations.
-
-        Parameters
-        ----------
-        deployment_app : AquaDeploymentApp
-            An instance of AquaDeploymentApp used to fetch model deployment configurations.
-        """
-        self.deployment_app = deployment_app
-
-    def load(
-        self,
-        shapes: List[ComputeShapeSummary],
-        model_ids: List[str],
-        primary_model_id: Optional[str] = None,
-    ) -> ModelDeploymentConfigSummary:
-        """
-        Retrieves deployment configurations for multiple/single model and calculates compatible GPU allocations.
-
-        Parameters
-        ----------
-        shapes : List[ComputeShapeSummary]
-            Model deployment available shapes.
-        model_ids : List[str]
-            A list of OCIDs for the Aqua models.
-        primary_model_id : Optional[str], optional
-            The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
-            Otherwise, GPUs are evenly allocated.
-
-        Returns
-        -------
-        ModelDeploymentConfigSummary
-            A summary of the deployment configurations and GPU allocations. If GPU allocation
-            cannot be determined, an appropriate error message is included in the summary.
-        """
-        return self._load_multi_model_deployment_configuration(
-            shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id
-        )
-
-    def _load_multi_model_deployment_configuration(
-        self,
-        shapes: List[ComputeShapeSummary],
-        model_ids: List[str],
-        primary_model_id: Optional[str] = None,
-    ) -> ModelDeploymentConfigSummary:
-        """
-        Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.
-
-        Parameters
-        ----------
-        shapes : List[ComputeShapeSummary]
-            Model deployment available shapes.
-        model_ids : List[str]
-            A list of OCIDs for the Aqua models.
-        primary_model_id : Optional[str], optional
-            The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
-            Otherwise, GPUs are evenly allocated.
-
-        Returns
-        -------
-        ModelDeploymentConfigSummary
-            A summary of the deployment configurations and GPU allocations. If GPU allocation
-            cannot be determined, an appropriate error message is included in the summary.
-        """
-        model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu(
-            shapes=shapes, model_ids=model_ids
-        )
-
-        # Identify common deployment shapes among all models.
-        common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu)
-        logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}")
-
-        # If all models' shape configs are empty, use default deployment shapes instead
-        common_shapes = (
-            available_shapes
-            if empty_configs
-            else [
-                shape_name
-                for shape_name in common_shapes
-                if shape_name.upper() in available_shapes
-            ]
-        )
-        logger.debug(f"Available Common Shapes: {common_shapes}")
-
-        if not common_shapes:
-            summary.error_message = (
-                "The selected models do not share any available common deployment shapes. "
-                "Please ensure that all chosen models are compatible for multi-model deployment."
-            )
-            logger.debug(
-                f"No common deployment shapes found among selected models: {model_ids}"
-            )
-            return summary
-
-        # Compute GPU allocations based on the common shapes and optionally prioritize a primary model.
-        gpu_allocation = self._compute_gpu_allocation(
-            shapes=shapes,
-            common_shapes=common_shapes,
-            model_shape_gpu=model_shape_gpu,
-            primary_model_id=primary_model_id,
-        )
-
-        logger.debug(f"GPU Allocation: {gpu_allocation}")
-
-        if not gpu_allocation:
-            summary.error_message = (
-                "The selected models do not have a valid GPU allocation based on their current configurations. "
-                "Please select a different model group. If you are deploying custom models that lack AQUA service configuration, "
-                "refer to the deployment guidelines here: "
-                "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models"
-            )
-
-            logger.debug(
-                f"GPU allocation computation failed for selected models: {model_ids}"
-            )
-
-            return summary
-
-        summary.gpu_allocation = gpu_allocation
-        return summary
-
-    def _fetch_model_shape_gpu(
-        self, shapes: List[ComputeShapeSummary], model_ids: List[str]
-    ):
-        """Fetches dict of model shape and gpu, list of available shapes and builds `ModelDeploymentConfigSummary` instance."""
-        # Fetch deployment configurations concurrently.
-        logger.debug(f"Loading model deployment configuration for models: {model_ids}")
-        deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)
-
-        logger.debug(f"Loaded config: {deployment_configs}")
-        model_shape_gpu, deployment = self._extract_model_shape_gpu(
-            deployment_configs=deployment_configs, shapes=shapes
-        )
-
-        # Initialize the summary result with the deployment configurations.
-        summary = ModelDeploymentConfigSummary(deployment_config=deployment)
-
-        # Filter out not available shapes
-        available_shapes = [item.name.upper() for item in shapes]
-        logger.debug(f"Service Available Shapes: {available_shapes}")
-
-        return model_shape_gpu, available_shapes, summary
-
-    def _fetch_deployment_configs_concurrently(
-        self, model_ids: List[str]
-    ) -> Dict[str, AquaDeploymentConfig]:
-        """Fetches deployment configurations in parallel using ThreadPoolExecutor."""
-        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
-            results = executor.map(
-                self._fetch_deployment_config_from_metadata_and_oss,
-                model_ids,
-            )
-
-        return {
-            model_id: AquaDeploymentConfig(**config.config)
-            for model_id, config in zip(model_ids, results)
-        }
-
-    def _fetch_deployment_config_from_metadata_and_oss(
-        self, model_id
-    ) -> ModelConfigResult:
-        config = self.deployment_app.get_config_from_metadata(
-            model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
-        )
-        if config:
-            logger.info(
-                f"Fetched metadata key '{AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION}' from defined metadata for model '{model_id}'"
-            )
-            return config
-        else:
-            logger.info(
-                f"Fetching '{AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION}' from object storage bucket for {model_id}'"
-            )
-            return self.deployment_app.get_config(
-                model_id, AQUA_MODEL_DEPLOYMENT_CONFIG
-            )
-
-    def _extract_model_shape_gpu(
-        self,
-        deployment_configs: Dict[str, AquaDeploymentConfig],
-        shapes: List[ComputeShapeSummary],
-    ):
-        """Extracts shape and GPU count details from deployment configurations.
-        Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config.
-        Supported shapes for single model deployment will be collected from `shape` entry in deployment config.
-        """
-        model_shape_gpu = {}
-        deployment = {}
-        is_single_model = len(deployment_configs) == 1
-
-        for model_id, config in deployment_configs.items():
-            # For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
-            # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2.
-            # Our current configuration does not support this flexibility.
-            # For single model deployment, we use `config.shape` to find the available shapes.
-            multi_deployment_shape = (
-                list(set(config.configuration.keys()).union(set(config.shape or [])))
-                if is_single_model
-                else list(config.configuration.keys())
-            )
-
-            shape_total_gpus_available_map = {
-                deployment_shape.name.upper(): deployment_shape.gpu_specs.gpu_count
-                or None
-                for deployment_shape in shapes
-                if deployment_shape and deployment_shape.gpu_specs
-            }
-
-            model_shape_gpu[model_id] = {
-                shape.upper(): [
-                    item.gpu_count
-                    for item in config.configuration.get(
-                        shape,
-                        ConfigurationItem(
-                            multi_model_deployment=(
-                                [
-                                    MultiModelConfig(
-                                        gpu_count=shape_total_gpus_available_map.get(
-                                            shape.upper()
-                                        )
-                                    )
-                                ]
-                                if is_single_model
-                                else []
-                            )
-                        ),
-                    ).multi_model_deployment
-                ]
-                for shape in multi_deployment_shape
-            }
-
-            # For single-model deployments: if the shape is listed in the `shapes` section of the config,
-            # we include the maximum available GPU count for that shape in the allocation consideration.
-            if is_single_model:
-                for shape in model_shape_gpu[model_id]:
-                    shape_total_gpu_count = shape_total_gpus_available_map.get(
-                        shape.upper()
-                    )
-                    if (
-                        shape in config.shape
-                        and shape_total_gpu_count
-                        and shape_total_gpu_count
-                        not in model_shape_gpu[model_id][shape]
-                    ):
-                        model_shape_gpu[model_id][shape].append(shape_total_gpu_count)
-
-            deployment[model_id] = {
-                "shape": [shape.upper() for shape in config.shape],
-                "configuration": {
-                    shape.upper(): config.configuration.get(shape, ConfigurationItem())
-                    for shape in multi_deployment_shape
-                },
-            }
-
-        return model_shape_gpu, deployment
-
-    def _get_common_shapes(
-        self, model_shape_gpu: Dict[str, Dict[str, List[int]]]
-    ) -> tuple:
-        """Finds common shapes across all models."""
-        common_shapes_set = []
-        empty_configs = True
-        for shapes in model_shape_gpu.values():
-            if shapes:
-                common_shapes_set.append(set(shapes.keys()))
-                empty_configs = False
-        if not common_shapes_set:
-            return [], empty_configs
-        return list(set.intersection(*(common_shapes_set))), empty_configs
-
-    def _compute_gpu_allocation(
-        self,
-        shapes: List[ComputeShapeSummary],
-        common_shapes: List[str],
-        model_shape_gpu: Dict[str, Dict[str, List[int]]],
-        primary_model_id: Optional[str],
-    ) -> Dict[str, GPUShapeAllocation]:
-        """Computes GPU allocation for common shapes."""
-
-        gpu_allocation = {}
-
-        for common_shape in common_shapes:
-            total_gpus_available = 0
-
-            # search the shape in the available shapes list
-            shape_summary = next(
-                (shape for shape in shapes if shape.name.upper() == common_shape),
-                None,
-            )
-            if shape_summary and shape_summary.gpu_specs:
-                total_gpus_available = shape_summary.gpu_specs.gpu_count
-
-            # generate a list of possible gpu count from `total_gpus_available` for custom models
-            # without multi model deployment config
-            # model_gpu = {
-            #     model: (
-            #         shape_gpu[common_shape]
-            #         if shape_gpu.get(common_shape, UNKNOWN)
-            #         else self._generate_gpu_list(total_gpus_available)
-            #     )
-            #     for model, shape_gpu in model_shape_gpu.items()
-            # }
-
-            model_gpu = {
-                model: (shape_gpu.get(common_shape, []) or [])
-                for model, shape_gpu in model_shape_gpu.items()
-            }
-
-            is_compatible, combination = self._verify_compatibility(
-                total_gpus_available=total_gpus_available,
-                model_gpu_dict=model_gpu,
-                primary_model_id=primary_model_id,
-            )
-
-            if is_compatible:
-                gpu_allocation[common_shape] = GPUShapeAllocation(
-                    models=combination, total_gpus_available=total_gpus_available
-                )
-
-        return gpu_allocation
-
-    @staticmethod
-    def _generate_gpu_list(total_gpus_available: int) -> list[int]:
-        """Generates a list of powers of 2 that's smaller than or equal to `total_gpus_available`.
-
-        Example
-        -------
-        input: 8
-        output: [1,2,4,8]
-
-        Parameters
-        ----------
-        total_gpus_available : int
-            Total GPU available
-
-        Returns
-        -------
-        list
-            A list of powers of 2.
-        """
-        if total_gpus_available < 1:
-            return []
-        return [2**i for i in range(int(math.log2(total_gpus_available)) + 1)]
-
-    def _verify_compatibility(
-        self,
-        total_gpus_available: int,
-        model_gpu_dict: Dict,
-        primary_model_id: str = None,
-    ) -> tuple:
-        """Calculates the gpu allocations for all compatible shapes.
-        If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated.
-        If provided, gpu count for each compatible shape will be prioritized for primary model.
-
-        Example
-        -------
-
-        Case 1:
-        There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below:
-
-        A - BM.GPU.H100.8 - 1, 2, 4, 8
-        B - BM.GPU.H100.8 - 1, 2, 4, 8
-        C - BM.GPU.H100.8 - 1, 2, 4, 8
-
-        If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2]
-        If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count.
-
-        Case 2:
-        There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below:
-
-        A - BM.GPU.H100.8 - 1
-        B - BM.GPU.H100.8 - 1, 2, 4
-        C - BM.GPU.H100.8 - 1, 2, 4
-
-        If no primary model is provided, the gpu allocation for A, B, C could be [1, 1, 2] or [1, 2, 1]
-        If C is the primary model, the gpu allocation is [1, 1, 2] as C always gets the maximum gpu count.

-        Parameters
-        ----------
-        model_gpu_dict: Dict
-            A dict of Aqua model and its gpu counts.
-        primary_model_id: str
-            The OCID of the primary Aqua model
-
-        Returns
-        -------
-        tuple:
-            A tuple of gpu count allocation result.
-        """
-        model_gpu_dict_copy = copy.deepcopy(model_gpu_dict)
-        # minimal gpu count needed to satisfy all models
-        minimal_gpus_needed = len(model_gpu_dict)
-        if primary_model_id and minimal_gpus_needed > 1:
-            primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id))
-            primary_model_gpu_list.reverse()
-            combinations = self.get_combinations(model_gpu_dict_copy)
-            for gpu_count in primary_model_gpu_list:
-                current_gpus_available = total_gpus_available
-                while (
-                    current_gpus_available >= minimal_gpus_needed
-                    # or current_gpus_available == 1
-                ):
-                    for combination in combinations:
-                        if (
-                            len(combination) == len(model_gpu_dict_copy)
-                            and sum(combination.values())
-                            == current_gpus_available - gpu_count
-                        ):
-                            combination[primary_model_id] = gpu_count
-                            return (
-                                True,
-                                [
-                                    GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
-                                    for ocid, gpu_count in combination.items()
-                                ],
-                            )
-
-                    current_gpus_available -= 1
-                    # current_gpus_available = (
-                    #     1 if current_gpus_available == 0 else current_gpus_available
-                    # )
-        else:
-            combinations = self.get_combinations(model_gpu_dict_copy)
-            current_gpus_available = total_gpus_available
-            while (
-                current_gpus_available >= minimal_gpus_needed
-                # or current_gpus_available == 1
-            ):
-                minimal_difference = float("inf")  # gets the positive infinity
-                optimal_combination = []
-                for combination in combinations:
-                    if (
-                        len(combination) == len(model_gpu_dict_copy)
-                        and sum(combination.values()) == current_gpus_available
-                    ):
-                        difference = max(combination.values()) - min(
-                            combination.values()
-                        )
-                        if difference < minimal_difference:
-                            minimal_difference = difference
-                            optimal_combination = combination
-
-                        # find the optimal combination, no need to continue
-                        if minimal_difference == 0:
-                            break
-
-                if optimal_combination:
-                    return (
-                        True,
-                        [
-                            GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
-                            for ocid, gpu_count in optimal_combination.items()
-                        ],
-                    )
-
-                current_gpus_available -= 1
-                # current_gpus_available = (
-                #     1 if current_gpus_available == 0 else current_gpus_available
-                # )
-
-        return (False, [])
-
-    @staticmethod
-    def get_combinations(input_dict: dict):
-        """Finds all unique combinations within input dict.
-
-        The input is a dict of {model:[gpu_count]} on a specific shape and this method will
-        return a list of all unique combinations of gpu allocation of each model.
-
-        For example:
-
-        input: {'model_a': [2, 4], 'model_b': [1, 2, 4], 'model_c': [1, 2, 8]}
-        output:
-        [
-            {'model_a': 2, 'model_b': 1, 'model_c': 1},
-            {'model_a': 2, 'model_b': 1, 'model_c': 2},
-            {'model_a': 2, 'model_b': 1, 'model_c': 8},
-            {'model_a': 2, 'model_b': 2, 'model_c': 1},
-            {'model_a': 2, 'model_b': 2, 'model_c': 2},
-            {'model_a': 2, 'model_b': 2, 'model_c': 8},
-            {'model_a': 2, 'model_b': 4, 'model_c': 1},
-            {'model_a': 2, 'model_b': 4, 'model_c': 2},
-            {'model_a': 2, 'model_b': 4, 'model_c': 8},
-            {'model_a': 4, 'model_b': 1, 'model_c': 1},
-            {'model_a': 4, 'model_b': 1, 'model_c': 2},
-            {'model_a': 4, 'model_b': 1, 'model_c': 8},
-            {'model_a': 4, 'model_b': 2, 'model_c': 1},
-            {'model_a': 4, 'model_b': 2, 'model_c': 2},
-            {'model_a': 4, 'model_b': 2, 'model_c': 8},
-            {'model_a': 4, 'model_b': 4, 'model_c': 1},
-            {'model_a': 4, 'model_b': 4, 'model_c': 2},
-            {'model_a': 4, 'model_b': 4, 'model_c': 8}
-        ]
-
-        Parameters
-        ----------
-        input_dict: dict
-            A dict of {model:[gpu_count]} on a specific shape
-
-        Returns
-        -------
-        list:
-            A list of all unique combinations of gpu allocation of each model.
-        """
-        keys, values = zip(*input_dict.items())
-        return [dict(zip(keys, v)) for v in itertools.product(*values)]
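
For reference, the core of the removed allocator is a brute-force search: get_combinations expands the per-model GPU options into every possible assignment via itertools.product, and _verify_compatibility scans those assignments for one that fits within the shape's GPU budget, preferring the most even split when no primary model is given. The standalone sketch below approximates that even-allocation path; the helper name most_even_allocation and the example inputs are illustrative and not part of the package.

import itertools
from typing import Dict, List, Optional


def get_combinations(input_dict: Dict[str, List[int]]) -> List[Dict[str, int]]:
    # Cartesian product of each model's admissible GPU counts, mirroring
    # MultiModelDeploymentConfigLoader.get_combinations above.
    keys, values = zip(*input_dict.items())
    return [dict(zip(keys, v)) for v in itertools.product(*values)]


def most_even_allocation(
    model_gpu: Dict[str, List[int]], total_gpus: int
) -> Optional[Dict[str, int]]:
    # Walk the GPU budget down from the shape's full capacity and, at the
    # first budget that some assignment matches exactly, return the assignment
    # with the smallest max-min spread (the "even allocation" rule).
    combinations = get_combinations(model_gpu)
    for budget in range(total_gpus, len(model_gpu) - 1, -1):
        candidates = [c for c in combinations if sum(c.values()) == budget]
        if candidates:
            return min(candidates, key=lambda c: max(c.values()) - min(c.values()))
    return None


# Example: three models sharing a BM.GPU.H100.8 shape (8 GPUs).
allocation = most_even_allocation(
    {"model_a": [1, 2, 4], "model_b": [1, 2, 4], "model_c": [1, 2, 4]},
    total_gpus=8,
)
print(allocation)  # {'model_a': 2, 'model_b': 2, 'model_c': 4}

The released implementation additionally supports a primary model, reserving it the largest feasible GPU count before fitting the remaining models into what is left, as described in the _verify_compatibility docstring.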