oracle-ads 2.13.11__py3-none-any.whl → 2.13.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/app.py +73 -15
- ads/aqua/cli.py +17 -0
- ads/aqua/client/client.py +38 -21
- ads/aqua/client/openai_client.py +20 -10
- ads/aqua/common/entities.py +78 -12
- ads/aqua/common/utils.py +35 -0
- ads/aqua/constants.py +2 -0
- ads/aqua/evaluation/evaluation.py +5 -4
- ads/aqua/extension/common_handler.py +47 -2
- ads/aqua/extension/model_handler.py +51 -9
- ads/aqua/model/constants.py +1 -0
- ads/aqua/model/enums.py +19 -1
- ads/aqua/model/model.py +119 -51
- ads/aqua/model/utils.py +1 -2
- ads/aqua/modeldeployment/config_loader.py +815 -0
- ads/aqua/modeldeployment/constants.py +4 -1
- ads/aqua/modeldeployment/deployment.py +178 -129
- ads/aqua/modeldeployment/entities.py +150 -178
- ads/aqua/modeldeployment/model_group_config.py +233 -0
- ads/aqua/modeldeployment/utils.py +0 -539
- ads/aqua/verify_policies/__init__.py +8 -0
- ads/aqua/verify_policies/constants.py +13 -0
- ads/aqua/verify_policies/entities.py +29 -0
- ads/aqua/verify_policies/messages.py +101 -0
- ads/aqua/verify_policies/utils.py +432 -0
- ads/aqua/verify_policies/verify.py +345 -0
- ads/aqua/version.json +3 -0
- ads/common/oci_logging.py +4 -7
- ads/common/work_request.py +39 -38
- ads/jobs/builders/infrastructure/dsc_job.py +121 -24
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +71 -24
- ads/jobs/builders/runtimes/base.py +7 -5
- ads/jobs/builders/runtimes/pytorch_runtime.py +6 -8
- ads/jobs/templates/driver_pytorch.py +486 -172
- ads/jobs/templates/driver_utils.py +27 -11
- ads/model/deployment/model_deployment.py +51 -38
- ads/model/service/oci_datascience_model_deployment.py +6 -11
- ads/telemetry/client.py +4 -4
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/METADATA +2 -1
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/RECORD +43 -34
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/licenses/LICENSE.txt +0 -0
@@ -13,8 +13,15 @@ from ads.aqua.common.enums import Tags
|
|
13
13
|
from ads.aqua.config.utils.serializer import Serializable
|
14
14
|
from ads.aqua.constants import UNKNOWN_DICT
|
15
15
|
from ads.aqua.data import AquaResourceIdentifier
|
16
|
+
from ads.aqua.finetuning.constants import FineTuneCustomMetadata
|
17
|
+
from ads.aqua.modeldeployment.config_loader import (
|
18
|
+
ConfigurationItem,
|
19
|
+
ModelDeploymentConfigSummary,
|
20
|
+
)
|
16
21
|
from ads.common.serializer import DataClassSerializable
|
17
22
|
from ads.common.utils import UNKNOWN, get_console_link
|
23
|
+
from ads.model.datascience_model import DataScienceModel
|
24
|
+
from ads.model.model_metadata import ModelCustomMetadataItem
|
18
25
|
|
19
26
|
|
20
27
|
class ConfigValidationError(Exception):
|
@@ -213,184 +220,6 @@ class AquaDeploymentDetail(AquaDeployment, DataClassSerializable):
|
|
213
220
|
extra = "allow"
|
214
221
|
|
215
222
|
|
216
|
-
class ShapeInfoConfig(Serializable):
|
217
|
-
"""Describes how many memory and cpu to this model for specific shape.
|
218
|
-
|
219
|
-
Attributes:
|
220
|
-
memory_in_gbs (float, optional): The number of memory in gbs to this model of the shape.
|
221
|
-
ocpu (float, optional): The number of ocpus to this model of the shape.
|
222
|
-
"""
|
223
|
-
|
224
|
-
memory_in_gbs: Optional[float] = Field(
|
225
|
-
None,
|
226
|
-
description="The number of memory in gbs to this model of the shape.",
|
227
|
-
)
|
228
|
-
ocpu: Optional[float] = Field(
|
229
|
-
None,
|
230
|
-
description="The number of ocpus to this model of the shape.",
|
231
|
-
)
|
232
|
-
|
233
|
-
class Config:
|
234
|
-
extra = "allow"
|
235
|
-
|
236
|
-
|
237
|
-
class DeploymentShapeInfo(Serializable):
|
238
|
-
"""Describes the shape information to this model for specific shape.
|
239
|
-
|
240
|
-
Attributes:
|
241
|
-
configs (List[ShapeInfoConfig], optional): A list of memory and cpu number details to this model of the shape.
|
242
|
-
type (str, optional): The type of the shape.
|
243
|
-
"""
|
244
|
-
|
245
|
-
configs: Optional[List[ShapeInfoConfig]] = Field(
|
246
|
-
default_factory=list,
|
247
|
-
description="A list of memory and cpu number details to this model of the shape.",
|
248
|
-
)
|
249
|
-
type: Optional[str] = Field(
|
250
|
-
default_factory=str, description="The type of the shape."
|
251
|
-
)
|
252
|
-
|
253
|
-
class Config:
|
254
|
-
extra = "allow"
|
255
|
-
|
256
|
-
|
257
|
-
class MultiModelConfig(Serializable):
|
258
|
-
"""Describes how many GPUs and the parameters of specific shape for multi model deployment.
|
259
|
-
|
260
|
-
Attributes:
|
261
|
-
gpu_count (int, optional): Number of GPUs count to this model of this shape.
|
262
|
-
parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
|
263
|
-
configure the behavior of a particular GPU shape.
|
264
|
-
"""
|
265
|
-
|
266
|
-
gpu_count: Optional[int] = Field(
|
267
|
-
default_factory=int, description="The number of GPUs allocated to the model."
|
268
|
-
)
|
269
|
-
parameters: Optional[Dict[str, str]] = Field(
|
270
|
-
default_factory=dict,
|
271
|
-
description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
|
272
|
-
)
|
273
|
-
|
274
|
-
class Config:
|
275
|
-
extra = "allow"
|
276
|
-
|
277
|
-
|
278
|
-
class ConfigurationItem(Serializable):
|
279
|
-
"""Holds key-value parameter pairs for a specific GPU or CPU shape.
|
280
|
-
|
281
|
-
Attributes:
|
282
|
-
parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
|
283
|
-
configure the behavior of a particular GPU shape.
|
284
|
-
multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
|
285
|
-
shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
|
286
|
-
"""
|
287
|
-
|
288
|
-
parameters: Optional[Dict[str, str]] = Field(
|
289
|
-
default_factory=dict,
|
290
|
-
description="Key-value pairs for shape parameters.",
|
291
|
-
)
|
292
|
-
multi_model_deployment: Optional[List[MultiModelConfig]] = Field(
|
293
|
-
default_factory=list, description="A list of multi model configuration details."
|
294
|
-
)
|
295
|
-
shape_info: Optional[DeploymentShapeInfo] = Field(
|
296
|
-
default_factory=DeploymentShapeInfo,
|
297
|
-
description="The shape information to this model for specific shape",
|
298
|
-
)
|
299
|
-
|
300
|
-
class Config:
|
301
|
-
extra = "allow"
|
302
|
-
|
303
|
-
|
304
|
-
class AquaDeploymentConfig(Serializable):
|
305
|
-
"""Represents multi model's shape list and detailed configuration.
|
306
|
-
|
307
|
-
Attributes:
|
308
|
-
shape (List[str], optional): A list of shape names (e.g., BM.GPU.A10.4).
|
309
|
-
configuration (Dict[str, ConfigurationItem], optional): Maps each shape to its configuration details.
|
310
|
-
"""
|
311
|
-
|
312
|
-
shape: Optional[List[str]] = Field(
|
313
|
-
default_factory=list, description="List of supported shapes for the model."
|
314
|
-
)
|
315
|
-
configuration: Optional[Dict[str, ConfigurationItem]] = Field(
|
316
|
-
default_factory=dict, description="Configuration details keyed by shape."
|
317
|
-
)
|
318
|
-
|
319
|
-
class Config:
|
320
|
-
extra = "allow"
|
321
|
-
|
322
|
-
|
323
|
-
class GPUModelAllocation(Serializable):
|
324
|
-
"""Describes how many GPUs are allocated to a particular model.
|
325
|
-
|
326
|
-
Attributes:
|
327
|
-
ocid (str, optional): The unique identifier of the model.
|
328
|
-
gpu_count (int, optional): Number of GPUs allocated to this model.
|
329
|
-
"""
|
330
|
-
|
331
|
-
ocid: Optional[str] = Field(
|
332
|
-
default_factory=str, description="The unique model OCID."
|
333
|
-
)
|
334
|
-
gpu_count: Optional[int] = Field(
|
335
|
-
default_factory=int, description="The number of GPUs allocated to the model."
|
336
|
-
)
|
337
|
-
|
338
|
-
class Config:
|
339
|
-
extra = "allow"
|
340
|
-
|
341
|
-
|
342
|
-
class GPUShapeAllocation(Serializable):
|
343
|
-
"""
|
344
|
-
Allocation details for a specific GPU shape.
|
345
|
-
|
346
|
-
Attributes:
|
347
|
-
models (List[GPUModelAllocation], optional): List of model GPU allocations for this shape.
|
348
|
-
total_gpus_available (int, optional): The total number of GPUs available for this shape.
|
349
|
-
"""
|
350
|
-
|
351
|
-
models: Optional[List[GPUModelAllocation]] = Field(
|
352
|
-
default_factory=list, description="List of model allocations for this shape."
|
353
|
-
)
|
354
|
-
total_gpus_available: Optional[int] = Field(
|
355
|
-
default_factory=int, description="Total GPUs available for this shape."
|
356
|
-
)
|
357
|
-
|
358
|
-
class Config:
|
359
|
-
extra = "allow"
|
360
|
-
|
361
|
-
|
362
|
-
class ModelDeploymentConfigSummary(Serializable):
|
363
|
-
"""Top-level configuration model for OCI-based deployments.
|
364
|
-
|
365
|
-
Attributes:
|
366
|
-
deployment_config (Dict[str, AquaDeploymentConfig], optional): Deployment configurations
|
367
|
-
keyed by model OCID.
|
368
|
-
gpu_allocation (Dict[str, GPUShapeAllocation], optional): GPU allocations keyed by GPU shape.
|
369
|
-
error_message (str, optional): Error message if GPU allocation is not possible.
|
370
|
-
"""
|
371
|
-
|
372
|
-
deployment_config: Optional[Dict[str, AquaDeploymentConfig]] = Field(
|
373
|
-
default_factory=dict,
|
374
|
-
description=(
|
375
|
-
"Deployment configuration details for each model, including supported shapes "
|
376
|
-
"and shape-specific parameters."
|
377
|
-
),
|
378
|
-
)
|
379
|
-
gpu_allocation: Optional[Dict[str, GPUShapeAllocation]] = Field(
|
380
|
-
default_factory=dict,
|
381
|
-
description=(
|
382
|
-
"Details on how GPUs are allocated per shape, including the total "
|
383
|
-
"GPUs available for each shape."
|
384
|
-
),
|
385
|
-
)
|
386
|
-
error_message: Optional[str] = Field(
|
387
|
-
default=None, description="Error message if GPU allocation is not possible."
|
388
|
-
)
|
389
|
-
|
390
|
-
class Config:
|
391
|
-
extra = "allow"
|
392
|
-
|
393
|
-
|
394
223
|
class CreateModelDeploymentDetails(BaseModel):
|
395
224
|
"""Class for creating Aqua model deployments."""
|
396
225
|
|
@@ -648,6 +477,149 @@ class CreateModelDeploymentDetails(BaseModel):
|
|
648
477
|
logger.error(error_message)
|
649
478
|
raise ConfigValidationError(error_message)
|
650
479
|
|
480
|
+
def validate_input_models(self, model_details: Dict[str, DataScienceModel]) -> None:
    """
    Checks every model of a multi-model deployment request before deployment.

    The checks run in this order and stop at the first failure:

    - At least one entry must exist in ``self.models``.
    - Each base model ID must be present in ``model_details``.
    - A base model must not carry the fine-tuned model tag.
    - A base model must be in the 'ACTIVE' lifecycle state.
    - Each fine-tuned (LoRA) model must be present in ``model_details``,
      be 'ACTIVE', carry the fine-tuned model tag, and name the enclosing
      base model as its fine-tune source.

    Name collisions (explicit ``model_name`` or, when absent, the model's
    display name) are collected across all base and fine-tuned models and
    reported together once every model has passed the checks above.

    Parameters
    ----------
    model_details : Dict[str, DataScienceModel]
        Mapping of model OCID to DataScienceModel, covering both the base
        models and the fine-tuned models to validate.

    Raises
    ------
    ConfigValidationError
        If any of the conditions above is violated.
    """
    if not self.models:
        logger.error("Validation failed: No models specified in the model group.")
        raise ConfigValidationError(
            "Multi-model deployment requires at least one model entry. "
            "Please provide a base model in the `models` list."
        )

    ft_tag = Tags.AQUA_FINE_TUNED_MODEL_TAG
    source_key = FineTuneCustomMetadata.FINE_TUNE_SOURCE
    used_names = set()
    repeated_names = set()

    def _track_name(name):
        # First sighting goes to `used_names`; any later one is a duplicate.
        if name in used_names:
            repeated_names.add(name)
        else:
            used_names.add(name)

    for entry in self.models:
        base_id = entry.model_id
        base = model_details.get(base_id)

        if not base:
            logger.error("Validation failed: Base model ID '%s' not found.", base_id)
            raise ConfigValidationError(f"Model not found: '{base_id}'.")

        base_tags = base.freeform_tags or {}
        if ft_tag in base_tags:
            logger.error(
                "Validation failed: Base model ID '%s' is a fine-tuned model.",
                base_id,
            )
            raise ConfigValidationError(
                f"Invalid base model ID '{base_id}'. "
                "Specify a base model OCID in the `models` input, not a fine-tuned model."
            )

        if base.lifecycle_state != "ACTIVE":
            logger.error(
                "Validation failed: Base model '%s' is in state '%s'.",
                base_id,
                base.lifecycle_state,
            )
            raise ConfigValidationError(
                f"Invalid base model ID '{base_id}': must be in ACTIVE state."
            )

        # An explicit name wins; otherwise fall back to the display name.
        _track_name(entry.model_name or base.display_name)

        for lora in entry.fine_tune_weights or []:
            ft_id = lora.model_id
            ft = model_details.get(ft_id)

            if not ft:
                logger.error(
                    "Validation failed: Fine-tuned model ID '%s' not found.",
                    ft_id,
                )
                raise ConfigValidationError(
                    f"Fine-tuned model not found: '{ft_id}'."
                )

            if ft.lifecycle_state != "ACTIVE":
                logger.error(
                    "Validation failed: Fine-tuned model '%s' is in state '%s'.",
                    ft_id,
                    ft.lifecycle_state,
                )
                raise ConfigValidationError(
                    f"Invalid Fine-tuned model ID '{ft_id}': must be in ACTIVE state."
                )

            ft_tags = ft.freeform_tags or {}
            if ft_tag not in ft_tags:
                logger.error(
                    "Validation failed: Model ID '%s' is missing tag '%s'.",
                    ft_id,
                    ft_tag,
                )
                raise ConfigValidationError(
                    f"Invalid fine-tuned model ID '{ft_id}': missing tag '{ft_tag}'."
                )

            # An item with only the key set stands in when the fine-tune
            # source entry is absent from the custom metadata.
            fallback_item = ModelCustomMetadataItem(key=source_key)
            source_item = ft.custom_metadata_list.get(source_key, fallback_item)
            linked_base_id = source_item.value

            if linked_base_id != base_id:
                logger.error(
                    "Validation failed: Fine-tuned model '%s' is linked to base model '%s' (expected '%s').",
                    ft_id,
                    linked_base_id,
                    base_id,
                )
                raise ConfigValidationError(
                    f"Fine-tuned model '{ft_id}' belongs to base model '{linked_base_id}', "
                    f"but was included under base model '{base_id}'."
                )

            _track_name(lora.model_name or ft.display_name)

            logger.debug(
                "Validated fine-tuned model '%s' under base model '%s'.",
                ft_id,
                base_id,
            )

    if repeated_names:
        listing = ", ".join(sorted(repeated_names))
        logger.error("Duplicate model names detected: %s", listing)
        raise ConfigValidationError(
            f"The following model names are duplicated across base and fine-tuned models: "
            f"{listing}. Model names must be unique for proper routing in multi-model deployments."
        )
|
622
|
+
|
651
623
|
class Config:
|
652
624
|
extra = "allow"
|
653
625
|
protected_namespaces = ()
|
@@ -0,0 +1,233 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# Copyright (c) 2025 Oracle and/or its affiliates.
|
3
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
4
|
+
|
5
|
+
from typing import List, Optional, Tuple, Union
|
6
|
+
|
7
|
+
from pydantic import BaseModel, Field, field_validator
|
8
|
+
from typing_extensions import Self
|
9
|
+
|
10
|
+
from ads.aqua import logger
|
11
|
+
from ads.aqua.common.entities import AquaMultiModelRef
|
12
|
+
from ads.aqua.common.errors import AquaValueError
|
13
|
+
from ads.aqua.common.utils import (
|
14
|
+
build_params_string,
|
15
|
+
find_restricted_params,
|
16
|
+
get_combined_params,
|
17
|
+
get_container_params_type,
|
18
|
+
get_params_dict,
|
19
|
+
)
|
20
|
+
from ads.aqua.config.utils.serializer import Serializable
|
21
|
+
from ads.aqua.modeldeployment.config_loader import (
|
22
|
+
AquaDeploymentConfig,
|
23
|
+
ConfigurationItem,
|
24
|
+
ModelDeploymentConfigSummary,
|
25
|
+
)
|
26
|
+
from ads.aqua.modeldeployment.entities import CreateModelDeploymentDetails
|
27
|
+
from ads.common.object_storage_details import ObjectStorageDetails
|
28
|
+
from ads.common.utils import UNKNOWN
|
29
|
+
|
30
|
+
__all__ = ["ModelGroupConfig", "BaseModelSpec"]
|
31
|
+
|
32
|
+
from ads.aqua.common.entities import LoraModuleSpec
|
33
|
+
|
34
|
+
|
35
|
+
class BaseModelSpec(BaseModel):
    """
    Defines configuration for a single base model in multi-model deployment.

    Attributes
    ----------
    model_id : str
        The OCID of the base model.
    model_path : str
        Path to the model in OCI Object Storage. The value is normalized by
        `clean_model_path` during validation.
    params : str
        Additional vLLM launch parameters for this model (e.g. parallelism, max context).
    model_task : str, optional
        Model task type (e.g., text-generation, image-to-text).
        Defaults to None when not supplied.
    fine_tune_weights : List[LoraModuleSpec], optional
        List of associated LoRA modules for fine-tuned models.
    """

    model_id: str = Field(..., description="The base model OCID.")
    model_path: str = Field(..., description="Path to the base model.")
    params: str = Field(..., description="Startup parameters passed to vLLM.")
    # `Field(...)` would make this field required even though it is typed and
    # documented as optional, so an explicit default of None is used instead.
    model_task: Optional[str] = Field(
        default=None, description="Task type the model is intended for."
    )
    fine_tune_weights: Optional[List[LoraModuleSpec]] = Field(
        default_factory=list,
        description="Optional list of fine-tuned model variants associated with this base model.",
    )

    @field_validator("model_path")
    @classmethod
    def clean_model_path(cls, artifact_path_prefix: str) -> str:
        """Validates and cleans the file path for model_path parameter.

        Returns the `filepath` component of the OCI path with trailing
        slashes removed.

        Raises
        ------
        AquaValueError
            If the value is not recognized as an OCI path.
        """
        if not ObjectStorageDetails.is_oci_path(artifact_path_prefix):
            raise AquaValueError(
                "The base model path is not available in the model artifact."
            )

        os_path = ObjectStorageDetails.from_path(artifact_path_prefix)
        return os_path.filepath.rstrip("/")

    @classmethod
    def dedup_lora_modules(
        cls, fine_tune_weights: List[LoraModuleSpec]
    ) -> List[LoraModuleSpec]:
        """Removes duplicate LoRA Modules (duplicate model_names in fine_tune_weights).

        The first module seen for each `model_name` is kept and the input
        order is preserved. Modules without a `model_name` are never treated
        as duplicates. A `None` or empty input yields an empty list.
        """
        seen = set()
        unique_modules: List[LoraModuleSpec] = []

        for module in fine_tune_weights or []:
            if module.model_name and module.model_name in seen:
                logger.warning(
                    f"Duplicate LoRA Module detected: {module.model_name!r} (skipping duplicate)."
                )
                continue
            seen.add(module.model_name)
            unique_modules.append(module)

        return unique_modules

    @classmethod
    def from_aqua_multi_model_ref(
        cls, model: AquaMultiModelRef, model_params: str
    ) -> Self:
        """Converts AquaMultiModelRef to BaseModelSpec.

        `model_path` is taken from `model.artifact_location` and validated by
        `clean_model_path`; duplicate LoRA modules are dropped through
        `dedup_lora_modules` before the spec is built.
        """
        return cls(
            model_id=model.model_id,
            model_path=model.artifact_location,
            params=model_params,
            model_task=model.model_task,
            fine_tune_weights=cls.dedup_lora_modules(model.fine_tune_weights),
        )
|
107
|
+
|
108
|
+
|
109
|
+
class ModelGroupConfig(Serializable):
    """
    Schema representing the metadata passed via MULTI_MODEL_CONFIG for multi-model deployments.

    Attributes
    ----------
    models : List[BaseModelSpec]
        List of base models (with optional fine-tune weights) to be served.
    """

    models: List[BaseModelSpec] = Field(
        ..., description="List of models in the multi-model deployment."
    )

    @staticmethod
    def _extract_model_params(
        model: AquaMultiModelRef,
        container_params: Union[str, List[str]],
        container_type_key: str,
    ) -> Tuple[str, str]:
        """
        Validates if user-provided parameters override pre-set parameters by AQUA.
        Updates model name and TP size parameters to user-provided parameters.

        Returns
        -------
        Tuple[str, str]
            The user parameter string built from `model.env_var`, and the
            container parameter string with `--served-model-name` and
            `--tensor-parallel-size` set from the model.

        Raises
        ------
        AquaValueError
            If `find_restricted_params` reports any restricted parameter
            among the user-provided ones.
        """
        user_params = build_params_string(model.env_var)
        if user_params:
            restricted_params = find_restricted_params(
                container_params, user_params, container_type_key
            )
            if restricted_params:
                selected_model = model.model_name or model.model_id
                # Adjacent literals are concatenated verbatim, so each piece
                # carries its own trailing space to keep sentences apart.
                raise AquaValueError(
                    f"Parameters {restricted_params} are set by Aqua "
                    f"and cannot be overridden or are invalid. "
                    f"Select other parameters for model {selected_model}."
                )

        # replaces `--served-model-name` with user's model name
        container_params_dict = get_params_dict(container_params)
        container_params_dict.update({"--served-model-name": model.model_name})
        # replaces `--tensor-parallel-size` with model gpu count
        container_params_dict.update({"--tensor-parallel-size": model.gpu_count})
        params = build_params_string(container_params_dict)

        return user_params, params

    @staticmethod
    def _merge_gpu_count_params(
        model: AquaMultiModelRef,
        model_config_summary: ModelDeploymentConfigSummary,
        create_deployment_details: CreateModelDeploymentDetails,
        container_type_key: str,
        container_params,
    ) -> str:
        """Finds the corresponding deployment parameters based on the GPU count
        and combines them with user's parameters. Existing deployment parameters
        will be overridden by user's parameters.

        Lookup order for the configured parameters:

        1. The `multi_model_deployment` entry of the selected instance shape
           whose `gpu_count` equals `model.gpu_count`.
        2. The shape-level `parameters` of the selected instance shape.
        3. Neither found: the user parameters are appended as they are.
        """
        user_params, params = ModelGroupConfig._extract_model_params(
            model, container_params, container_type_key
        )

        # Missing model or shape entries fall back to empty config objects so
        # the lookups below simply find nothing.
        deployment_config = model_config_summary.deployment_config.get(
            model.model_id, AquaDeploymentConfig()
        ).configuration.get(
            create_deployment_details.instance_shape, ConfigurationItem()
        )

        params_found = False
        # `multi_model_deployment` is declared Optional, so tolerate None.
        for item in deployment_config.multi_model_deployment or []:
            if model.gpu_count and item.gpu_count and item.gpu_count == model.gpu_count:
                config_parameters = item.parameters.get(
                    get_container_params_type(container_type_key), UNKNOWN
                )
                params = f"{params} {get_combined_params(config_parameters, user_params)}".strip()
                params_found = True
                break

        if not params_found and deployment_config.parameters:
            config_parameters = deployment_config.parameters.get(
                get_container_params_type(container_type_key), UNKNOWN
            )
            params = f"{params} {get_combined_params(config_parameters, user_params)}".strip()
            params_found = True

        # if no config parameters found, append user parameters directly.
        if not params_found:
            params = f"{params} {user_params}".strip()

        return params

    @classmethod
    def from_create_model_deployment_details(
        cls,
        create_deployment_details: CreateModelDeploymentDetails,
        model_config_summary: ModelDeploymentConfigSummary,
        container_type_key,
        container_params,
    ) -> Self:
        """
        Converts CreateModelDeploymentDetail to ModelGroupConfig.
        CreateModelDeploymentDetail represents user-provided parameters and models within a multi-model group after model artifact is created.
        ModelGroupConfig is the Pydantic representation of MULTI_MODEL_CONFIG environment variable during model deployment.

        Raises
        ------
        AquaValueError
            If two entries in `create_deployment_details.models` share the
            same `model_name`, or if parameter extraction rejects a model's
            user-provided parameters.
        """
        models = []
        seen_models = set()
        for model in create_deployment_details.models:
            params = ModelGroupConfig._merge_gpu_count_params(
                model,
                model_config_summary,
                create_deployment_details,
                container_type_key,
                container_params,
            )

            if model.model_name in seen_models:
                raise AquaValueError(
                    f"Duplicate model name ‘{model.model_name}’ detected in multi-model group. "
                    "Each base model must have a unique `model_name`. "
                    "Please remove or rename the duplicate model and register the model group again."
                )

            seen_models.add(model.model_name)
            models.append(BaseModelSpec.from_aqua_multi_model_ref(model, params))

        return cls(models=models)
|