oracle-ads 2.13.17rc0__py3-none-any.whl → 2.13.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. ads/aqua/cli.py +7 -5
  2. ads/aqua/common/entities.py +88 -29
  3. ads/aqua/common/enums.py +7 -0
  4. ads/aqua/common/errors.py +5 -0
  5. ads/aqua/common/utils.py +87 -7
  6. ads/aqua/constants.py +3 -0
  7. ads/aqua/extension/deployment_handler.py +36 -0
  8. ads/aqua/modeldeployment/config_loader.py +10 -0
  9. ads/aqua/modeldeployment/constants.py +1 -0
  10. ads/aqua/modeldeployment/deployment.py +99 -22
  11. ads/aqua/modeldeployment/entities.py +4 -0
  12. ads/aqua/resources/gpu_shapes_index.json +315 -26
  13. ads/aqua/shaperecommend/__init__.py +6 -0
  14. ads/aqua/shaperecommend/constants.py +116 -0
  15. ads/aqua/shaperecommend/estimator.py +384 -0
  16. ads/aqua/shaperecommend/llm_config.py +283 -0
  17. ads/aqua/shaperecommend/recommend.py +493 -0
  18. ads/aqua/shaperecommend/shape_report.py +233 -0
  19. ads/aqua/version.json +1 -1
  20. ads/cli.py +9 -1
  21. ads/jobs/builders/infrastructure/dsc_job.py +1 -0
  22. ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
  23. ads/model/service/oci_datascience_model_deployment.py +46 -19
  24. ads/opctl/operator/lowcode/common/data.py +7 -2
  25. ads/opctl/operator/lowcode/common/transformations.py +207 -0
  26. ads/opctl/operator/lowcode/common/utils.py +8 -0
  27. ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
  28. ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
  29. ads/opctl/operator/lowcode/forecast/const.py +2 -0
  30. ads/opctl/operator/lowcode/forecast/errors.py +5 -0
  31. ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
  32. ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
  33. ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
  34. ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
  35. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
  36. ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
  37. ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
  38. ads/pipeline/ads_pipeline.py +13 -9
  39. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
  40. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +43 -36
  41. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
  42. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
  43. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/shaperecommend/llm_config.py
@@ -0,0 +1,283 @@
+ #!/usr/bin/env python
+ # Copyright (c) 2025 Oracle and/or its affiliates.
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+ import re
+ from typing import Optional
+
+ from pydantic import BaseModel, Field
+
+ from ads.aqua.common.errors import AquaRecommendationError
+ from ads.aqua.shaperecommend.constants import (
+     BITS_AND_BYTES_4BIT,
+     BITS_AND_BYTES_8BIT,
+     DEFAULT_WEIGHT_SIZE,
+     NEXT_QUANT,
+     QUANT_MAPPING,
+     QUANT_METHODS,
+ )
+
+
+ class LLMConfig(BaseModel):
+     """
+     Standardized configuration object for evaluating the size of Large Language Models (LLMs)
+     based on their architecture and quantization.
+     """
+
+     num_hidden_layers: int = Field(
+         ...,
+         description="Number of transformer blocks (layers) in the model’s neural network stack.",
+     )
+     hidden_size: int = Field(
+         ..., description="Embedding dimension or hidden size of each layer."
+     )
+     vocab_size: int = Field(..., description="Vocabulary size for input/output tokens.")
+     num_attention_heads: int = Field(
+         ...,
+         description="Number of attention heads (used for queries and to determine head_dim).",
+     )
+
+     head_dim: int = Field(
+         ...,
+         description="Dimension of each attention head. Typically hidden_size // num_attention_heads.",
+     )
+     max_seq_len: Optional[int] = Field(
+         4096, description="Maximum input sequence length (context window)."
+     )
+     weight_dtype: Optional[str] = Field(
+         DEFAULT_WEIGHT_SIZE,
+         description="Parameter data type: 'float32', 'float16', etc.",
+     )
+     quantization: Optional[str] = Field(
+         None,
+         description="Quantization bit-width (e.g., '8bit', '4bit') or None if unquantized.",
+     )
+     quantization_type: Optional[str] = Field(
+         None,
+         description="Quantization method (e.g., 'bitsandbytes', 'gptq', 'awq') or None if unquantized.",
+     )
+
+     in_flight_quantization: Optional[str] = Field(
+         None,
+         description="When set, the model footprint is recalculated using 4-bit in-flight quantization.",
+     )
+
+     num_key_value_heads: Optional[int] = Field(
+         None,
+         description="Number of key/value heads (for GQA architectures: Llama, Mistral, Falcon, Qwen, etc.). Used to determine KV cache size.",
+     )
+
+     num_local_experts: Optional[int] = Field(
+         None, description="For MoE architectures, the number of experts per MoE layer."
+     )
+     intermediate_size: Optional[int] = Field(
+         None, description="For MoE architectures, size of the MLP activation layer."
+     )
+
+     tie_word_embeddings: Optional[bool] = Field(None)
+
+     @property
+     def bytes_per_parameter(self) -> float:
+         """
+         Returns the number of bytes used to store a model parameter,
+         accounting for quantization or weight storage type.
+         """
+         # Quantization takes precedence
+         q = (self.quantization or "").lower()
+
+         # Direct match in mapping
+         if q in QUANT_MAPPING:
+             return QUANT_MAPPING[q]
+
+         # Dynamic bit-width detection
+         m = re.match(r"(\d+)\s*bit", q)
+         if m:
+             bits = int(m[1])
+             return bits / 8  # bytes per parameter
+
+         # Consider in-flight quantization
+         if self.in_flight_quantization in QUANT_MAPPING:
+             return QUANT_MAPPING[self.in_flight_quantization]
+
+         # Fallback to dtype mapping
+         dtype = (self.weight_dtype or DEFAULT_WEIGHT_SIZE).lower()
+         return QUANT_MAPPING.get(dtype, QUANT_MAPPING[DEFAULT_WEIGHT_SIZE])
+
+     @classmethod
+     def detect_quantization_type(cls, raw: dict) -> Optional[str]:
+         """
+         Detects the quantization method (e.g., 'gptq', 'bitsandbytes', 'awq') from a Hugging Face config dict.
+         """
+         qcfg = raw.get("quantization_config", {})
+         if raw.get("load_in_8bit") or raw.get("load_in_4bit"):
+             return "bitsandbytes"
+         for key in QUANT_METHODS:
+             if key in str(qcfg).lower() or key in str(raw).lower():
+                 return key
+         return None
+
+     @classmethod
+     def detect_quantization_bits(cls, raw: dict) -> Optional[str]:
+         """
+         Detects the quantization bit-width as a string (e.g., '4bit', '8bit') from a Hugging Face config dict.
+         """
+         if raw.get("load_in_8bit"):
+             return BITS_AND_BYTES_8BIT
+         if raw.get("load_in_4bit"):
+             return BITS_AND_BYTES_4BIT
+         if "quantization_config" in raw:
+             qcfg = raw["quantization_config"]
+             bits = qcfg.get("bits") or qcfg.get("wbits")
+             if bits:
+                 return f"{bits}bit"
+         return None
+
+     @property
+     def suggested_quantizations(self):
+         """
+         Suggests the next lower quantization options based on the current quantization level / weight size.
+
+         If the model is unquantized, uses the weight size.
+         If the model is pre-quantized, uses the quantization level.
+         """
+         key = (
+             self.quantization
+             or self.in_flight_quantization
+             or self.weight_dtype
+             or DEFAULT_WEIGHT_SIZE
+         ).lower()
+         return NEXT_QUANT.get(key, [])
+
+     def calculate_possible_seq_len(self, min_len=2048):
+         """
+         Calculates a list of possible sequence lengths (in tokens):
+         [2048, ..., max-length], where max-length comes from the model's config.json file.
+         """
+         vals = []
+         curr = min_len
+         while curr <= self.max_seq_len:
+             vals.append(curr)
+             curr *= 2
+         if vals and vals[-1] != self.max_seq_len:
+             vals.append(self.max_seq_len)
+         return vals
+
+     def optimal_config(self):
+         """
+         Builds a list of optimal configuration parameters (sorted descending). Combination of:
+         - Quantization / weight sizes: bfloat16 weight size -> 8bit -> 4bit
+         - max-model-len: power-of-two model lengths from the max length (config.json of the model) down to 2048 tokens.
+
+         Example:
+             [('bfloat16', max_model_len supported by model), ('bfloat16', 1/2 of max_model_len), ... ('4bit', 4096), ('4bit', 2048)]
+         """
+         # TODO (use later): create a copy of the suggested_quantizations list
+         # quantizations = self.suggested_quantizations[:]
+         quantizations = ["bfloat16", "4bit"]
+
+         lengths = self.calculate_possible_seq_len()
+
+         configs = []
+         for quantization in quantizations:
+             for length in lengths:
+                 configs.append((quantization, length))
+
+         configs.sort(
+             key=lambda x: (-QUANT_MAPPING.get(x[0], 0), -x[1])
+         )  # (-quant_priority, -max_seq_len)
+         return configs
+
+     @classmethod
+     def validate_model_support(cls, raw: dict) -> None:
+         """
+         Validates that the model is decoder-only. The check for a text-generation model occurs at the DataScienceModel level.
+         """
+         excluded_models = {"t5", "gemma", "bart", "bert", "roberta", "albert"}
+         if (
+             raw.get("is_encoder_decoder", False)  # exclude encoder-decoder models
+             or (
+                 raw.get("is_decoder") is False
+             )  # exclude explicit encoder-only models (unlikely for text-generation tasks, but double-check)
+             or raw.get("model_type", "").lower()  # exclude by known model types
+             in excluded_models
+         ):
+             raise AquaRecommendationError(
+                 "Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). "
+                 "Encoder-decoder models (ex. T5, Gemma) and encoder-only (BERT) are not supported at this time."
+             )
+
+     @classmethod
+     def from_raw_config(cls, raw: dict) -> "LLMConfig":
+         """
+         Instantiates an LLMConfig from a raw Hugging Face config.json file,
+         using robust key detection and fallback for architecture.
+         """
+         cls.validate_model_support(raw)
+
+         # Field mappings with fallback
+         num_hidden_layers = (
+             raw.get("num_hidden_layers") or raw.get("n_layer") or raw.get("num_layers")
+         )
+         hidden_size = raw.get("hidden_size") or raw.get("n_embd") or raw.get("d_model")
+         vocab_size = raw.get("vocab_size")
+         weight_dtype = str(raw.get("torch_dtype", DEFAULT_WEIGHT_SIZE))
+         quantization = cls.detect_quantization_bits(raw)
+         quantization_type = cls.detect_quantization_type(raw)
+
+         if not quantization and quantization_type in QUANT_MAPPING:
+             quantization = quantization_type
+
+         num_key_value_heads = (
+             raw.get("num_key_value_heads")  # GQA models (ex. Llama-type)
+         )
+
+         num_attention_heads = (
+             raw.get("num_attention_heads") or raw.get("n_head") or raw.get("num_heads")
+         )
+
+         head_dim = raw.get("head_dim") or (
+             int(hidden_size) // int(num_attention_heads)
+             if hidden_size and num_attention_heads
+             else None
+         )
+         max_seq_len = (
+             raw.get("max_position_embeddings")
+             or raw.get("n_positions")
+             or raw.get("max_seq_len")
+             or 2048
+         )
+
+         num_local_experts = (
+             raw.get("num_local_experts")
+             or raw.get("n_routed_experts")
+             or raw.get("num_experts")
+         )
+         intermediate_size = raw.get("moe_intermediate_size") or raw.get(
+             "intermediate_size"
+         )
+
+         # Type safety: minimal assertion
+         if None in [
+             num_hidden_layers,
+             hidden_size,
+             vocab_size,
+             num_attention_heads,
+             head_dim,
+         ]:
+             raise ValueError("Missing required value in model config.")
+
+         return cls(
+             num_hidden_layers=int(num_hidden_layers),
+             hidden_size=int(hidden_size),
+             num_attention_heads=int(num_attention_heads),
+             num_key_value_heads=num_key_value_heads,
+             head_dim=int(head_dim),
+             vocab_size=int(vocab_size),
+             weight_dtype=weight_dtype,
+             quantization=quantization,
+             quantization_type=quantization_type,
+             max_seq_len=int(max_seq_len),
+             num_local_experts=num_local_experts,
+             intermediate_size=intermediate_size,
+         )
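
For orientation, here is a minimal usage sketch of the new LLMConfig class (not part of the diff). It assumes a hypothetical Hugging Face-style config dict; the numeric values are illustrative only, and the expected outputs assume that QUANT_MAPPING in ads/aqua/shaperecommend/constants.py maps 'bfloat16' to 2 bytes per parameter and ranks it above '4bit'.

    from ads.aqua.shaperecommend.llm_config import LLMConfig

    # Hypothetical Llama-style config.json contents (illustrative values only).
    raw = {
        "num_hidden_layers": 32,
        "hidden_size": 4096,
        "vocab_size": 32000,
        "num_attention_heads": 32,
        "num_key_value_heads": 8,           # GQA: fewer KV heads than attention heads
        "max_position_embeddings": 8192,
        "torch_dtype": "bfloat16",
    }

    cfg = LLMConfig.from_raw_config(raw)
    print(cfg.head_dim)                      # 128 == 4096 // 32 (derived fallback)
    print(cfg.bytes_per_parameter)           # 2, assuming QUANT_MAPPING["bfloat16"] == 2
    print(cfg.calculate_possible_seq_len())  # [2048, 4096, 8192]
    print(cfg.optimal_config()[:2])          # [('bfloat16', 8192), ('bfloat16', 4096)], under the same assumption

A pre-quantized checkpoint (for example, one carrying load_in_4bit or a quantization_config block) would instead populate quantization and quantization_type through the detect_quantization_bits / detect_quantization_type helpers shown in the diff.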