oracle-ads 2.13.17rc0__py3-none-any.whl → 2.13.18rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/common/enums.py +1 -0
- ads/aqua/common/utils.py +38 -0
- ads/aqua/modeldeployment/config_loader.py +10 -0
- ads/aqua/modeldeployment/deployment.py +16 -10
- ads/aqua/modeldeployment/entities.py +1 -0
- ads/opctl/operator/lowcode/common/data.py +7 -2
- ads/opctl/operator/lowcode/common/transformations.py +207 -0
- ads/opctl/operator/lowcode/common/utils.py +8 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
- ads/opctl/operator/lowcode/forecast/const.py +2 -0
- ads/opctl/operator/lowcode/forecast/errors.py +5 -0
- ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
- ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
- ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +50 -14
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
- ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18rc0.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18rc0.dist-info}/RECORD +24 -23
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18rc0.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18rc0.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18rc0.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/common/enums.py
CHANGED
@@ -58,6 +58,7 @@ class InferenceContainerTypeFamily(ExtendedEnum):
     AQUA_VLLM_LLAMA4_CONTAINER_FAMILY = "odsc-vllm-serving-llama4"
     AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving"
     AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving"
+    AQUA_VLLM_OPENAI_CONTAINER_FAMILY = "odsc-vllm-serving-openai"
 
 
 class CustomInferenceContainerTypeFamily(ExtendedEnum):
ads/aqua/common/utils.py
CHANGED
@@ -997,6 +997,44 @@ def get_container_params_type(container_type_name: str) -> str:
     return UNKNOWN
 
 
+def get_container_env_type(container_type_name: Optional[str]) -> str:
+    """
+    Determine the container environment type based on the container type name.
+
+    This function matches the provided container type name against the known
+    values of `InferenceContainerType`. The check is case-insensitive and
+    allows for partial matches so that changes in container naming conventions
+    (e.g., prefixes or suffixes) will still be matched correctly.
+
+    Examples:
+        >>> get_container_env_type("odsc-vllm-serving")
+        'VLLM'
+        >>> get_container_env_type("ODSC-TGI-Serving")
+        'TGI'
+        >>> get_container_env_type("custom-unknown-container")
+        'UNKNOWN'
+
+    Args:
+        container_type_name (Optional[str]):
+            The deployment container type name (e.g., "odsc-vllm-serving").
+
+    Returns:
+        str:
+            - A matching `InferenceContainerType` value string (e.g., "VLLM", "TGI", "LLAMA-CPP").
+            - `"UNKNOWN"` if no match is found or the input is empty/None.
+    """
+    if not container_type_name:
+        return UNKNOWN
+
+    needle = container_type_name.strip().casefold()
+
+    for container_type in InferenceContainerType.values():
+        if container_type and container_type.casefold() in needle:
+            return container_type.upper()
+
+    return UNKNOWN
+
+
 def get_restricted_params_by_container(container_type_name: str) -> set:
     """The utility function accepts the deployment container type name and returns a set of restricted params
     for that container.
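The matching logic above is easier to see in isolation. Below is a minimal standalone sketch, assuming the `InferenceContainerType` values are "vllm", "tgi", and "llama-cpp" (the real function reads them from the enum):

from typing import Optional

UNKNOWN = "UNKNOWN"
CONTAINER_TYPES = ["vllm", "tgi", "llama-cpp"]  # assumed InferenceContainerType values

def container_env_type(container_type_name: Optional[str]) -> str:
    if not container_type_name:
        return UNKNOWN
    # Case-insensitive substring match, so prefixed/suffixed family names still resolve.
    needle = container_type_name.strip().casefold()
    for container_type in CONTAINER_TYPES:
        if container_type.casefold() in needle:
            return container_type.upper()
    return UNKNOWN

assert container_env_type("odsc-vllm-serving-openai") == "VLLM"
assert container_env_type("ODSC-TGI-Serving") == "TGI"
assert container_env_type(None) == "UNKNOWN"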
ads/aqua/modeldeployment/config_loader.py
CHANGED
@@ -88,6 +88,7 @@ class MultiModelConfig(Serializable):
         gpu_count (int, optional): Number of GPUs count to this model of this shape.
         parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
             configure the behavior of a particular GPU shape.
+        env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
     """
 
     gpu_count: Optional[int] = Field(
@@ -97,6 +98,10 @@ class MultiModelConfig(Serializable):
         default_factory=dict,
         description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
     )
+    env: Optional[Dict[str, Dict[str, str]]] = Field(
+        default_factory=dict,
+        description="Environment variables grouped by namespace",
+    )
 
     class Config:
         extra = "allow"
@@ -130,6 +135,7 @@ class ConfigurationItem(Serializable):
             configure the behavior of a particular GPU shape.
         multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
         shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
+        env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
     """
 
     parameters: Optional[Dict[str, str]] = Field(
@@ -143,6 +149,10 @@ class ConfigurationItem(Serializable):
         default_factory=DeploymentShapeInfo,
         description="The shape information to this model for specific shape",
     )
+    env: Optional[Dict[str, Dict[str, str]]] = Field(
+        default_factory=dict,
+        description="Environment variables grouped by namespace",
+    )
 
     class Config:
         extra = "allow"
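Both models gain the same optional `env` field. For orientation, a raw configuration entry carrying it might look like the following sketch; everything here except the `parameters`, `env`, `gpu_count`, and `multi_model_deployment` keys is invented for illustration:

# Hypothetical deployment-config entry showing where the new "env" field sits;
# the shape name, parameter strings, and variable names are made up. The outer
# key of "env" is the namespace that get_container_env_type() resolves from the
# container family name.
configuration = {
    "VM.GPU.A10.2": {
        "parameters": {"VLLM_PARAMS": "--max-model-len 4096"},
        "env": {"VLLM": {"SOME_VLLM_VAR": "value"}},
        "multi_model_deployment": [
            {
                "gpu_count": 1,
                "parameters": {"VLLM_PARAMS": "--max-model-len 2048"},
                "env": {"VLLM": {"SOME_VLLM_VAR": "value"}},
            }
        ],
    }
}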
ads/aqua/modeldeployment/deployment.py
CHANGED
@@ -27,6 +27,7 @@ from ads.aqua.common.utils import (
     build_pydantic_error_message,
     find_restricted_params,
     get_combined_params,
+    get_container_env_type,
     get_container_params_type,
     get_ocid_substring,
     get_params_list,
@@ -199,7 +200,7 @@ class AquaDeploymentApp(AquaApp):
         if create_deployment_details.instance_shape.lower() not in available_shapes:
             raise AquaValueError(
                 f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' "
-                f"is not
+                f"is not supported in the {self.region} region. Please choose another shape to deploy the model."
             )
 
         # Get container config
@@ -381,6 +382,7 @@ class AquaDeploymentApp(AquaApp):
             Tags.AQUA_SERVICE_MODEL_TAG,
             Tags.AQUA_FINE_TUNED_MODEL_TAG,
             Tags.AQUA_TAG,
+            Tags.BASE_MODEL_CUSTOM,
         ]:
             if tag in aqua_model.freeform_tags:
                 tags[tag] = aqua_model.freeform_tags[tag]
@@ -1042,6 +1044,7 @@ class AquaDeploymentApp(AquaApp):
         config = self.get_config_from_metadata(
             model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
         ).config
+
         if config:
             logger.info(
                 f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
@@ -1126,7 +1129,7 @@ class AquaDeploymentApp(AquaApp):
         model_id: str,
         instance_shape: str,
         gpu_count: int = None,
-    ) ->
+    ) -> Dict:
         """Gets the default params set in the deployment configs for the given model and instance shape.
 
         Parameters
@@ -1148,6 +1151,7 @@ class AquaDeploymentApp(AquaApp):
 
         """
         default_params = []
+        default_envs = {}
         config_params = {}
         model = DataScienceModel.from_id(model_id)
         try:
@@ -1157,19 +1161,15 @@ class AquaDeploymentApp(AquaApp):
         except ValueError:
             container_type_key = UNKNOWN
             logger.debug(
-                f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the
+                f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
+                f"custom metadata field for model {model_id}."
             )
 
-        if (
-            container_type_key
-            and container_type_key in InferenceContainerTypeFamily.values()
-        ):
+        if container_type_key:
             deployment_config = self.get_deployment_config(model_id)
-
             instance_shape_config = deployment_config.configuration.get(
                 instance_shape, ConfigurationItem()
             )
-
             if instance_shape_config.multi_model_deployment and gpu_count:
                 gpu_params = instance_shape_config.multi_model_deployment
 
@@ -1178,12 +1178,18 @@ class AquaDeploymentApp(AquaApp):
                     config_params = gpu_config.parameters.get(
                         get_container_params_type(container_type_key), UNKNOWN
                     )
+                    default_envs = instance_shape_config.env.get(
+                        get_container_env_type(container_type_key), {}
+                    )
                     break
 
         else:
             config_params = instance_shape_config.parameters.get(
                 get_container_params_type(container_type_key), UNKNOWN
             )
+            default_envs = instance_shape_config.env.get(
+                get_container_env_type(container_type_key), {}
+            )
 
         if config_params:
             params_list = get_params_list(config_params)
@@ -1196,7 +1202,7 @@ class AquaDeploymentApp(AquaApp):
                 if params.split()[0] not in restricted_params_set:
                     default_params.append(params)
 
-        return default_params
+        return {"data": default_params, "env": default_envs}
 
     def validate_deployment_params(
         self,
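Note the return-shape change: `get_deployment_default_params` previously returned a bare list of params and now returns a dict carrying both params and default environment variables. A usage sketch of the new shape (the OCID, shape name, and values are illustrative, and `app` is assumed to be an `AquaDeploymentApp` instance):

# app is an AquaDeploymentApp instance; the key names "data" and "env" come
# from the return statement in the diff above, all values shown are made up.
defaults = app.get_deployment_default_params(
    model_id="ocid1.datasciencemodel.oc1..<unique_id>",
    instance_shape="VM.GPU.A10.2",
)
default_params = defaults["data"]  # e.g. ["--max-model-len 4096"]
default_envs = defaults["env"]     # e.g. {"SOME_VLLM_VAR": "value"}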
ads/aqua/modeldeployment/entities.py
CHANGED
@@ -233,6 +233,7 @@ class CreateModelDeploymentDetails(BaseModel):
         None, description="The description of the deployment."
     )
     model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
+
     models: Optional[List[AquaMultiModelRef]] = Field(
         None, description="List of models for multimodel deployment."
     )
ads/opctl/operator/lowcode/common/data.py
CHANGED
@@ -19,16 +19,21 @@ from .transformations import Transformations
 
 
 class AbstractData(ABC):
-    def __init__(self, spec, name="input_data", data=None):
+    def __init__(self, spec, name="input_data", data=None, subset=None):
         self.Transformations = Transformations
         self.data = None
         self._data_dict = dict()
         self.name = name
         self.spec = spec
+        self.subset = subset
         if data is not None:
             self.data = data
         else:
             self.load_transform_ingest_data(spec)
+        # Subset by series if requested
+        # if self.subset is not None and hasattr(self, 'data') and self.data is not None:
+        #     subset_str = [str(s) for s in self.subset]
+        #     self.data = self.data[self.data.index.get_level_values(DataColumns.Series).isin(subset_str)]
 
     def get_raw_data_by_cat(self, category):
         mapping = self._data_transformer.get_target_category_columns_map()
@@ -72,7 +77,7 @@ class AbstractData(ABC):
     def _load_data(self, data_spec, **kwargs):
         loading_start_time = time.time()
         try:
-            raw_data = load_data(data_spec)
+            raw_data = load_data(data_spec, subset=self.subset if self.subset else None, target_category_columns=self.spec.target_category_columns)
         except InvalidParameterError as e:
             e.args = e.args + (f"Invalid Parameter: {self.name}",)
             raise e
ads/opctl/operator/lowcode/common/transformations.py
CHANGED
@@ -294,3 +294,210 @@ class Transformations(ABC):
     def _fill_na(self, df: pd.DataFrame, na_value=0) -> pd.DataFrame:
         """Fill nans in dataframe"""
         return df.fillna(value=na_value)
+
+    def build_fforms_meta_features(self, data, target_col=None, group_cols=None):
+        """
+        Build meta-features for time series based on FFORMS paper and add them to the original DataFrame.
+
+        Parameters
+        ----------
+        data : pandas.DataFrame
+            Input DataFrame containing time series data
+        target_col : str, optional
+            Name of the target column to calculate meta-features for.
+            If None, uses the target column specified in dataset_info.
+        group_cols : list of str, optional
+            List of columns to group by before calculating meta-features.
+            If None, calculates features for the entire series.
+
+        Returns
+        -------
+        pandas.DataFrame
+            Original DataFrame with additional meta-feature columns
+
+        References
+        ----------
+        Talagala, T. S., Hyndman, R. J., & Athanasopoulos, G. (2023).
+        Meta-learning how to forecast time series. Journal of Forecasting, 42(6), 1476-1501.
+        """
+        if not isinstance(data, pd.DataFrame):
+            raise ValueError("Input must be a pandas DataFrame")
+
+        # Use target column from dataset_info if not specified
+        if target_col is None:
+            target_col = self.target_column_name
+        if target_col not in data.columns:
+            raise ValueError(f"Target column '{target_col}' not found in DataFrame")
+
+        # Check if group_cols are provided and valid
+        if group_cols is not None:
+            if not isinstance(group_cols, list):
+                raise ValueError("group_cols must be a list of column names")
+            for col in group_cols:
+                if col not in data.columns:
+                    raise ValueError(f"Group column '{col}' not found in DataFrame")
+
+        # If no group_cols, get the target_category_columns else treat the entire DataFrame as a single series
+        if not group_cols:
+            group_cols = self.target_category_columns if self.target_category_columns else []
+
+        # Calculate meta-features for each series
+        def calculate_series_features(series):
+            """Calculate features for a single series"""
+            n = len(series)
+            values = series.values
+
+            # Basic statistics
+            mean = series.mean()
+            std = series.std()
+            variance = series.var()
+            skewness = series.skew()
+            kurtosis = series.kurtosis()
+            cv = std / mean if mean != 0 else np.inf
+
+            # Trend features
+            X = np.vstack([np.arange(n), np.ones(n)]).T
+            trend_coef = np.linalg.lstsq(X, values, rcond=None)[0][0]
+            trend_pred = X.dot(np.linalg.lstsq(X, values, rcond=None)[0])
+            residuals = values - trend_pred
+            std_residuals = np.std(residuals)
+
+            # Turning points
+            turning_points = 0
+            for i in range(1, n-1):
+                if (values[i-1] < values[i] and values[i] > values[i+1]) or \
+                   (values[i-1] > values[i] and values[i] < values[i+1]):
+                    turning_points += 1
+            turning_points_rate = turning_points / (n-2) if n > 2 else 0
+
+            # Serial correlation
+            acf1 = series.autocorr(lag=1) if n > 1 else 0
+            acf2 = series.autocorr(lag=2) if n > 2 else 0
+            acf10 = series.autocorr(lag=10) if n > 10 else 0
+
+            # Seasonality features
+            seasonal_strength = 0
+            seasonal_peak_strength = 0
+            if n >= 12:
+                seasonal_lags = [12, 24, 36]
+                seasonal_acfs = []
+                for lag in seasonal_lags:
+                    if n > lag:
+                        acf_val = series.autocorr(lag=lag)
+                        seasonal_acfs.append(abs(acf_val))
+                seasonal_peak_strength = max(seasonal_acfs) if seasonal_acfs else 0
+
+                ma = series.rolling(window=12, center=True).mean()
+                seasonal_comp = series - ma
+                seasonal_strength = 1 - np.var(seasonal_comp.dropna()) / np.var(series)
+
+            # Stability and volatility features
+            values_above_mean = values >= mean
+            crossing_points = np.sum(values_above_mean[1:] != values_above_mean[:-1])
+            crossing_rate = crossing_points / (n - 1) if n > 1 else 0
+
+            # First and second differences
+            diff1 = np.diff(values)
+            diff2 = np.diff(diff1) if len(diff1) > 1 else np.array([])
+
+            diff1_mean = np.mean(np.abs(diff1)) if len(diff1) > 0 else 0
+            diff1_var = np.var(diff1) if len(diff1) > 0 else 0
+            diff2_mean = np.mean(np.abs(diff2)) if len(diff2) > 0 else 0
+            diff2_var = np.var(diff2) if len(diff2) > 0 else 0
+
+            # Nonlinearity features
+            if n > 3:
+                X = values[:-1].reshape(-1, 1)
+                y = values[1:]
+                X2 = X * X
+                X3 = X * X * X
+                X_aug = np.hstack([X, X2, X3])
+                nonlinearity = np.linalg.lstsq(X_aug, y, rcond=None)[1][0] if len(y) > 0 else 0
+            else:
+                nonlinearity = 0
+
+            # Long-term trend features
+            if n >= 10:
+                mid = n // 2
+                trend_change = np.mean(values[mid:]) - np.mean(values[:mid])
+            else:
+                trend_change = 0
+
+            # Step changes and spikes
+            step_changes = np.abs(diff1).max() if len(diff1) > 0 else 0
+            spikes = np.sum(np.abs(values - mean) > 2 * std) / n if std != 0 else 0
+
+            # Hurst exponent and entropy
+            lag = min(10, n // 2)
+            variance_ratio = np.var(series.diff(lag)) / (lag * np.var(series.diff())) if n > lag else 0
+            hurst = np.log(variance_ratio) / (2 * np.log(lag)) if variance_ratio > 0 and lag > 1 else 0
+
+            hist, _ = np.histogram(series, bins='auto', density=True)
+            entropy = -np.sum(hist[hist > 0] * np.log(hist[hist > 0]))
+
+            return pd.Series({
+                'ts_n_obs': n,
+                'ts_mean': mean,
+                'ts_std': std,
+                'ts_variance': variance,
+                'ts_cv': cv,
+                'ts_skewness': skewness,
+                'ts_kurtosis': kurtosis,
+                'ts_trend': trend_coef,
+                'ts_trend_change': trend_change,
+                'ts_std_residuals': std_residuals,
+                'ts_turning_points_rate': turning_points_rate,
+                'ts_seasonal_strength': seasonal_strength,
+                'ts_seasonal_peak_strength': seasonal_peak_strength,
+                'ts_acf1': acf1,
+                'ts_acf2': acf2,
+                'ts_acf10': acf10,
+                'ts_crossing_rate': crossing_rate,
+                'ts_diff1_mean': diff1_mean,
+                'ts_diff1_variance': diff1_var,
+                'ts_diff2_mean': diff2_mean,
+                'ts_diff2_variance': diff2_var,
+                'ts_nonlinearity': nonlinearity,
+                'ts_step_max': step_changes,
+                'ts_spikes_rate': spikes,
+                'ts_hurst': hurst,
+                'ts_entropy': entropy
+            })
+
+        # Create copy of input DataFrame
+        result_df = data.copy()
+
+        if group_cols:
+            # Calculate features for each group
+            features = []
+            # Sort by date within each group if date column exists
+            date_col = self.dt_column_name if self.dt_column_name else 'Date'
+            if date_col in data.columns:
+                data = data.sort_values([date_col] + group_cols)
+
+            for name, group in data.groupby(group_cols):
+                # Sort group by date if exists
+                if date_col in group.columns:
+                    group = group.sort_values(date_col)
+                group_features = calculate_series_features(group[target_col])
+                if isinstance(name, tuple):
+                    feature_row = dict(zip(group_cols, name))
+                else:
+                    feature_row = {group_cols[0]: name}
+                feature_row.update(group_features)
+                features.append(feature_row)
+
+            # Create features DataFrame without merging
+            features_df = pd.DataFrame(features)
+            # Return only the meta-features DataFrame with group columns
+            return features_df
+        else:
+            # Sort by date if exists and calculate features for entire series
+            date_col = self.dt_column_name if self.dt_column_name else 'Date'
+            if date_col in data.columns:
+                data = data.sort_values(date_col)
+            features = calculate_series_features(data[target_col])
+            # Return single row DataFrame with meta-features
+            return pd.DataFrame([features])
+
+        return result_df
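The per-series shape of the output is the main thing to internalize: one row of features per group. A small self-contained sketch of a few of the FFORMS-style features above, computed with a plain pandas groupby (column and series names invented, and the feature set truncated for brevity):

import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "store": ["A"] * 24 + ["B"] * 24,
        "sales": np.r_[np.arange(24.0), np.sin(np.arange(24))],
    }
)

def features(s: pd.Series) -> pd.Series:
    # A handful of the same statistics build_fforms_meta_features computes.
    n = len(s)
    mean, std = s.mean(), s.std()
    return pd.Series(
        {
            "ts_n_obs": n,
            "ts_cv": std / mean if mean != 0 else np.inf,
            "ts_acf1": s.autocorr(lag=1) if n > 1 else 0,
            "ts_spikes_rate": np.sum(np.abs(s.values - mean) > 2 * std) / n,
        }
    )

meta = df.groupby("store")["sales"].apply(features).unstack()
print(meta)  # one row of meta-features per series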
ads/opctl/operator/lowcode/common/utils.py
CHANGED
@@ -124,6 +124,14 @@ def load_data(data_spec, storage_options=None, **kwargs):
         data = data[columns]
     if limit:
         data = data[:limit]
+    # Filtering by subset if provided
+    subset = kwargs.get('subset', None)
+    if subset is not None:
+        target_category_columns = kwargs.get('target_category_columns', None)
+        mask = False
+        for col in target_category_columns:
+            mask = mask | data[col].isin(subset)
+        data = data[mask]
    return data
 
 
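The subset plumbing spans two files: `AbstractData.__init__` forwards its new `subset` argument to `load_data`, and `load_data` applies the mask above, keeping any row whose target-category columns match a subset value. A tiny self-contained repro of that mask (column and series names invented):

import pandas as pd

data = pd.DataFrame(
    {"store": ["A", "B", "C"], "item": ["x", "y", "z"], "sales": [1, 2, 3]}
)
subset, target_category_columns = ["A", "z"], ["store", "item"]

mask = False
for col in target_category_columns:
    mask = mask | data[col].isin(subset)  # False | Series broadcasts to a boolean Series
print(data[mask])  # keeps the rows for store "A" and item "z"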
ads/opctl/operator/lowcode/forecast/__main__.py
CHANGED
@@ -3,17 +3,20 @@
 # Copyright (c) 2023, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
+import copy
 import json
 import os
 import sys
 from typing import Dict, List
 
+import pandas as pd
 import yaml
 
 from ads.opctl import logger
 from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
 from ads.opctl.operator.common.utils import _parse_input_args
 
+from .const import AUTO_SELECT_SERIES
 from .model.forecast_datasets import ForecastDatasets, ForecastResults
 from .operator_config import ForecastOperatorConfig
 from .whatifserve import ModelDeploymentManager
@@ -24,9 +27,56 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults:
     from .model.factory import ForecastOperatorModelFactory
 
     datasets = ForecastDatasets(operator_config)
-
-
-
+    model = ForecastOperatorModelFactory.get_model(operator_config, datasets)
+
+    if operator_config.spec.model == AUTO_SELECT_SERIES and hasattr(
+        operator_config.spec, "meta_features"
+    ):
+        # For AUTO_SELECT_SERIES, handle each series with its specific model
+        meta_features = operator_config.spec.meta_features
+        results = ForecastResults()
+        sub_results_list = []
+
+        # Group the data by selected model
+        for model_name in meta_features["selected_model"].unique():
+            # Get series that use this model
+            series_groups = meta_features[meta_features["selected_model"] == model_name]
+
+            # Create a sub-config for this model
+            sub_config = copy.deepcopy(operator_config)
+            sub_config.spec.model = model_name
+
+            # Create sub-datasets for these series
+            sub_datasets = ForecastDatasets(
+                operator_config,
+                subset=series_groups[operator_config.spec.target_category_columns]
+                .values.flatten()
+                .tolist(),
+            )
+
+            # Get and run the appropriate model
+            sub_model = ForecastOperatorModelFactory.get_model(sub_config, sub_datasets)
+            sub_result_df, sub_elapsed_time = sub_model.build_model()
+            sub_results = sub_model.generate_report(
+                result_df=sub_result_df,
+                elapsed_time=sub_elapsed_time,
+                save_sub_reports=True,
+            )
+            sub_results_list.append(sub_results)
+
+            # results_df = pd.concat([results_df, sub_result_df], ignore_index=True, axis=0)
+            # elapsed_time += sub_elapsed_time
+        # Merge all sub_results into a single ForecastResults object
+        if sub_results_list:
+            results = sub_results_list[0]
+            for sub_result in sub_results_list[1:]:
+                results.merge(sub_result)
+        else:
+            results = None
+
+    else:
+        # For other cases, use the single selected model
+        results = model.generate_report()
     # saving to model catalog
     spec = operator_config.spec
     if spec.what_if_analysis and datasets.additional_data:
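The AUTO_SELECT_SERIES branch assumes `spec.meta_features` is a DataFrame with one row per series, carrying the target-category columns plus a `selected_model` column naming each series' winner. A hypothetical sketch of the grouping it performs (column and model names made up):

import pandas as pd

meta_features = pd.DataFrame(
    {"store": ["A", "B", "C"], "selected_model": ["prophet", "arima", "prophet"]}
)
# Same routing as operate(): one sub-run per distinct selected model,
# restricted to the series that chose it.
for model_name in meta_features["selected_model"].unique():
    series_groups = meta_features[meta_features["selected_model"] == model_name]
    subset = series_groups[["store"]].values.flatten().tolist()
    print(model_name, subset)  # prophet ['A', 'C'], then arima ['B']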
ads/opctl/operator/lowcode/forecast/const.py
CHANGED
@@ -89,4 +89,6 @@ SUMMARY_METRICS_HORIZON_LIMIT = 10
 PROPHET_INTERNAL_DATE_COL = "ds"
 RENDER_LIMIT = 5000
 AUTO_SELECT = "auto-select"
+AUTO_SELECT_SERIES = "auto-select-series"
 BACKTEST_REPORT_NAME = "back_test.csv"
+TROUBLESHOOTING_GUIDE = "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-operators/troubleshooting.md"
ads/opctl/operator/lowcode/forecast/errors.py
CHANGED
@@ -4,6 +4,9 @@
 # Copyright (c) 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
+from ads.opctl.operator.lowcode.forecast.const import TROUBLESHOOTING_GUIDE
+
+
 class ForecastSchemaYamlError(Exception):
     """Exception raised when there is an issue with the schema."""
 
@@ -12,6 +15,7 @@ class ForecastSchemaYamlError(Exception):
             "Invalid forecast operator specification. Check the YAML structure and ensure it "
             "complies with the required schema for forecast operator. \n"
             f"{error}"
+            f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
         )
 
@@ -23,4 +27,5 @@ class ForecastInputDataError(Exception):
             "Invalid input data. Check the input data and ensure it "
             "complies with the validation criteria. \n"
             f"{error}"
+            f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
         )