oracle-ads 2.12.10rc0__py3-none-any.whl → 2.12.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ads/aqua/__init__.py +2 -1
  2. ads/aqua/app.py +30 -16
  3. ads/aqua/client/__init__.py +3 -0
  4. ads/aqua/client/client.py +799 -0
  5. ads/aqua/evaluation/evaluation.py +20 -12
  6. ads/aqua/extension/aqua_ws_msg_handler.py +14 -7
  7. ads/aqua/extension/base_handler.py +12 -9
  8. ads/aqua/extension/model_handler.py +6 -1
  9. ads/aqua/finetuning/entities.py +3 -0
  10. ads/aqua/finetuning/finetuning.py +32 -1
  11. ads/aqua/model/entities.py +2 -1
  12. ads/aqua/model/model.py +136 -76
  13. ads/aqua/modeldeployment/deployment.py +22 -10
  14. ads/cli.py +16 -8
  15. ads/opctl/operator/lowcode/common/transformations.py +38 -3
  16. ads/opctl/operator/lowcode/common/utils.py +11 -1
  17. ads/opctl/operator/lowcode/forecast/__main__.py +10 -0
  18. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +1 -1
  19. ads/opctl/operator/lowcode/forecast/operator_config.py +31 -0
  20. ads/opctl/operator/lowcode/forecast/schema.yaml +63 -0
  21. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  22. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +233 -0
  23. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +238 -0
  24. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/METADATA +3 -1
  25. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/RECORD +28 -23
  26. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/LICENSE.txt +0 -0
  27. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/WHEEL +0 -0
  28. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.12.11.dist-info}/entry_points.txt +0 -0
ads/opctl/operator/lowcode/common/transformations.py

@@ -15,6 +15,7 @@ from ads.opctl.operator.lowcode.common.errors import (
     InvalidParameterError,
 )
 from ads.opctl.operator.lowcode.common.utils import merge_category_columns
+from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorSpec


 class Transformations(ABC):
@@ -34,6 +35,7 @@ class Transformations(ABC):
         self.dataset_info = dataset_info
         self.target_category_columns = dataset_info.target_category_columns
         self.target_column_name = dataset_info.target_column
+        self.raw_column_names = None
         self.dt_column_name = (
             dataset_info.datetime_column.name if dataset_info.datetime_column else None
         )
@@ -60,7 +62,8 @@ class Transformations(ABC):

         """
         clean_df = self._remove_trailing_whitespace(data)
-        # clean_df = self._normalize_column_names(clean_df)
+        if isinstance(self.dataset_info, ForecastOperatorSpec):
+            clean_df = self._clean_column_names(clean_df)
         if self.name == "historical_data":
             self._check_historical_dataset(clean_df)
         clean_df = self._set_series_id_column(clean_df)
@@ -98,8 +101,36 @@ class Transformations(ABC):
     def _remove_trailing_whitespace(self, df):
         return df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

-    # def _normalize_column_names(self, df):
-    #     return df.rename(columns=lambda x: re.sub("[^A-Za-z0-9_]+", "", x))
+    def _clean_column_names(self, df):
+        """
+        Remove all whitespaces from column names in a DataFrame and store the original names.
+
+        Parameters:
+            df (pd.DataFrame): The DataFrame whose column names need to be cleaned.
+
+        Returns:
+            pd.DataFrame: The DataFrame with cleaned column names.
+        """
+
+        self.raw_column_names = {
+            col: col.replace(" ", "") for col in df.columns if " " in col
+        }
+        df.columns = [self.raw_column_names.get(col, col) for col in df.columns]
+
+        if self.target_column_name:
+            self.target_column_name = self.raw_column_names.get(
+                self.target_column_name, self.target_column_name
+            )
+        self.dt_column_name = self.raw_column_names.get(
+            self.dt_column_name, self.dt_column_name
+        )
+
+        if self.target_category_columns:
+            self.target_category_columns = [
+                self.raw_column_names.get(col, col)
+                for col in self.target_category_columns
+            ]
+        return df

     def _set_series_id_column(self, df):
         self._target_category_columns_map = {}
@@ -233,6 +264,10 @@ class Transformations(ABC):
         expected_names = [self.target_column_name, self.dt_column_name] + (
             self.target_category_columns if self.target_category_columns else []
         )
+
+        if self.raw_column_names:
+            expected_names.extend(list(self.raw_column_names.values()))
+
         if set(df.columns) != set(expected_names):
             raise DataMismatchError(
                 f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
            )

ads/opctl/operator/lowcode/common/utils.py

@@ -12,6 +12,7 @@ from typing import List, Union

 import fsspec
 import oracledb
+import json
 import pandas as pd

 from ads.common.object_storage_details import ObjectStorageDetails
@@ -125,7 +126,7 @@ def load_data(data_spec, storage_options=None, **kwargs):
     return data


-def write_data(data, filename, format, storage_options, index=False, **kwargs):
+def write_data(data, filename, format, storage_options=None, index=False, **kwargs):
     disable_print()
     if not format:
         _, format = os.path.splitext(filename)
@@ -141,6 +142,15 @@ def write_data(data, filename, format, storage_options=None, index=False, **kwargs):
     )


+def write_simple_json(data, path):
+    if ObjectStorageDetails.is_oci_path(path):
+        storage_options = default_signer()
+    else:
+        storage_options = {}
+    with fsspec.open(path, mode="w", **storage_options) as f:
+        json.dump(data, f, indent=4)
+
+
 def merge_category_columns(data, target_category_columns):
     result = data.apply(
         lambda x: "__".join([str(x[col]) for col in target_category_columns]), axis=1
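
The new `write_simple_json` resolves auth from the path itself: `oci://` targets get `default_signer()`, everything else falls through to plain fsspec. A usage sketch (both paths are hypothetical), which also benefits from `write_data`'s new `storage_options=None` default for local writes:

    from ads.opctl.operator.lowcode.common.utils import write_simple_json

    # Local target: no storage options needed.
    write_simple_json({"status": "ok"}, "/tmp/deployment_info.json")

    # Object Storage target: default_signer() supplies the auth kwargs.
    write_simple_json({"status": "ok"}, "oci://my-bucket@my-namespace/deployment_info.json")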

ads/opctl/operator/lowcode/forecast/__main__.py

@@ -17,6 +17,7 @@ from ads.opctl.operator.common.utils import _parse_input_args

 from .operator_config import ForecastOperatorConfig
 from .model.forecast_datasets import ForecastDatasets
+from .whatifserve import ModelDeploymentManager


 def operate(operator_config: ForecastOperatorConfig) -> None:
@@ -27,6 +28,15 @@ def operate(operator_config: ForecastOperatorConfig) -> None:
     ForecastOperatorModelFactory.get_model(
         operator_config, datasets
     ).generate_report()
+    # saving to model catalog
+    spec = operator_config.spec
+    if spec.what_if_analysis and datasets.additional_data:
+        mdm = ModelDeploymentManager(spec, datasets.additional_data)
+        mdm.save_to_catalog()
+        if spec.what_if_analysis.model_deployment:
+            mdm.create_deployment()
+        mdm.save_deployment_info()
+

 def verify(spec: Dict, **kwargs: Dict) -> bool:
     """Verifies the forecasting operator config."""

ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py

@@ -167,7 +167,7 @@ class ForecastDatasets:
                 self.historical_data.data,
                 self.additional_data.data,
             ],
-            axis=1,
+            axis=1
         )

     def get_data_by_series(self, include_horizon=True):

ads/opctl/operator/lowcode/forecast/operator_config.py

@@ -18,6 +18,35 @@ from ads.opctl.operator.lowcode.common.utils import find_output_dirname

 from .const import SpeedAccuracyMode, SupportedMetrics, SupportedModels

+@dataclass
+class AutoScaling(DataClassSerializable):
+    """Class representing simple autoscaling policy"""
+    minimum_instance: int = 1
+    maximum_instance: int = None
+    cool_down_in_seconds: int = 600
+    scale_in_threshold: int = 10
+    scale_out_threshold: int = 80
+    scaling_metric: str = "CPU_UTILIZATION"
+
+@dataclass(repr=True)
+class ModelDeploymentServer(DataClassSerializable):
+    """Class representing model deployment server specification for whatif-analysis."""
+    display_name: str = None
+    initial_shape: str = None
+    description: str = None
+    log_group: str = None
+    log_id: str = None
+    auto_scaling: AutoScaling = field(default_factory=AutoScaling)
+
+
+@dataclass(repr=True)
+class WhatIfAnalysis(DataClassSerializable):
+    """Class representing operator specification for whatif-analysis."""
+    model_display_name: str = None
+    compartment_id: str = None
+    project_id: str = None
+    model_deployment: ModelDeploymentServer = field(default_factory=ModelDeploymentServer)
+

 @dataclass(repr=True)
 class TestData(InputData):
@@ -90,12 +119,14 @@ class ForecastOperatorSpec(DataClassSerializable):
     confidence_interval_width: float = None
     metric: str = None
     tuning: Tuning = field(default_factory=Tuning)
+    what_if_analysis: WhatIfAnalysis = field(default_factory=WhatIfAnalysis)

     def __post_init__(self):
         """Adjusts the specification details."""
         self.output_directory = self.output_directory or OutputDirectory(
             url=find_output_dirname(self.output_directory)
         )
+        self.generate_model_pickle = True if self.generate_model_pickle or self.what_if_analysis else False
         self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower()
         self.model = self.model or SupportedModels.Prophet
         self.confidence_interval_width = self.confidence_interval_width or 0.80
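
For readers wiring this up programmatically rather than via YAML, the new dataclasses compose as plain objects. A minimal sketch (every identifier value below is a hypothetical placeholder, not a package default):

    from ads.opctl.operator.lowcode.forecast.operator_config import (
        AutoScaling,
        ModelDeploymentServer,
        WhatIfAnalysis,
    )

    what_if = WhatIfAnalysis(
        model_display_name="sales-forecast-models",
        model_deployment=ModelDeploymentServer(
            display_name="sales-forecast-md",
            initial_shape="VM.Standard.E4.Flex",
            log_group="ocid1.loggroup.oc1..example",
            log_id="ocid1.log.oc1..example",
            auto_scaling=AutoScaling(maximum_instance=2),
        ),
    )

Note that `__post_init__` forces `generate_model_pickle` on whenever `what_if_analysis` is set, since the saved pickle is what `ModelDeploymentManager.save_to_catalog` later loads.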

ads/opctl/operator/lowcode/forecast/schema.yaml

@@ -353,6 +353,69 @@ spec:
     meta:
       description: "Report file generation can be enabled using this flag. Defaults to true."

+  what_if_analysis:
+    type: dict
+    required: false
+    schema:
+      model_deployment:
+        type: dict
+        required: false
+        meta: "If model_deployment id is not specified, a new model deployment is created; otherwise, the model is linked to the specified model deployment."
+        schema:
+          id:
+            type: string
+            required: false
+          display_name:
+            type: string
+            required: false
+          initial_shape:
+            type: string
+            required: false
+          description:
+            type: string
+            required: false
+          log_group:
+            type: string
+            required: true
+          log_id:
+            type: string
+            required: true
+          auto_scaling:
+            type: dict
+            required: false
+            schema:
+              minimum_instance:
+                type: integer
+                required: true
+              maximum_instance:
+                type: integer
+                required: true
+              scale_in_threshold:
+                type: integer
+                required: true
+              scale_out_threshold:
+                type: integer
+                required: true
+              scaling_metric:
+                type: string
+                required: true
+              cool_down_in_seconds:
+                type: integer
+                required: true
+      model_display_name:
+        type: string
+        required: true
+      project_id:
+        type: string
+        required: false
+        meta: "If not provided, The project OCID from config.PROJECT_OCID is used"
+      compartment_id:
+        type: string
+        required: false
+        meta: "If not provided, The compartment OCID from config.NB_SESSION_COMPARTMENT_OCID is used."
+    meta:
+      description: "When enabled, the models are saved to the model catalog. Defaults to false."
+
   generate_metrics:
     type: boolean
     required: false
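
An illustrative operator spec fragment that exercises the new block (every value below is a hypothetical placeholder; `log_group`, `log_id`, and `model_display_name` are the required keys per the schema above):

    spec:
      what_if_analysis:
        model_display_name: sales-forecast-models
        compartment_id: ocid1.compartment.oc1..example
        project_id: ocid1.datascienceproject.oc1..example
        model_deployment:
          display_name: sales-forecast-md
          initial_shape: VM.Standard.E4.Flex
          log_group: ocid1.loggroup.oc1..example
          log_id: ocid1.log.oc1..example
          auto_scaling:
            minimum_instance: 1
            maximum_instance: 2
            scale_in_threshold: 10
            scale_out_threshold: 80
            scaling_metric: CPU_UTILIZATION
            cool_down_in_seconds: 600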

ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py (new file)

@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+
+from .deployment_manager import ModelDeploymentManager

ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py (new file)

@@ -0,0 +1,233 @@
+#!/usr/bin/env python
+import json
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+import pickle
+import shutil
+import sys
+import tempfile
+import oci
+
+import pandas as pd
+import cloudpickle
+
+from ads.opctl import logger
+from ads.common.model_export_util import prepare_generic_model
+from ads.opctl.operator.lowcode.common.utils import write_data, write_simple_json
+from ads.opctl.operator.lowcode.common.utils import default_signer
+from ..model.forecast_datasets import AdditionalData
+from ..operator_config import ForecastOperatorSpec
+
+from oci.data_science import DataScienceClient, DataScienceClientCompositeOperations
+
+from oci.data_science.models import ModelConfigurationDetails, InstanceConfiguration, \
+    FixedSizeScalingPolicy, CategoryLogDetails, LogDetails, \
+    SingleModelDeploymentConfigurationDetails, CreateModelDeploymentDetails
+from ads.common.object_storage_details import ObjectStorageDetails
+
+
+class ModelDeploymentManager:
+    def __init__(self, spec: ForecastOperatorSpec, additional_data: AdditionalData, previous_model_version=None):
+        self.spec = spec
+        self.model_name = spec.model
+        self.horizon = spec.horizon
+        self.additional_data = additional_data.get_dict_by_series()
+        self.model_obj = {}
+        self.display_name = spec.what_if_analysis.model_display_name
+        self.project_id = spec.what_if_analysis.project_id if spec.what_if_analysis.project_id \
+            else os.environ.get('PROJECT_OCID')
+        self.compartment_id = spec.what_if_analysis.compartment_id if spec.what_if_analysis.compartment_id \
+            else os.environ.get('NB_SESSION_COMPARTMENT_OCID')
+        if self.project_id is None or self.compartment_id is None:
+            raise ValueError("Either project_id or compartment_id cannot be None.")
+        self.path_to_artifact = f"{self.spec.output_directory.url}/artifacts/"
+        self.pickle_file_path = f"{self.spec.output_directory.url}/model.pkl"
+        self.model_version = previous_model_version + 1 if previous_model_version else 1
+        self.catalog_id = None
+        self.test_mode = os.environ.get("TEST_MODE", False)
+        self.deployment_info = {}
+
+    def _sanity_test(self):
+        """
+        Function perform sanity test for saved artifact
+        """
+        org_sys_path = sys.path[:]
+        try:
+            sys.path.insert(0, f"{self.path_to_artifact}")
+            from score import load_model, predict
+            _ = load_model()
+
+            # Write additional data to tmp file and perform sanity check
+            with tempfile.NamedTemporaryFile(suffix='.csv') as temp_file:
+                one_series = next(iter(self.additional_data))
+                sample_prediction_data = self.additional_data[one_series].tail(self.horizon)
+                sample_prediction_data[self.spec.target_category_columns[0]] = one_series
+                date_col_name = self.spec.datetime_column.name
+                date_col_format = self.spec.datetime_column.format
+                sample_prediction_data[date_col_name] = sample_prediction_data[date_col_name].dt.strftime(
+                    date_col_format)
+                sample_prediction_data.to_csv(temp_file.name, index=False)
+                input_data = {"additional_data": {"url": temp_file.name}}
+                prediction_test = predict(input_data, _)
+                logger.info(f"prediction test completed with result :{prediction_test}")
+        except Exception as e:
+            logger.error(f"An error occurred during the sanity test: {e}")
+            raise
+        finally:
+            sys.path = org_sys_path
+
+    def _copy_score_file(self):
+        """
+        Copies the score.py to the artifact_path.
+        """
+        try:
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            score_file = os.path.join(current_dir, "score.py")
+            destination_file = os.path.join(self.path_to_artifact, os.path.basename(score_file))
+            shutil.copy2(score_file, destination_file)
+            logger.info(f"score.py copied successfully to {self.path_to_artifact}")
+        except Exception as e:
+            logger.warn(f"Error copying file: {e}")
+            raise e
+
+    def save_to_catalog(self):
+        """Save the model to a model catalog"""
+        with open(self.pickle_file_path, 'rb') as file:
+            self.model_obj = pickle.load(file)
+
+        if not os.path.exists(self.path_to_artifact):
+            os.mkdir(self.path_to_artifact)
+
+        artifact_dict = {"spec": self.spec.to_dict(), "models": self.model_obj}
+        with open(f"{self.path_to_artifact}/models.pickle", "wb") as f:
+            cloudpickle.dump(artifact_dict, f)
+        artifact = prepare_generic_model(
+            self.path_to_artifact,
+            function_artifacts=False,
+            force_overwrite=True,
+            data_science_env=True)
+
+        self._copy_score_file()
+        self._sanity_test()
+
+        if isinstance(self.model_obj, dict):
+            series = self.model_obj.keys()
+        else:
+            series = self.additional_data.keys()
+        description = f"The object contains {len(series)} {self.model_name} models"
+
+        if not self.test_mode:
+            catalog_entry = artifact.save(
+                display_name=self.display_name,
+                compartment_id=self.compartment_id,
+                project_id=self.project_id,
+                description=description)
+            self.catalog_id = catalog_entry.id
+
+        logger.info(f"Saved {self.model_name} version-v{self.model_version} to model catalog"
+                    f" with model ocid : {self.catalog_id}")
+
+        self.deployment_info = {"model_ocid": self.catalog_id, "series": list(series)}
+
+    def create_deployment(self):
+        """Create a model deployment serving"""
+
+        # create new model deployment
+        initial_shape = self.spec.what_if_analysis.model_deployment.initial_shape
+        name = self.spec.what_if_analysis.model_deployment.display_name
+        description = self.spec.what_if_analysis.model_deployment.description
+        auto_scaling_config = self.spec.what_if_analysis.model_deployment.auto_scaling
+
+        # if auto_scaling_config is defined
+        if auto_scaling_config:
+            scaling_policy = oci.data_science.models.AutoScalingPolicy(
+                policy_type="AUTOSCALING",
+                auto_scaling_policies=[
+                    oci.data_science.models.ThresholdBasedAutoScalingPolicyDetails(
+                        auto_scaling_policy_type="THRESHOLD",
+                        rules=[
+                            oci.data_science.models.PredefinedMetricExpressionRule(
+                                metric_expression_rule_type="PREDEFINED_EXPRESSION",
+                                metric_type=auto_scaling_config.scaling_metric,
+                                scale_in_configuration=oci.data_science.models.PredefinedExpressionThresholdScalingConfiguration(
+                                    scaling_configuration_type="THRESHOLD",
+                                    threshold=auto_scaling_config.scale_in_threshold
+                                ),
+                                scale_out_configuration=oci.data_science.models.PredefinedExpressionThresholdScalingConfiguration(
+                                    scaling_configuration_type="THRESHOLD",
+                                    threshold=auto_scaling_config.scale_out_threshold
+                                )
+                            )],
+                        maximum_instance_count=auto_scaling_config.maximum_instance,
+                        minimum_instance_count=auto_scaling_config.minimum_instance,
+                        initial_instance_count=auto_scaling_config.minimum_instance)],
+                cool_down_in_seconds=auto_scaling_config.cool_down_in_seconds,
+                is_enabled=True)
+            logger.info(f"Using autoscaling {auto_scaling_config.scaling_metric} for creating MD")
+        else:
+            scaling_policy = FixedSizeScalingPolicy(instance_count=1)
+            logger.info("Using fixed size policy for creating MD")
+
+        model_configuration_details_object = ModelConfigurationDetails(
+            model_id=self.catalog_id,
+            instance_configuration=InstanceConfiguration(
+                instance_shape_name=initial_shape),
+            scaling_policy=scaling_policy,
+            bandwidth_mbps=20)
+
+        single_model_config = SingleModelDeploymentConfigurationDetails(
+            deployment_type='SINGLE_MODEL',
+            model_configuration_details=model_configuration_details_object
+        )
+
+        log_group = self.spec.what_if_analysis.model_deployment.log_group
+        log_id = self.spec.what_if_analysis.model_deployment.log_id
+
+        logs_configuration_details_object = CategoryLogDetails(
+            access=LogDetails(log_group_id=log_group,
+                              log_id=log_id),
+            predict=LogDetails(log_group_id=log_group,
+                               log_id=log_id))
+
+        model_deploy_configuration = CreateModelDeploymentDetails(
+            display_name=name,
+            description=description,
+            project_id=self.project_id,
+            compartment_id=self.compartment_id,
+            model_deployment_configuration_details=single_model_config,
+            category_log_details=logs_configuration_details_object)
+
+        if not self.test_mode:
+            auth = oci.auth.signers.get_resource_principals_signer()
+            data_science = DataScienceClient({}, signer=auth)
+            data_science_composite = DataScienceClientCompositeOperations(data_science)
+            model_deployment = data_science_composite.create_model_deployment_and_wait_for_state(
+                model_deploy_configuration,
+                wait_for_states=[
+                    "SUCCEEDED", "FAILED"])
+            self.deployment_info['work_request'] = model_deployment.data.id
+            logger.info(f"deployment metadata :{model_deployment.data}")
+            md = data_science.get_model_deployment(model_deployment_id=model_deployment.data.resources[0].identifier)
+            self.deployment_info['model_deployment_ocid'] = md.data.id
+            endpoint_url = md.data.model_deployment_url
+            self.deployment_info['model_deployment_endpoint'] = f"{endpoint_url}/predict"
+
+    def save_deployment_info(self):
+        output_dir = self.spec.output_directory.url
+        if ObjectStorageDetails.is_oci_path(output_dir):
+            storage_options = default_signer()
+        else:
+            storage_options = {}
+        write_data(
+            data=pd.DataFrame.from_dict(self.deployment_info),
+            filename=os.path.join(output_dir, "deployment_info.json"),
+            format="json",
+            storage_options=storage_options,
+            index=False,
+            indent=4,
+            orient="records"
+        )
+        write_simple_json(self.deployment_info, os.path.join(output_dir, "deployment_info.json"))
+        logger.info(f"Saved deployment info to {output_dir}")
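
Once `create_deployment` finishes, `deployment_info.json` records the `model_deployment_endpoint`, and the request payload matches what `_sanity_test` feeds to `score.predict`. A hedged sketch of calling the live endpoint (endpoint URL and CSV location are hypothetical placeholders; OCI signers double as a `requests` auth object, and resource-principal auth assumes the caller runs inside an OCI workload):

    import oci
    import requests

    # Taken from deployment_info.json after the operator run (hypothetical URL).
    endpoint = "https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.example/predict"
    # Same shape as the sanity-test input: a URL pointing at future regressors.
    payload = {"additional_data": {"url": "oci://my-bucket@my-namespace/future_regressors.csv"}}

    auth = oci.auth.signers.get_resource_principals_signer()
    response = requests.post(endpoint, json=payload, auth=auth)
    print(response.json())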