oracle-ads 2.13.17rc0__py3-none-any.whl → 2.13.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/cli.py +7 -5
- ads/aqua/common/entities.py +88 -29
- ads/aqua/common/enums.py +7 -0
- ads/aqua/common/errors.py +5 -0
- ads/aqua/common/utils.py +87 -7
- ads/aqua/constants.py +3 -0
- ads/aqua/extension/deployment_handler.py +36 -0
- ads/aqua/modeldeployment/config_loader.py +10 -0
- ads/aqua/modeldeployment/constants.py +1 -0
- ads/aqua/modeldeployment/deployment.py +99 -22
- ads/aqua/modeldeployment/entities.py +4 -0
- ads/aqua/resources/gpu_shapes_index.json +315 -26
- ads/aqua/shaperecommend/__init__.py +6 -0
- ads/aqua/shaperecommend/constants.py +116 -0
- ads/aqua/shaperecommend/estimator.py +384 -0
- ads/aqua/shaperecommend/llm_config.py +283 -0
- ads/aqua/shaperecommend/recommend.py +493 -0
- ads/aqua/shaperecommend/shape_report.py +233 -0
- ads/aqua/version.json +1 -1
- ads/cli.py +9 -1
- ads/jobs/builders/infrastructure/dsc_job.py +1 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
- ads/model/service/oci_datascience_model_deployment.py +46 -19
- ads/opctl/operator/lowcode/common/data.py +7 -2
- ads/opctl/operator/lowcode/common/transformations.py +207 -0
- ads/opctl/operator/lowcode/common/utils.py +8 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
- ads/opctl/operator/lowcode/forecast/const.py +2 -0
- ads/opctl/operator/lowcode/forecast/errors.py +5 -0
- ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
- ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
- ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
- ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
- ads/pipeline/ads_pipeline.py +13 -9
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +43 -36
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0

ads/opctl/operator/lowcode/forecast/model/base_model.py
CHANGED
@@ -8,6 +8,7 @@ import os
 import tempfile
 import time
 import traceback
+import warnings
 from abc import ABC, abstractmethod
 from typing import Tuple
 
@@ -50,6 +51,7 @@ from ..const import (
     SpeedAccuracyMode,
     SupportedMetrics,
     SupportedModels,
+    TROUBLESHOOTING_GUIDE,
 )
 from ..operator_config import ForecastOperatorConfig, ForecastOperatorSpec
 from .forecast_datasets import ForecastDatasets, ForecastResults
@@ -98,10 +100,21 @@ class ForecastOperatorBaseModel(ABC):
             self.spec.tuning.n_trials is not None
         )
 
-    def
-    """
-    import
+    def build_model(self):
+        """Builds the model and returns the result DataFrame and elapsed time."""
+        import time
 
+        start_time = time.time()
+        result_df = self._build_model()
+        elapsed_time = time.time() - start_time
+        logger.info("Building the models completed in %s seconds", elapsed_time)
+        return result_df, elapsed_time
+
+    def generate_report(
+        self, result_df=None, elapsed_time=None, save_sub_reports=False
+    ):
+        """Generates the forecasting report. Optionally accepts a precomputed result_df and elapsed_time.
+        If save_sub_reports is True, unique filenames are generated for all outputs."""
         from sklearn.exceptions import ConvergenceWarning
 
         with warnings.catch_warnings():
@@ -114,10 +127,8 @@ class ForecastOperatorBaseModel(ABC):
         if self.spec.previous_output_dir is not None:
             self._load_model()
 
-
-
-        elapsed_time = time.time() - start_time
-        logger.info("Building the models completed in %s seconds", elapsed_time)
+        if result_df is None or elapsed_time is None:
+            result_df, elapsed_time = self.build_model()
 
         # Generate metrics
         summary_metrics = None
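
Note on the refactor above: model building is now split out of report generation, so one training pass can feed several reports. A minimal usage sketch (the concrete `model` instance is assumed; any ForecastOperatorBaseModel subclass would do):

# Train once and keep the timing.
result_df, elapsed_time = model.build_model()

# Reuse the precomputed results; generate_report() only calls
# build_model() itself when result_df/elapsed_time are None.
model.generate_report(result_df=result_df, elapsed_time=elapsed_time)

# Calling generate_report() with no arguments preserves the old
# single-call behavior (build + report in one step).
model.generate_report()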
@@ -354,6 +365,7 @@ class ForecastOperatorBaseModel(ABC):
             metrics_df=self.eval_metrics,
             test_metrics_df=self.test_eval_metrics,
             test_data=test_data,
+            save_sub_reports=save_sub_reports,
         )
 
     def _test_evaluate_metrics(self, elapsed_time=0):
@@ -471,8 +483,9 @@ class ForecastOperatorBaseModel(ABC):
         metrics_df: pd.DataFrame,
         test_metrics_df: pd.DataFrame,
         test_data: pd.DataFrame,
+        save_sub_reports: bool = False,
     ):
-        """Saves resulting reports to the given folder."""
+        """Saves resulting reports to the given folder. If save_sub_reports is True, use unique filenames."""
 
         unique_output_dir = self.spec.output_directory.url
         results = ForecastResults()
@@ -483,6 +496,12 @@ class ForecastOperatorBaseModel(ABC):
             else {}
         )
 
+        def get_path(filename):
+            path = os.path.join(unique_output_dir, filename)
+            if save_sub_reports:
+                return self._get_unique_filename(path, storage_options)
+            return path
+
         # report-creator html report
         if self.spec.generate_report:
             with tempfile.TemporaryDirectory() as temp_dir:
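
The `get_path` closure above is the single point through which every artifact path below (report, forecast, metrics, explanations, model parameters, pickle, errors) is now built. A standalone sketch of the pattern, with a stand-in uniquifier (`make_get_path` and the lambda are illustrative, not part of the package):

import os

def make_get_path(unique_output_dir, save_sub_reports, uniquify):
    # Join the output directory with a filename, optionally routing
    # through a uniquifier when save_sub_reports is set.
    def get_path(filename):
        path = os.path.join(unique_output_dir, filename)
        return uniquify(path) if save_sub_reports else path
    return get_path

get_path = make_get_path("/tmp/out", True, lambda p: p.replace(".html", "_prophet.html"))
print(get_path("report.html"))  # /tmp/out/report_prophet.html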
@@ -492,7 +511,7 @@ class ForecastOperatorBaseModel(ABC):
                 report.save(rc.Block(*report_sections), report_local_path)
                 enable_print()
 
-                report_path =
+                report_path = get_path(self.spec.report_filename)
                 write_file(
                     local_filename=report_local_path,
                     remote_filename=report_path,
@@ -511,9 +530,10 @@ class ForecastOperatorBaseModel(ABC):
             else result_df.drop(DataColumns.Series, axis=1)
         )
         if self.spec.generate_forecast_file:
+            forecast_path = get_path(self.spec.forecast_filename)
             write_data(
                 data=result_df,
-                filename=
+                filename=forecast_path,
                 format="csv",
                 storage_options=storage_options,
             )
@@ -531,11 +551,10 @@ class ForecastOperatorBaseModel(ABC):
                 {"index": "metrics", "Series 1": metrics_col_name}, axis=1
             )
             if self.spec.generate_metrics_file:
+                metrics_path = get_path(self.spec.metrics_filename)
                 write_data(
                     data=metrics_df_formatted,
-                    filename=os.path.join(
-                        unique_output_dir, self.spec.metrics_filename
-                    ),
+                    filename=metrics_path,
                     format="csv",
                     storage_options=storage_options,
                     index=False,
@@ -553,11 +572,10 @@ class ForecastOperatorBaseModel(ABC):
                 {"index": "metrics", "Series 1": metrics_col_name}, axis=1
             )
             if self.spec.generate_metrics_file:
+                test_metrics_path = get_path(self.spec.test_metrics_filename)
                 write_data(
                     data=test_metrics_df_formatted,
-                    filename=os.path.join(
-                        unique_output_dir, self.spec.test_metrics_filename
-                    ),
+                    filename=test_metrics_path,
                     format="csv",
                     storage_options=storage_options,
                     index=False,
@@ -567,6 +585,7 @@ class ForecastOperatorBaseModel(ABC):
             logger.warning(
                 f"Attempted to generate the {self.spec.test_metrics_filename} file with the test metrics, however the test metrics could not be properly generated."
             )
+
         # explanations csv reports
         if self.spec.generate_explanations:
             try:
@@ -579,11 +598,12 @@ class ForecastOperatorBaseModel(ABC):
                     else col
                 )
                 if self.spec.generate_explanation_files:
+                    global_exp_path = get_path(
+                        self.spec.global_explanation_filename
+                    )
                     write_data(
                         data=global_expl_rounded,
-                        filename=os.path.join(
-                            unique_output_dir, self.spec.global_explanation_filename
-                        ),
+                        filename=global_exp_path,
                         format="csv",
                         storage_options=storage_options,
                         index=True,
@@ -603,11 +623,10 @@ class ForecastOperatorBaseModel(ABC):
                     else col
                 )
                 if self.spec.generate_explanation_files:
+                    local_exp_path = get_path(self.spec.local_explanation_filename)
                     write_data(
                         data=local_expl_rounded,
-                        filename=os.path.join(
-                            unique_output_dir, self.spec.local_explanation_filename
-                        ),
+                        filename=local_exp_path,
                         format="csv",
                         storage_options=storage_options,
                         index=True,
@@ -625,9 +644,10 @@ class ForecastOperatorBaseModel(ABC):
 
         if self.spec.generate_model_parameters:
             # model params
+            model_params_path = get_path("model_params.json")
             write_data(
                 data=pd.DataFrame.from_dict(self.model_parameters),
-                filename=
+                filename=model_params_path,
                 format="json",
                 storage_options=storage_options,
                 index=True,
@@ -637,7 +657,13 @@ class ForecastOperatorBaseModel(ABC):
 
         # model pickle
         if self.spec.generate_model_pickle:
-
+            pickle_path = get_path("model.pkl")
+            write_pkl(
+                obj=self.models,
+                filename=os.path.basename(pickle_path),
+                output_dir=os.path.dirname(pickle_path),
+                storage_options=storage_options,
+            )
             results.set_models(self.models)
 
         logger.info(
@@ -648,11 +674,10 @@ class ForecastOperatorBaseModel(ABC):
             f"The outputs have been successfully generated and placed into the directory: {unique_output_dir}."
         )
         if self.errors_dict:
+            errors_path = get_path(self.spec.errors_dict_filename)
             write_json(
                 json_dict=self.errors_dict,
-                filename=os.path.join(
-                    unique_output_dir, self.spec.errors_dict_filename
-                ),
+                filename=errors_path,
                 storage_options=storage_options,
             )
             results.set_errors_dict(self.errors_dict)
@@ -719,6 +744,7 @@ class ForecastOperatorBaseModel(ABC):
             raise ValueError(
                 "AUTOMLX explanation accuracy mode is only supported for AutoMLX models. "
                 "Please select mode other than AUTOMLX from the available explanations_accuracy_mode options"
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
             )
 
     @runtime_dependency(
@@ -849,9 +875,9 @@ class ForecastOperatorBaseModel(ABC):
 
         # Add date column to local explanation DataFrame
         local_kernel_explnr_df[ForecastOutputColumns.DATE] = (
-            self.datasets.get_horizon_at_series(
-
-
+            self.datasets.get_horizon_at_series(s_id=series_id)[
+                self.spec.datetime_column.name
+            ].reset_index(drop=True)
         )
         self.local_explanation[series_id] = local_kernel_explnr_df
 
@@ -873,3 +899,66 @@
         return fcst
 
     return _custom_predict
+
+    def _get_unique_filename(self, base_path: str, storage_options: dict = None) -> str:
+        """Generate a unique filename by appending a sequential number if file exists.
+
+        Args:
+            base_path: The original file path to check
+            storage_options: Optional storage options for OCI paths
+
+        Returns:
+            A unique file path that doesn't exist
+        """
+        if not ObjectStorageDetails.is_oci_path(base_path):
+            # For local files
+            directory = os.path.dirname(base_path)
+            basename = os.path.basename(base_path)
+            name, ext = os.path.splitext(basename)
+
+            model_suffix = "_" + self.spec.model
+            new_name = f"{name}{model_suffix}"
+            new_path = os.path.join(directory, f"{new_name}{ext}")
+            counter = 1
+            while os.path.exists(new_path):
+                new_path = os.path.join(directory, f"{new_name}_{counter}{ext}")
+                counter += 1
+            return new_path
+        else:
+            # For OCI paths, we need to list objects and check
+            try:
+                from oci.object_storage import ObjectStorageClient
+
+                client = ObjectStorageClient(config=storage_options)
+
+                # Parse OCI path components
+                bucket_name = ObjectStorageDetails.get_bucket_name(base_path)
+                namespace = ObjectStorageDetails.get_namespace(base_path)
+                object_name = ObjectStorageDetails.get_object_name(base_path)
+
+                name, ext = os.path.splitext(object_name)
+
+                model_suffix = "_" + self.spec.model
+                new_name = f"{name}{model_suffix}"
+                new_object_name = f"{new_name}{ext}"
+                counter = 1
+                while True:
+                    try:
+                        # Try to head the object to see if it exists
+                        client.head_object(namespace, bucket_name, new_object_name)
+                        # If we get here, the object exists
+                        new_object_name = f"{new_name}_{counter}{ext}"
+                        counter += 1
+                    except:
+                        # Object doesn't exist, we can use this name
+                        break
+
+                # Reconstruct the full path
+                return ObjectStorageDetails.get_path(
+                    namespace, bucket_name, new_object_name
+                )
+            except Exception as e:
+                logger.warning(
+                    f"Error checking OCI path existence: {e}. Using original path."
+                )
+                return base_path
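
For reference, the local-filesystem branch of `_get_unique_filename` reduces to the loop below; the OCI branch runs the same loop but probes existence with `head_object`. A standalone sketch (the function name and the example model suffix are illustrative):

import os

def unique_local_path(base_path: str, model: str) -> str:
    # Append a model suffix, then a numeric counter, until the path is free.
    directory, basename = os.path.split(base_path)
    name, ext = os.path.splitext(basename)
    new_name = f"{name}_{model}"
    new_path = os.path.join(directory, f"{new_name}{ext}")
    counter = 1
    while os.path.exists(new_path):
        new_path = os.path.join(directory, f"{new_name}_{counter}{ext}")
        counter += 1
    return new_path

# forecast.csv -> forecast_prophet.csv -> forecast_prophet_1.csv -> ...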

ads/opctl/operator/lowcode/forecast/model/factory.py
CHANGED
@@ -3,7 +3,16 @@
 # Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
-from
+from ads.opctl.operator.lowcode.common.transformations import Transformations
+
+from ..const import (
+    AUTO_SELECT,
+    AUTO_SELECT_SERIES,
+    TROUBLESHOOTING_GUIDE,
+    SpeedAccuracyMode,
+    SupportedModels,
+)
+from ..meta_selector import MetaSelector
 from ..model_evaluator import ModelEvaluator
 from ..operator_config import ForecastOperatorConfig
 from .arima import ArimaOperatorModel
@@ -21,6 +30,7 @@ class UnSupportedModelError(Exception):
         super().__init__(
             f"Model: `{model_type}` "
             f"is not supported. Supported models: {SupportedModels.values()}"
+            f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
         )
 
 
@@ -63,7 +73,28 @@ class ForecastOperatorModelFactory:
         In case of not supported model.
         """
         model_type = operator_config.spec.model
-
+
+        if model_type == AUTO_SELECT_SERIES:
+            # Initialize MetaSelector for series-specific model selection
+            selector = MetaSelector()
+            # Create a Transformations instance
+            transformer = Transformations(dataset_info=datasets.historical_data.spec)
+
+            # Calculate meta-features
+            meta_features = selector.select_best_model(
+                meta_features_df=transformer.build_fforms_meta_features(
+                    data=datasets.historical_data.raw_data,
+                    target_col=datasets.historical_data.spec.target_column,
+                    group_cols=datasets.historical_data.spec.target_category_columns
+                )
+            )
+            # Get the most common model as default
+            model_type = meta_features['selected_model'].mode().iloc[0]
+            # Store the series-specific model selections in the config for later use
+            operator_config.spec.meta_features = meta_features
+            operator_config.spec.model_kwargs = {}
+
+        elif model_type == AUTO_SELECT:
             model_type = cls.auto_select_model(datasets, operator_config)
             operator_config.spec.model_kwargs = {}
         # set the explanations accuracy mode to AUTOMLX if the selected model is automlx
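
The new AUTO_SELECT_SERIES branch picks a model per series from FFORMS-style meta-features, then falls back to the most frequent choice as the factory's default. A toy illustration of that last step (the frame layout beyond the 'selected_model' column is an assumption):

import pandas as pd

# Hypothetical per-series selections as returned by MetaSelector.select_best_model.
meta_features = pd.DataFrame(
    {"series_id": ["A", "B", "C"], "selected_model": ["prophet", "arima", "prophet"]}
)
default_model = meta_features["selected_model"].mode().iloc[0]
print(default_model)  # prophet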

ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py
CHANGED
@@ -18,13 +18,14 @@ from ads.opctl.operator.lowcode.common.utils import (
     get_frequency_of_datetime,
 )
 
-from ..const import ForecastOutputColumns, SupportedModels
+from ..const import ForecastOutputColumns, SupportedModels, TROUBLESHOOTING_GUIDE
 from ..operator_config import ForecastOperatorConfig
 
 
 class HistoricalData(AbstractData):
-    def __init__(self, spec, historical_data=None):
-        super().__init__(spec=spec, name="historical_data", data=historical_data)
+    def __init__(self, spec, historical_data=None, subset=None):
+        super().__init__(spec=spec, name="historical_data", data=historical_data, subset=subset)
+        self.subset = subset
 
     def _ingest_data(self, spec):
         try:
@@ -48,25 +49,29 @@ class HistoricalData(AbstractData):
                 f"{SupportedModels.AutoMLX} requires data with a frequency of at least one hour. Please try using a different model,"
                 " or select the 'auto' option."
             )
-            raise InvalidParameterError(message)
+            raise InvalidParameterError(f"{message}"
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps.")
 
 
 class AdditionalData(AbstractData):
-    def __init__(self, spec, historical_data, additional_data=None):
+    def __init__(self, spec, historical_data, additional_data=None, subset=None):
+        self.subset = subset
         if additional_data is not None:
-            super().__init__(spec=spec, name="additional_data", data=additional_data)
+            super().__init__(spec=spec, name="additional_data", data=additional_data, subset=subset)
             self.additional_regressors = list(self.data.columns)
         elif spec.additional_data is not None:
-            super().__init__(spec=spec, name="additional_data")
+            super().__init__(spec=spec, name="additional_data", subset=subset)
             add_dates = self.data.index.get_level_values(0).unique().tolist()
             add_dates.sort()
             if historical_data.get_max_time() > add_dates[-spec.horizon]:
                 raise DataMismatchError(
                     f"The Historical Data ends on {historical_data.get_max_time()}. The additional data horizon starts on {add_dates[-spec.horizon]}. The horizon should have exactly {spec.horizon} dates after the Historical at a frequency of {historical_data.freq}"
+                    f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
                 )
             elif historical_data.get_max_time() != add_dates[-(spec.horizon + 1)]:
                 raise DataMismatchError(
                     f"The Additional Data must be present for all historical data and the entire horizon. The Historical Data ends on {historical_data.get_max_time()}. The additonal data horizon starts after {add_dates[-(spec.horizon+1)]}. These should be the same date."
+                    f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
                 )
             else:
                 self.name = "additional_data"
@@ -114,8 +119,7 @@ class AdditionalData(AbstractData):
 
 class TestData(AbstractData):
     def __init__(self, spec, test_data):
-
-        super().__init__(spec=spec, name="test_data", data=test_data)
+        super().__init__(spec=spec, name="test_data", data=test_data)
         self.dt_column_name = spec.datetime_column.name
         self.target_name = spec.target_column
 
@@ -127,6 +131,7 @@ class ForecastDatasets:
         historical_data=None,
         additional_data=None,
         test_data=None,
+        subset=None,  # New parameter for subsetting by group
     ):
         """Instantiates the DataIO instance.
 
@@ -134,26 +139,30 @@ class ForecastDatasets:
         ----------
         config: ForecastOperatorConfig
            The forecast operator configuration.
+        subset: list, optional
+            List of group keys to subset the data on initialization.
         """
+        self.config = config  # Store the config for later use
         self.historical_data: HistoricalData = None
         self.additional_data: AdditionalData = None
+        self.test_data: TestData = None
        self._horizon = config.spec.horizon
         self._datetime_column_name = config.spec.datetime_column.name
         self._target_col = config.spec.target_column
         if historical_data is not None:
-            self.historical_data = HistoricalData(config.spec, historical_data)
+            self.historical_data = HistoricalData(config.spec, historical_data, subset=subset)
             self.additional_data = AdditionalData(
-                config.spec, self.historical_data, additional_data
+                config.spec, self.historical_data, additional_data, subset=subset
             )
         else:
-            self._load_data(config.spec)
-
+            self._load_data(config.spec, subset=subset)
+        if test_data is not None or config.spec.test_data is not None:
+            self.test_data = TestData(config.spec, test_data)
 
-    def _load_data(self, spec):
+    def _load_data(self, spec, subset=None):
         """Loads forecasting input data."""
-        self.historical_data = HistoricalData(spec)
-        self.additional_data = AdditionalData(spec, self.historical_data)
-
+        self.historical_data = HistoricalData(spec, subset=subset)
+        self.additional_data = AdditionalData(spec, self.historical_data, subset=subset)
         if spec.generate_explanations and spec.additional_data is None:
             logger.warning(
                 "Unable to generate explanations as there is no additional data passed in. Either set generate_explanations to False, or pass in additional data."
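
The subset parameter threads group filtering from ForecastDatasets down into HistoricalData and AdditionalData, which enables fitting separate pipelines per group. A hypothetical call (the group-key values are illustrative; their exact format is defined by the operator's data layer):

# Build a dataset view restricted to two series groups.
datasets = ForecastDatasets(config, subset=["store_1", "store_2"])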
@@ -210,6 +219,7 @@ class ForecastDatasets:
         except Exception as e:
             raise InvalidParameterError(
                 f"Unable to retrieve series id: {s_id} from data. Available series ids are: {self.list_series_ids()}"
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
             ) from e
 
     def get_horizon_at_series(self, s_id):
@@ -291,6 +301,7 @@ class ForecastOutput:
         if not overwrite and series_id in self.series_id_map:
             raise ValueError(
                 f"Attempting to update ForecastOutput for series_id {series_id} when this already exists. Set overwrite to True."
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
             )
         forecast = self._check_forecast_format(forecast)
         self.series_id_map[series_id] = forecast
@@ -331,6 +342,7 @@
         except KeyError as e:
             raise ValueError(
                 f"Attempting to update output for series: {series_id}, however no series output has been initialized."
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
             ) from e
 
         if (output_i.shape[0] - self.horizon) == len(fit_val):
@@ -351,18 +363,21 @@
         if len(forecast_val) != self.horizon:
             raise ValueError(
                 f"Attempting to set forecast along horizon ({self.horizon}) for series: {series_id}, however forecast is only length {len(forecast_val)}"
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
             )
         output_i["forecast_value"].iloc[-self.horizon :] = forecast_val
 
         if len(upper_bound) != self.horizon:
             raise ValueError(
                 f"Attempting to set upper_bound along horizon ({self.horizon}) for series: {series_id}, however upper_bound is only length {len(upper_bound)}"
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
             )
         output_i[self.upper_bound_name].iloc[-self.horizon :] = upper_bound
 
         if len(lower_bound) != self.horizon:
             raise ValueError(
                 f"Attempting to set lower_bound along horizon ({self.horizon}) for series: {series_id}, however lower_bound is only length {len(lower_bound)}"
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
             )
         output_i[self.lower_bound_name].iloc[-self.horizon :] = lower_bound
 
@@ -490,3 +505,25 @@ class ForecastResults:
 
     def get_errors_dict(self):
         return getattr(self, "errors_dict", None)
+
+    def merge(self, other: 'ForecastResults'):
+        """Merge another ForecastResults object into this one."""
+        # Merge DataFrames if they exist, else just set
+        for attr in [
+            'forecast', 'metrics', 'test_metrics', 'local_explanations', 'global_explanations', 'model_parameters', 'models', 'errors_dict']:
+            val_self = getattr(self, attr, None)
+            val_other = getattr(other, attr, None)
+            if val_self is not None and val_other is not None:
+                if isinstance(val_self, pd.DataFrame) and isinstance(val_other, pd.DataFrame):
+                    setattr(self, attr, pd.concat([val_self, val_other], ignore_index=True, axis=0))
+                elif isinstance(val_self, dict) and isinstance(val_other, dict):
+                    val_self.update(val_other)
+                    setattr(self, attr, val_self)
+                elif isinstance(val_self, list) and isinstance(val_other, list):
+                    setattr(self, attr, val_self + val_other)
+                else:
+                    # If not mergeable, just keep self's value
+                    pass
+            elif val_other is not None:
+                setattr(self, attr, val_other)
+        return self
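
merge complements the per-group subset runs: each group produces its own ForecastResults, and the pieces are folded back together. A minimal sketch (the bare construction and attribute setup are assumptions; real objects are populated by generate_report):

import pandas as pd

a, b = ForecastResults(), ForecastResults()
a.forecast = pd.DataFrame({"Series": ["A"], "forecast_value": [1.0]})
b.forecast = pd.DataFrame({"Series": ["B"], "forecast_value": [2.0]})
combined = a.merge(b)  # DataFrames concatenated, dicts updated, lists extended
# On a type mismatch the left-hand value wins, so merge order matters.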

ads/opctl/operator/lowcode/forecast/model_evaluator.py
CHANGED
@@ -10,7 +10,10 @@ import pandas as pd
 from ads.opctl import logger
 from ads.opctl.operator.lowcode.common.const import DataColumns
 from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
-from ads.opctl.operator.lowcode.forecast.const import
+from ads.opctl.operator.lowcode.forecast.const import (
+    BACKTEST_REPORT_NAME,
+    TROUBLESHOOTING_GUIDE,
+)
 from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
 
 from .model.forecast_datasets import ForecastDatasets
@@ -79,6 +82,7 @@ class ModelEvaluator:
             raise InsufficientDataError(
                 "Insufficient data to evaluate multiple models. Please specify a model "
                 "instead of using auto-select."
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
             )
         training_datasets = [
             sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date]
@@ -223,6 +227,7 @@ class ModelEvaluator:
             model = SupportedModels.Prophet
             logger.error(
                 f"Running {model} model as auto-select failed with the following error: {e.message}"
+                f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
             )
             return model
         nonempty_metrics = {

ads/pipeline/ads_pipeline.py
CHANGED
@@ -1728,15 +1728,19 @@ class Pipeline(Builder):
 
     def __step_infrastructure_configuration_details(self, step) -> dict:
         step_infrastructure_configuration_details = {}
-        step_infrastructure_configuration_details[
-
-
-        step_infrastructure_configuration_details[
-
-
-        step_infrastructure_configuration_details[
-
-
+        step_infrastructure_configuration_details["blockStorageSizeInGBs"] = (
+            step.infrastructure.block_storage_size
+        )
+        step_infrastructure_configuration_details["shapeName"] = (
+            step.infrastructure.shape_name
+        )
+        step_infrastructure_configuration_details["shapeConfigDetails"] = (
+            step.infrastructure.shape_config_details
+        )
+        if getattr(step.infrastructure, "subnet_id", ""):
+            step_infrastructure_configuration_details["subnetId"] = (
+                step.infrastructure.subnet_id
+            )
         return step_infrastructure_configuration_details
 
     def __step_configuration_details(self, pipeline_details: Dict, step) -> dict:
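
The rewrite above also makes subnetId conditional, so steps without a custom subnet no longer emit the key. A sketch of the resulting payload (values are illustrative):

# Hypothetical output of __step_infrastructure_configuration_details:
details = {
    "blockStorageSizeInGBs": 50,
    "shapeName": "VM.Standard.E4.Flex",
    "shapeConfigDetails": {"ocpus": 2, "memoryInGBs": 32},
    "subnetId": "ocid1.subnet.oc1..example",  # present only when subnet_id is set
}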

{oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oracle_ads
-Version: 2.13.17rc0
+Version: 2.13.18
 Summary: Oracle Accelerated Data Science SDK
 Keywords: Oracle Cloud Infrastructure,OCI,Machine Learning,ML,Artificial Intelligence,AI,Data Science,Cloud,Oracle,GenAI,Generative AI,Forecast,Anomaly,Document Understanding,Anomaly Detection
 Author: Oracle Data Science
Author: Oracle Data Science
|