oracle-ads 2.11.5__py3-none-any.whl → 2.11.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/utils.py +5 -2
- ads/catalog/model.py +3 -3
- ads/catalog/notebook.py +3 -3
- ads/catalog/project.py +2 -2
- ads/catalog/summary.py +2 -4
- ads/cli.py +2 -1
- ads/common/serializer.py +1 -1
- ads/data_labeling/metadata.py +2 -2
- ads/dataset/dataset.py +3 -5
- ads/dataset/factory.py +2 -3
- ads/dataset/label_encoder.py +1 -1
- ads/dataset/sampled_dataset.py +3 -5
- ads/jobs/ads_job.py +26 -2
- ads/jobs/builders/infrastructure/dsc_job.py +20 -7
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +1 -1
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +8 -15
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +2 -1
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +2 -2
- ads/opctl/operator/lowcode/anomaly/operator_config.py +18 -1
- ads/opctl/operator/lowcode/anomaly/schema.yaml +16 -4
- ads/opctl/operator/lowcode/common/data.py +16 -2
- ads/opctl/operator/lowcode/common/transformations.py +48 -14
- ads/opctl/operator/lowcode/forecast/environment.yaml +1 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +21 -12
- ads/opctl/operator/lowcode/forecast/model/automlx.py +79 -72
- ads/opctl/operator/lowcode/forecast/model/autots.py +182 -164
- ads/opctl/operator/lowcode/forecast/model/base_model.py +59 -41
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +47 -47
- ads/opctl/operator/lowcode/forecast/model/prophet.py +48 -48
- ads/opctl/operator/lowcode/forecast/operator_config.py +18 -2
- ads/opctl/operator/lowcode/forecast/schema.yaml +20 -4
- ads/opctl/operator/lowcode/forecast/utils.py +4 -0
- ads/pipeline/ads_pipeline_step.py +11 -12
- {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/METADATA +4 -3
- {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/RECORD +38 -38
- {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/WHEEL +0 -0
- {oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/entry_points.txt +0 -0
ads/aqua/utils.py
CHANGED
@@ -268,9 +268,12 @@ def is_valid_ocid(ocid: str) -> bool:
|
|
268
268
|
bool:
|
269
269
|
Whether the given ocid is valid.
|
270
270
|
"""
|
271
|
-
|
271
|
+
# TODO: revisit pattern
|
272
|
+
pattern = (
|
273
|
+
r"^ocid1\.([a-z0-9_]+)\.([a-z0-9]+)\.([a-z0-9-]*)(\.[^.]+)?\.([a-z0-9_]+)$"
|
274
|
+
)
|
272
275
|
match = re.match(pattern, ocid)
|
273
|
-
return
|
276
|
+
return True
|
274
277
|
|
275
278
|
|
276
279
|
def get_resource_type(ocid: str) -> str:
|
ads/catalog/model.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8; -*-
|
3
3
|
|
4
|
-
# Copyright (c) 2020,
|
4
|
+
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
import warnings
|
@@ -404,13 +404,13 @@ class Model:
|
|
404
404
|
def _repr_html_(self):
|
405
405
|
"""Shows model in dataframe format."""
|
406
406
|
return (
|
407
|
-
self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).
|
407
|
+
self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).to_html()
|
408
408
|
)
|
409
409
|
|
410
410
|
def __repr__(self):
|
411
411
|
"""Shows model in dataframe format."""
|
412
412
|
return (
|
413
|
-
self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).
|
413
|
+
self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).to_html()
|
414
414
|
)
|
415
415
|
|
416
416
|
def activate(self) -> None:
|
ads/catalog/notebook.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8; -*-
|
3
3
|
|
4
|
-
# Copyright (c) 2020,
|
4
|
+
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
import warnings
|
@@ -220,7 +220,7 @@ class NotebookCatalog:
|
|
220
220
|
lambda x: "<a href='%s'>%s</a>"
|
221
221
|
% (x if x.startswith("http") else "http://%s" % x, "open")
|
222
222
|
)
|
223
|
-
return df.style.set_properties(**{"margin-left": "0px"}).
|
223
|
+
return df.style.set_properties(**{"margin-left": "0px"}).to_html()
|
224
224
|
|
225
225
|
notebook.commit = MethodType(commit, notebook)
|
226
226
|
notebook.rollback = MethodType(rollback, notebook)
|
@@ -295,7 +295,7 @@ class NotebookCatalog:
|
|
295
295
|
shape=None,
|
296
296
|
block_storage_size_in_gbs=None,
|
297
297
|
subnet_id=None,
|
298
|
-
**kwargs
|
298
|
+
**kwargs,
|
299
299
|
):
|
300
300
|
"""
|
301
301
|
Create a new notebook session with the supplied details.
|
ads/catalog/project.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8; -*-
|
3
3
|
|
4
|
-
# Copyright (c) 2020,
|
4
|
+
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
import warnings
|
@@ -237,7 +237,7 @@ class ProjectCatalog(Mapping):
|
|
237
237
|
return (
|
238
238
|
project_self.to_dataframe()
|
239
239
|
.style.set_properties(**{"margin-left": "0px"})
|
240
|
-
.
|
240
|
+
.to_html()
|
241
241
|
)
|
242
242
|
|
243
243
|
project.commit = MethodType(commit, project)
|
ads/catalog/summary.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8 -*--
|
3
3
|
|
4
|
-
# Copyright (c) 2020,
|
4
|
+
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
from __future__ import print_function, absolute_import
|
@@ -98,7 +98,6 @@ class SummaryList(list, metaclass=ABCMeta):
|
|
98
98
|
pass
|
99
99
|
|
100
100
|
def to_dataframe(self, datetime_format=None):
|
101
|
-
|
102
101
|
"""
|
103
102
|
Returns the model catalog summary as a pandas dataframe
|
104
103
|
|
@@ -121,7 +120,6 @@ class SummaryList(list, metaclass=ABCMeta):
|
|
121
120
|
|
122
121
|
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
|
123
122
|
def show_in_notebook(self, datetime_format=None):
|
124
|
-
|
125
123
|
"""
|
126
124
|
Displays the model catalog summary in a Jupyter Notebook cell
|
127
125
|
|
@@ -144,7 +142,7 @@ class SummaryList(list, metaclass=ABCMeta):
|
|
144
142
|
def _repr_html_(self):
|
145
143
|
return self.df.style.applymap(
|
146
144
|
self._color_lifecycle_state, subset=["lifecycle_state"]
|
147
|
-
).
|
145
|
+
).to_html()
|
148
146
|
|
149
147
|
def _sort_by(self, cols, reverse=False):
|
150
148
|
return sorted(
|
ads/cli.py
CHANGED
@@ -9,7 +9,6 @@ import sys
|
|
9
9
|
|
10
10
|
import fire
|
11
11
|
from ads.common import logger
|
12
|
-
from ads.aqua.cli import AquaCommand
|
13
12
|
|
14
13
|
try:
|
15
14
|
import click
|
@@ -73,6 +72,8 @@ fire.core.parser.SeparateFlagArgs = _SeparateFlagArgs
|
|
73
72
|
|
74
73
|
def cli():
|
75
74
|
if len(sys.argv) > 1 and sys.argv[1] == "aqua":
|
75
|
+
from ads.aqua.cli import AquaCommand
|
76
|
+
|
76
77
|
fire.Fire(AquaCommand, command=sys.argv[2:], name="ads aqua")
|
77
78
|
else:
|
78
79
|
click_cli()
|
ads/common/serializer.py
CHANGED
ads/data_labeling/metadata.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8; -*-
|
3
3
|
|
4
|
-
# Copyright (c) 2021,
|
4
|
+
# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
from dataclasses import asdict, dataclass, field
|
@@ -75,7 +75,7 @@ class Metadata(DataClassSerializable):
|
|
75
75
|
def _repr_html_(self):
|
76
76
|
"""Shows metadata in dataframe format."""
|
77
77
|
return (
|
78
|
-
self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).
|
78
|
+
self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).to_html()
|
79
79
|
)
|
80
80
|
|
81
81
|
@classmethod
|
ads/dataset/dataset.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8 -*--
|
3
3
|
|
4
|
-
# Copyright (c) 2020,
|
4
|
+
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
from __future__ import print_function, absolute_import, division
|
@@ -85,7 +85,6 @@ class ADSDataset(PandasDataset):
|
|
85
85
|
interactive=False,
|
86
86
|
**kwargs,
|
87
87
|
):
|
88
|
-
|
89
88
|
#
|
90
89
|
# to keep performance high and linear no matter the size of the distributed dataset we
|
91
90
|
# create a pandas df that's used internally because this has a fixed upper size.
|
@@ -204,7 +203,7 @@ class ADSDataset(PandasDataset):
|
|
204
203
|
.style.set_table_styles(utils.get_dataframe_styles())
|
205
204
|
.set_table_attributes("class=table")
|
206
205
|
.hide_index()
|
207
|
-
.
|
206
|
+
.to_html()
|
208
207
|
)
|
209
208
|
)
|
210
209
|
)
|
@@ -263,7 +262,7 @@ class ADSDataset(PandasDataset):
|
|
263
262
|
self.style.set_table_styles(utils.get_dataframe_styles())
|
264
263
|
.set_table_attributes("class=table")
|
265
264
|
.hide_index()
|
266
|
-
.
|
265
|
+
.to_html()
|
267
266
|
)
|
268
267
|
)
|
269
268
|
)
|
@@ -1265,7 +1264,6 @@ class ADSDataset(PandasDataset):
|
|
1265
1264
|
n=None,
|
1266
1265
|
**init_kwargs,
|
1267
1266
|
):
|
1268
|
-
|
1269
1267
|
prev_doc_mode = utils.is_documentation_mode()
|
1270
1268
|
|
1271
1269
|
set_documentation_mode(False)
|
ads/dataset/factory.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8; -*-
|
3
3
|
|
4
|
-
# Copyright (c) 2020,
|
4
|
+
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
from __future__ import print_function, absolute_import
|
@@ -367,7 +367,7 @@ class DatasetFactory:
|
|
367
367
|
HTML(
|
368
368
|
list_df.style.set_table_attributes("class=table")
|
369
369
|
.hide_index()
|
370
|
-
.
|
370
|
+
.to_html()
|
371
371
|
)
|
372
372
|
)
|
373
373
|
return list_df
|
@@ -884,7 +884,6 @@ class CustomFormatReaders:
|
|
884
884
|
import xml.etree.cElementTree as et
|
885
885
|
|
886
886
|
def get_children(df, node, parent, i):
|
887
|
-
|
888
887
|
for name in node.attrib.keys():
|
889
888
|
df.at[i, parent + name] = node.attrib[name]
|
890
889
|
for child in list(node):
|
ads/dataset/label_encoder.py
CHANGED
@@ -52,7 +52,7 @@ class DataFrameLabelEncoder(TransformerMixin):
|
|
52
52
|
|
53
53
|
"""
|
54
54
|
for column in X.columns:
|
55
|
-
if X[column].dtype.name in ["object", "category"]:
|
55
|
+
if X[column].dtype.name in ["object", "category", "bool"]:
|
56
56
|
X[column] = X[column].astype(str)
|
57
57
|
self.label_encoders[column] = LabelEncoder()
|
58
58
|
self.label_encoders[column].fit(X[column])
|
ads/dataset/sampled_dataset.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8; -*-
|
3
3
|
|
4
|
-
# Copyright (c) 2020,
|
4
|
+
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
import matplotlib
|
@@ -49,6 +49,7 @@ from ads.common.decorator.runtime_dependency import (
|
|
49
49
|
|
50
50
|
NATURAL_EARTH_DATASET = "naturalearth_lowres"
|
51
51
|
|
52
|
+
|
52
53
|
class PandasDataset(object):
|
53
54
|
"""
|
54
55
|
This class provides APIs that can work on a sampled dataset.
|
@@ -107,7 +108,6 @@ class PandasDataset(object):
|
|
107
108
|
self.sampled_df = self.sampled_df.reset_index(drop=True)
|
108
109
|
|
109
110
|
def _find_feature_subset(self, df, target_name, include_n_features=32):
|
110
|
-
|
111
111
|
if len(df.columns) <= include_n_features:
|
112
112
|
return self.sampled_df
|
113
113
|
else:
|
@@ -212,7 +212,6 @@ class PandasDataset(object):
|
|
212
212
|
def _generate_features_html(
|
213
213
|
self, is_wide_dataset, n_features, df_stats, visualizations_follow
|
214
214
|
):
|
215
|
-
|
216
215
|
html = utils.get_bootstrap_styles()
|
217
216
|
|
218
217
|
if is_wide_dataset:
|
@@ -233,7 +232,7 @@ class PandasDataset(object):
|
|
233
232
|
if ("float" in str(type(x))) or ("int" in str(type(x)))
|
234
233
|
else x
|
235
234
|
)
|
236
|
-
.
|
235
|
+
.to_html()
|
237
236
|
)
|
238
237
|
|
239
238
|
if visualizations_follow:
|
@@ -244,7 +243,6 @@ class PandasDataset(object):
|
|
244
243
|
def _generate_warnings_html(
|
245
244
|
self, is_wide_dataset, n_rows, n_features, df_stats, out, accordion
|
246
245
|
):
|
247
|
-
|
248
246
|
#
|
249
247
|
# create the "Warnings" accordion section:
|
250
248
|
# - show high cardinal categoricals
|
ads/jobs/ads_job.py
CHANGED
@@ -10,6 +10,7 @@ from urllib.parse import urlparse
|
|
10
10
|
|
11
11
|
import fsspec
|
12
12
|
import oci
|
13
|
+
import yaml
|
13
14
|
from ads.common.auth import default_signer
|
14
15
|
from ads.common.decorator.utils import class_or_instance_method
|
15
16
|
from ads.jobs.builders.base import Builder
|
@@ -263,6 +264,9 @@ class Job(Builder):
|
|
263
264
|
Job runtime, by default None.
|
264
265
|
|
265
266
|
"""
|
267
|
+
# Saves a copy of the auth object from the class to the instance.
|
268
|
+
# Future changes to the class level Job.auth will not affect the auth of existing instances.
|
269
|
+
self.auth = self.auth.copy()
|
266
270
|
for key in ["config", "signer", "client_kwargs"]:
|
267
271
|
if kwargs.get(key):
|
268
272
|
self.auth[key] = kwargs.pop(key)
|
@@ -545,6 +549,26 @@ class Job(Builder):
|
|
545
549
|
"spec": spec,
|
546
550
|
}
|
547
551
|
|
552
|
+
@class_or_instance_method
|
553
|
+
def from_yaml(
|
554
|
+
cls,
|
555
|
+
yaml_string: str = None,
|
556
|
+
uri: str = None,
|
557
|
+
loader: callable = yaml.SafeLoader,
|
558
|
+
**kwargs,
|
559
|
+
):
|
560
|
+
if inspect.isclass(cls):
|
561
|
+
job = cls(**cls.auth)
|
562
|
+
else:
|
563
|
+
job = cls.__class__(**cls.auth)
|
564
|
+
|
565
|
+
if yaml_string:
|
566
|
+
return job.from_dict(yaml.load(yaml_string, Loader=loader))
|
567
|
+
if uri:
|
568
|
+
yaml_dict = yaml.load(cls._read_from_file(uri=uri, **kwargs), Loader=loader)
|
569
|
+
return job.from_dict(yaml_dict)
|
570
|
+
raise ValueError("Must provide either YAML string or URI location")
|
571
|
+
|
548
572
|
@class_or_instance_method
|
549
573
|
def from_dict(cls, config: dict) -> "Job":
|
550
574
|
"""Initializes a job from a dictionary containing the configurations.
|
@@ -573,9 +597,9 @@ class Job(Builder):
|
|
573
597
|
"runtime": cls._RUNTIME_MAPPING,
|
574
598
|
}
|
575
599
|
if inspect.isclass(cls):
|
576
|
-
job = cls()
|
600
|
+
job = cls(**cls.auth)
|
577
601
|
else:
|
578
|
-
job = cls.__class__()
|
602
|
+
job = cls.__class__(**cls.auth)
|
579
603
|
|
580
604
|
for key, value in spec.items():
|
581
605
|
if key in mappings:
|
@@ -6,8 +6,8 @@
|
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
8
|
import datetime
|
9
|
+
import inspect
|
9
10
|
import logging
|
10
|
-
import oci
|
11
11
|
import os
|
12
12
|
import time
|
13
13
|
import traceback
|
@@ -17,11 +17,12 @@ from string import Template
|
|
17
17
|
from typing import Any, Dict, List, Optional, Union
|
18
18
|
|
19
19
|
import fsspec
|
20
|
+
import oci
|
20
21
|
import oci.data_science
|
21
22
|
import oci.util as oci_util
|
22
|
-
import yaml
|
23
23
|
from oci.data_science.models import JobInfrastructureConfigurationDetails
|
24
24
|
from oci.exceptions import ServiceError
|
25
|
+
import yaml
|
25
26
|
from ads.common import utils
|
26
27
|
from ads.common.oci_datascience import DSCNotebookSession, OCIDataScienceMixin
|
27
28
|
from ads.common.oci_logging import OCILog
|
@@ -782,7 +783,7 @@ class DataScienceJobRun(
|
|
782
783
|
# Update runtime from job run
|
783
784
|
from ads.jobs import Job
|
784
785
|
|
785
|
-
job = Job.from_dict(job_dict)
|
786
|
+
job = Job(**self.auth).from_dict(job_dict)
|
786
787
|
envs = job.runtime.envs
|
787
788
|
run_config_override = run_dict.get("jobConfigurationOverrideDetails", {})
|
788
789
|
envs.update(run_config_override.get("environmentVariables", {}))
|
@@ -811,7 +812,7 @@ class DataScienceJobRun(
|
|
811
812
|
"""
|
812
813
|
from ads.jobs import Job
|
813
814
|
|
814
|
-
return Job.from_datascience_job(self.job_id)
|
815
|
+
return Job(**self.auth).from_datascience_job(self.job_id)
|
815
816
|
|
816
817
|
def download(self, to_dir):
|
817
818
|
"""Downloads files from job run output URI to local.
|
@@ -953,9 +954,9 @@ class DataScienceJob(Infrastructure):
|
|
953
954
|
if key not in attribute_map and key.lower() in snake_to_camel_map:
|
954
955
|
value = spec.pop(key)
|
955
956
|
if isinstance(value, dict):
|
956
|
-
spec[
|
957
|
-
|
958
|
-
|
957
|
+
spec[snake_to_camel_map[key.lower()]] = (
|
958
|
+
DataScienceJob.standardize_spec(value)
|
959
|
+
)
|
959
960
|
else:
|
960
961
|
spec[snake_to_camel_map[key.lower()]] = value
|
961
962
|
return spec
|
@@ -971,6 +972,9 @@ class DataScienceJob(Infrastructure):
|
|
971
972
|
Specification as keyword arguments.
|
972
973
|
If spec contains the same key as the one in kwargs, the value from kwargs will be used.
|
973
974
|
"""
|
975
|
+
# Saves a copy of the auth object from the class to the instance.
|
976
|
+
# Future changes to the class level Job.auth will not affect the auth of existing instances.
|
977
|
+
self.auth = self.auth.copy()
|
974
978
|
for key in ["config", "signer", "client_kwargs"]:
|
975
979
|
if kwargs.get(key):
|
976
980
|
self.auth[key] = kwargs.pop(key)
|
@@ -1710,6 +1714,15 @@ class DataScienceJob(Infrastructure):
|
|
1710
1714
|
"""
|
1711
1715
|
return cls.from_dsc_job(DSCJob(**cls.auth).from_ocid(job_id))
|
1712
1716
|
|
1717
|
+
@class_or_instance_method
|
1718
|
+
def from_dict(cls, obj_dict: dict):
|
1719
|
+
"""Initialize the object from a Python dictionary"""
|
1720
|
+
if inspect.isclass(cls):
|
1721
|
+
job_cls = cls
|
1722
|
+
else:
|
1723
|
+
job_cls = cls.__class__
|
1724
|
+
return job_cls(spec=obj_dict.get("spec"), **cls.auth)
|
1725
|
+
|
1713
1726
|
@class_or_instance_method
|
1714
1727
|
def list_jobs(cls, compartment_id: str = None, **kwargs) -> List[DataScienceJob]:
|
1715
1728
|
"""Lists all jobs in a compartment.
|
@@ -29,7 +29,7 @@ _cwd = os.path.dirname(__file__)
|
|
29
29
|
TESTS_PATH = os.path.join(_cwd, "resources", "tests.yaml")
|
30
30
|
HTML_PATH = os.path.join(_cwd, "resources", "template.html")
|
31
31
|
CONFIG_PATH = os.path.join(_cwd, "resources", "config.yaml")
|
32
|
-
PYTHON_VER_PATTERN = "^([3])(\.[6-9])(\.\d+)?$"
|
32
|
+
PYTHON_VER_PATTERN = "^([3])(\.([6-9]|1[0-2]))(\.\d+)?$"
|
33
33
|
PAR_URL = "https://objectstorage.us-ashburn-1.oraclecloud.com/p/WyjtfVIG0uda-P3-2FmAfwaLlXYQZbvPZmfX1qg0-sbkwEQO6jpwabGr2hMDBmBp/n/ociodscdev/b/service-conda-packs/o/service_pack/index.json"
|
34
34
|
|
35
35
|
TESTS = {
|
@@ -10,7 +10,6 @@ from ads.opctl.operator.lowcode.common.utils import (
|
|
10
10
|
merge_category_columns,
|
11
11
|
)
|
12
12
|
from ads.opctl.operator.lowcode.common.data import AbstractData
|
13
|
-
from ads.opctl.operator.lowcode.common.data import AbstractData
|
14
13
|
from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
|
15
14
|
from ads.opctl import logger
|
16
15
|
import pandas as pd
|
@@ -56,6 +55,10 @@ class AnomalyDatasets:
|
|
56
55
|
self.X_valid_dict = self.valid_data.X_valid_dict
|
57
56
|
self.y_valid_dict = self.valid_data.y_valid_dict
|
58
57
|
|
58
|
+
# Returns raw data based on the series_id i.e; the merged target_category_column value
|
59
|
+
def get_raw_data_by_cat(self, category):
|
60
|
+
return self._data.get_raw_data_by_cat(category)
|
61
|
+
|
59
62
|
|
60
63
|
class AnomalyOutput:
|
61
64
|
def __init__(self, date_column):
|
@@ -94,38 +97,28 @@ class AnomalyOutput:
|
|
94
97
|
outliers = pd.merge(outliers, scores, on=self.date_column, how="inner")
|
95
98
|
return outliers
|
96
99
|
|
97
|
-
def get_inliers(self,
|
100
|
+
def get_inliers(self, datasets):
|
98
101
|
inliers = pd.DataFrame()
|
99
102
|
|
100
103
|
for category in self.list_categories():
|
101
104
|
inliers = pd.concat(
|
102
105
|
[
|
103
106
|
inliers,
|
104
|
-
self.get_inliers_by_cat(
|
105
|
-
category,
|
106
|
-
data[data[OutputColumns.Series] == category]
|
107
|
-
.reset_index(drop=True)
|
108
|
-
.drop(OutputColumns.Series, axis=1),
|
109
|
-
),
|
107
|
+
self.get_inliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
|
110
108
|
],
|
111
109
|
axis=0,
|
112
110
|
ignore_index=True,
|
113
111
|
)
|
114
112
|
return inliers
|
115
113
|
|
116
|
-
def get_outliers(self,
|
114
|
+
def get_outliers(self, datasets):
|
117
115
|
outliers = pd.DataFrame()
|
118
116
|
|
119
117
|
for category in self.list_categories():
|
120
118
|
outliers = pd.concat(
|
121
119
|
[
|
122
120
|
outliers,
|
123
|
-
self.get_outliers_by_cat(
|
124
|
-
category,
|
125
|
-
data[data[OutputColumns.Series] == category]
|
126
|
-
.reset_index(drop=True)
|
127
|
-
.drop(OutputColumns.Series, axis=1),
|
128
|
-
),
|
121
|
+
self.get_outliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
|
129
122
|
],
|
130
123
|
axis=0,
|
131
124
|
ignore_index=True,
|
@@ -26,8 +26,9 @@ class AutoMLXOperatorModel(AnomalyOperatorBaseModel):
|
|
26
26
|
)
|
27
27
|
def _build_model(self) -> pd.DataFrame:
|
28
28
|
from automlx import init
|
29
|
+
import logging
|
29
30
|
try:
|
30
|
-
init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
|
31
|
+
init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}}, loglevel=logging.CRITICAL)
|
31
32
|
except Exception as e:
|
32
33
|
logger.info("Ray already initialized")
|
33
34
|
date_column = self.spec.datetime_column.name
|
@@ -272,7 +272,7 @@ class AnomalyOperatorBaseModel(ABC):
|
|
272
272
|
f2.write(f1.read())
|
273
273
|
|
274
274
|
if self.spec.generate_inliers:
|
275
|
-
inliers = anomaly_output.get_inliers(self.datasets
|
275
|
+
inliers = anomaly_output.get_inliers(self.datasets)
|
276
276
|
write_data(
|
277
277
|
data=inliers,
|
278
278
|
filename=os.path.join(unique_output_dir, self.spec.inliers_filename),
|
@@ -280,7 +280,7 @@ class AnomalyOperatorBaseModel(ABC):
|
|
280
280
|
storage_options=storage_options,
|
281
281
|
)
|
282
282
|
|
283
|
-
outliers = anomaly_output.get_outliers(self.datasets
|
283
|
+
outliers = anomaly_output.get_outliers(self.datasets)
|
284
284
|
write_data(
|
285
285
|
data=outliers,
|
286
286
|
filename=os.path.join(unique_output_dir, self.spec.outliers_filename),
|
@@ -36,6 +36,21 @@ class TestData(InputData):
|
|
36
36
|
"""Class representing operator specification test data details."""
|
37
37
|
|
38
38
|
|
39
|
+
@dataclass(repr=True)
|
40
|
+
class PreprocessingSteps(DataClassSerializable):
|
41
|
+
"""Class representing preprocessing steps for operator."""
|
42
|
+
|
43
|
+
missing_value_imputation: bool = True
|
44
|
+
outlier_treatment: bool = False
|
45
|
+
|
46
|
+
|
47
|
+
@dataclass(repr=True)
|
48
|
+
class DataPreprocessor(DataClassSerializable):
|
49
|
+
"""Class representing operator specification preprocessing details."""
|
50
|
+
|
51
|
+
enabled: bool = True
|
52
|
+
steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)
|
53
|
+
|
39
54
|
@dataclass(repr=True)
|
40
55
|
class AnomalyOperatorSpec(DataClassSerializable):
|
41
56
|
"""Class representing operator specification."""
|
@@ -74,7 +89,9 @@ class AnomalyOperatorSpec(DataClassSerializable):
|
|
74
89
|
self.generate_inliers if self.generate_inliers is not None else False
|
75
90
|
)
|
76
91
|
self.model_kwargs = self.model_kwargs or dict()
|
77
|
-
|
92
|
+
self.preprocessing = (
|
93
|
+
self.preprocessing if self.preprocessing is not None else DataPreprocessor(enabled=True)
|
94
|
+
)
|
78
95
|
|
79
96
|
@dataclass(repr=True)
|
80
97
|
class AnomalyOperatorConfig(OperatorConfig):
|
@@ -307,11 +307,23 @@ spec:
|
|
307
307
|
description: "When provided, target_category_columns [list] indexes the data into multiple related datasets for anomaly detection"
|
308
308
|
|
309
309
|
preprocessing:
|
310
|
-
type:
|
310
|
+
type: dict
|
311
311
|
required: false
|
312
|
-
|
313
|
-
|
314
|
-
|
312
|
+
schema:
|
313
|
+
enabled:
|
314
|
+
type: boolean
|
315
|
+
required: false
|
316
|
+
default: true
|
317
|
+
meta:
|
318
|
+
description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
|
319
|
+
steps:
|
320
|
+
type: dict
|
321
|
+
required: false
|
322
|
+
schema:
|
323
|
+
missing_value_imputation:
|
324
|
+
type: boolean
|
325
|
+
required: false
|
326
|
+
default: true
|
315
327
|
|
316
328
|
generate_report:
|
317
329
|
type: boolean
|
@@ -16,6 +16,7 @@ from ads.opctl.operator.lowcode.common.errors import (
|
|
16
16
|
DataMismatchError,
|
17
17
|
)
|
18
18
|
from abc import ABC
|
19
|
+
import pandas as pd
|
19
20
|
|
20
21
|
|
21
22
|
class AbstractData(ABC):
|
@@ -26,6 +27,19 @@ class AbstractData(ABC):
|
|
26
27
|
self.name = name
|
27
28
|
self.load_transform_ingest_data(spec)
|
28
29
|
|
30
|
+
def get_raw_data_by_cat(self, category):
|
31
|
+
mapping = self._data_transformer.get_target_category_columns_map()
|
32
|
+
# For given category, mapping gives the target_category_columns and it's values.
|
33
|
+
# condition filters raw_data based on the values of target_category_columns for the given category
|
34
|
+
condition = pd.Series(True, index=self.raw_data.index)
|
35
|
+
if category in mapping:
|
36
|
+
for col, val in mapping[category].items():
|
37
|
+
condition &= (self.raw_data[col] == val)
|
38
|
+
data_by_cat = self.raw_data[condition].reset_index(drop=True)
|
39
|
+
data_by_cat = self._data_transformer._format_datetime_col(data_by_cat)
|
40
|
+
return data_by_cat
|
41
|
+
|
42
|
+
|
29
43
|
def get_dict_by_series(self):
|
30
44
|
if not self._data_dict:
|
31
45
|
for s_id in self.list_series_ids():
|
@@ -73,8 +87,8 @@ class AbstractData(ABC):
|
|
73
87
|
return data
|
74
88
|
|
75
89
|
def load_transform_ingest_data(self, spec):
|
76
|
-
raw_data = self._load_data(getattr(spec, self.name))
|
77
|
-
self.data = self._transform_data(spec, raw_data)
|
90
|
+
self.raw_data = self._load_data(getattr(spec, self.name))
|
91
|
+
self.data = self._transform_data(spec, self.raw_data)
|
78
92
|
self._ingest_data(spec)
|
79
93
|
|
80
94
|
def _ingest_data(self, spec):
|