PyPI - oracle-ads - Versions diffs - 2.11.5__py3-none-any.whl → 2.11.7__py3-none-any.whl - Mend

oracle-ads 2.11.5__py3-none-any.whl → 2.11.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

ads/aqua/utils.py +5 -2
ads/catalog/model.py +3 -3
ads/catalog/notebook.py +3 -3
ads/catalog/project.py +2 -2
ads/catalog/summary.py +2 -4
ads/cli.py +2 -1
ads/common/serializer.py +1 -1
ads/data_labeling/metadata.py +2 -2
ads/dataset/dataset.py +3 -5
ads/dataset/factory.py +2 -3
ads/dataset/label_encoder.py +1 -1
ads/dataset/sampled_dataset.py +3 -5
ads/jobs/ads_job.py +26 -2
ads/jobs/builders/infrastructure/dsc_job.py +20 -7
ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +1 -1
ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +8 -15
ads/opctl/operator/lowcode/anomaly/model/automlx.py +2 -1
ads/opctl/operator/lowcode/anomaly/model/base_model.py +2 -2
ads/opctl/operator/lowcode/anomaly/operator_config.py +18 -1
ads/opctl/operator/lowcode/anomaly/schema.yaml +16 -4
ads/opctl/operator/lowcode/common/data.py +16 -2
ads/opctl/operator/lowcode/common/transformations.py +48 -14
ads/opctl/operator/lowcode/forecast/environment.yaml +1 -0
ads/opctl/operator/lowcode/forecast/model/arima.py +21 -12
ads/opctl/operator/lowcode/forecast/model/automlx.py +79 -72
ads/opctl/operator/lowcode/forecast/model/autots.py +182 -164
ads/opctl/operator/lowcode/forecast/model/base_model.py +59 -41
ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +47 -47
ads/opctl/operator/lowcode/forecast/model/prophet.py +48 -48
ads/opctl/operator/lowcode/forecast/operator_config.py +18 -2
ads/opctl/operator/lowcode/forecast/schema.yaml +20 -4
ads/opctl/operator/lowcode/forecast/utils.py +4 -0
ads/pipeline/ads_pipeline_step.py +11 -12
{oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/METADATA +4 -3
{oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/RECORD +38 -38
{oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/LICENSE.txt +0 -0
{oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/WHEEL +0 -0
{oracle_ads-2.11.5.dist-info → oracle_ads-2.11.7.dist-info}/entry_points.txt +0 -0

ads/aqua/utils.py CHANGED Viewed

@@ -268,9 +268,12 @@ def is_valid_ocid(ocid: str) -> bool:
     bool:
         Whether the given ocid is valid.
     """
-    pattern = r"^ocid1\.([a-z0-9_]+)\.([a-z0-9]+)\.([a-z0-9]*)(\.[^.]+)?\.([a-z0-9_]+)$"
+    # TODO: revisit pattern
+    pattern = (
+        r"^ocid1\.([a-z0-9_]+)\.([a-z0-9]+)\.([a-z0-9-]*)(\.[^.]+)?\.([a-z0-9_]+)$"
+    )
     match = re.match(pattern, ocid)
-    return bool(match)
+    return True
 def get_resource_type(ocid: str) -> str:

ads/catalog/model.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2023 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 import warnings
@@ -404,13 +404,13 @@ class Model:
     def _repr_html_(self):
         """Shows model in dataframe format."""
         return (
-            self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).render()
+            self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).to_html()
         )
     def __repr__(self):
         """Shows model in dataframe format."""
         return (
-            self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).render()
+            self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).to_html()
         )
     def activate(self) -> None:

ads/catalog/notebook.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 import warnings
@@ -220,7 +220,7 @@ class NotebookCatalog:
                 lambda x: "<a href='%s'>%s</a>"
                 % (x if x.startswith("http") else "http://%s" % x, "open")
             )
-            return df.style.set_properties(**{"margin-left": "0px"}).render()
+            return df.style.set_properties(**{"margin-left": "0px"}).to_html()
         notebook.commit = MethodType(commit, notebook)
         notebook.rollback = MethodType(rollback, notebook)
@@ -295,7 +295,7 @@ class NotebookCatalog:
         shape=None,
         block_storage_size_in_gbs=None,
         subnet_id=None,
-        **kwargs
+        **kwargs,
     ):
         """
         Create a new notebook session with the supplied details.

ads/catalog/project.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 import warnings
@@ -237,7 +237,7 @@ class ProjectCatalog(Mapping):
             return (
                 project_self.to_dataframe()
                 .style.set_properties(**{"margin-left": "0px"})
-                .render()
+                .to_html()
             )
         project.commit = MethodType(commit, project)

ads/catalog/summary.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
-# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 from __future__ import print_function, absolute_import
@@ -98,7 +98,6 @@ class SummaryList(list, metaclass=ABCMeta):
         pass
     def to_dataframe(self, datetime_format=None):
         """
         Returns the model catalog summary as a pandas dataframe
@@ -121,7 +120,6 @@ class SummaryList(list, metaclass=ABCMeta):
     @runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
     def show_in_notebook(self, datetime_format=None):
         """
         Displays the model catalog summary in a Jupyter Notebook cell
@@ -144,7 +142,7 @@ class SummaryList(list, metaclass=ABCMeta):
     def _repr_html_(self):
         return self.df.style.applymap(
             self._color_lifecycle_state, subset=["lifecycle_state"]
-        ).render()
+        ).to_html()
     def _sort_by(self, cols, reverse=False):
         return sorted(

ads/cli.py CHANGED Viewed

@@ -9,7 +9,6 @@ import sys
 import fire
 from ads.common import logger
-from ads.aqua.cli import AquaCommand
 try:
     import click
@@ -73,6 +72,8 @@ fire.core.parser.SeparateFlagArgs = _SeparateFlagArgs
 def cli():
     if len(sys.argv) > 1 and sys.argv[1] == "aqua":
+        from ads.aqua.cli import AquaCommand
         fire.Fire(AquaCommand, command=sys.argv[2:], name="ads aqua")
     else:
         click_cli()

ads/common/serializer.py CHANGED Viewed

@@ -464,7 +464,7 @@ class DataClassSerializable(Serializable):
             )
         obj = cls(
-            **{key: obj_dict.get(key) for key in allowed_fields if key in obj_dict}
+            **{key: obj_dict.get(key) for key in allowed_fields}
         )
         for key, value in obj_dict.items():

ads/data_labeling/metadata.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8; -*-
-# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 from dataclasses import asdict, dataclass, field
@@ -75,7 +75,7 @@ class Metadata(DataClassSerializable):
     def _repr_html_(self):
         """Shows metadata in dataframe format."""
         return (
-            self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).render()
+            self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).to_html()
         )
     @classmethod

ads/dataset/dataset.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
-# Copyright (c) 2020, 2023 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 from __future__ import print_function, absolute_import, division
@@ -85,7 +85,6 @@ class ADSDataset(PandasDataset):
         interactive=False,
         **kwargs,
     ):
         #
         # to keep performance high and linear no matter the size of the distributed dataset we
         # create a pandas df that's used internally because this has a fixed upper size.
@@ -204,7 +203,7 @@ class ADSDataset(PandasDataset):
                     .style.set_table_styles(utils.get_dataframe_styles())
                     .set_table_attributes("class=table")
                     .hide_index()
-                    .render()
+                    .to_html()
                 )
             )
         )
@@ -263,7 +262,7 @@ class ADSDataset(PandasDataset):
                             self.style.set_table_styles(utils.get_dataframe_styles())
                             .set_table_attributes("class=table")
                             .hide_index()
-                            .render()
+                            .to_html()
                         )
                     )
                 )
@@ -1265,7 +1264,6 @@ class ADSDataset(PandasDataset):
         n=None,
         **init_kwargs,
     ):
         prev_doc_mode = utils.is_documentation_mode()
         set_documentation_mode(False)

ads/dataset/factory.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 from __future__ import print_function, absolute_import
@@ -367,7 +367,7 @@ class DatasetFactory:
                 HTML(
                     list_df.style.set_table_attributes("class=table")
                     .hide_index()
-                    .render()
+                    .to_html()
                 )
             )
         return list_df
@@ -884,7 +884,6 @@ class CustomFormatReaders:
         import xml.etree.cElementTree as et
         def get_children(df, node, parent, i):
             for name in node.attrib.keys():
                 df.at[i, parent + name] = node.attrib[name]
             for child in list(node):

ads/dataset/label_encoder.py CHANGED Viewed

@@ -52,7 +52,7 @@ class DataFrameLabelEncoder(TransformerMixin):
         """
         for column in X.columns:
-            if X[column].dtype.name in ["object", "category"]:
+            if X[column].dtype.name in ["object", "category", "bool"]:
                 X[column] = X[column].astype(str)
                 self.label_encoders[column] = LabelEncoder()
                 self.label_encoders[column].fit(X[column])

ads/dataset/sampled_dataset.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 import matplotlib
@@ -49,6 +49,7 @@ from ads.common.decorator.runtime_dependency import (
 NATURAL_EARTH_DATASET = "naturalearth_lowres"
 class PandasDataset(object):
     """
     This class provides APIs that can work on a sampled dataset.
@@ -107,7 +108,6 @@ class PandasDataset(object):
         self.sampled_df = self.sampled_df.reset_index(drop=True)
     def _find_feature_subset(self, df, target_name, include_n_features=32):
         if len(df.columns) <= include_n_features:
             return self.sampled_df
         else:
@@ -212,7 +212,6 @@ class PandasDataset(object):
     def _generate_features_html(
         self, is_wide_dataset, n_features, df_stats, visualizations_follow
     ):
         html = utils.get_bootstrap_styles()
         if is_wide_dataset:
@@ -233,7 +232,7 @@ class PandasDataset(object):
                 if ("float" in str(type(x))) or ("int" in str(type(x)))
                 else x
             )
-            .render()
+            .to_html()
         )
         if visualizations_follow:
@@ -244,7 +243,6 @@ class PandasDataset(object):
     def _generate_warnings_html(
         self, is_wide_dataset, n_rows, n_features, df_stats, out, accordion
     ):
         #
         # create the "Warnings" accordion section:
         #  - show high cardinal categoricals

ads/jobs/ads_job.py CHANGED Viewed

@@ -10,6 +10,7 @@ from urllib.parse import urlparse
 import fsspec
 import oci
+import yaml
 from ads.common.auth import default_signer
 from ads.common.decorator.utils import class_or_instance_method
 from ads.jobs.builders.base import Builder
@@ -263,6 +264,9 @@ class Job(Builder):
             Job runtime, by default None.
         """
+        # Saves a copy of the auth object from the class to the instance.
+        # Future changes to the class level Job.auth will not affect the auth of existing instances.
+        self.auth = self.auth.copy()
         for key in ["config", "signer", "client_kwargs"]:
             if kwargs.get(key):
                 self.auth[key] = kwargs.pop(key)
@@ -545,6 +549,26 @@ class Job(Builder):
             "spec": spec,
         }
+    @class_or_instance_method
+    def from_yaml(
+        cls,
+        yaml_string: str = None,
+        uri: str = None,
+        loader: callable = yaml.SafeLoader,
+        **kwargs,
+    ):
+        if inspect.isclass(cls):
+            job = cls(**cls.auth)
+        else:
+            job = cls.__class__(**cls.auth)
+        if yaml_string:
+            return job.from_dict(yaml.load(yaml_string, Loader=loader))
+        if uri:
+            yaml_dict = yaml.load(cls._read_from_file(uri=uri, **kwargs), Loader=loader)
+            return job.from_dict(yaml_dict)
+        raise ValueError("Must provide either YAML string or URI location")
     @class_or_instance_method
     def from_dict(cls, config: dict) -> "Job":
         """Initializes a job from a dictionary containing the configurations.
@@ -573,9 +597,9 @@ class Job(Builder):
             "runtime": cls._RUNTIME_MAPPING,
         }
         if inspect.isclass(cls):
-            job = cls()
+            job = cls(**cls.auth)
         else:
-            job = cls.__class__()
+            job = cls.__class__(**cls.auth)
         for key, value in spec.items():
             if key in mappings:

ads/jobs/builders/infrastructure/dsc_job.py CHANGED Viewed

@@ -6,8 +6,8 @@
 from __future__ import annotations
 import datetime
+import inspect
 import logging
-import oci
 import os
 import time
 import traceback
@@ -17,11 +17,12 @@ from string import Template
 from typing import Any, Dict, List, Optional, Union
 import fsspec
+import oci
 import oci.data_science
 import oci.util as oci_util
-import yaml
 from oci.data_science.models import JobInfrastructureConfigurationDetails
 from oci.exceptions import ServiceError
+import yaml
 from ads.common import utils
 from ads.common.oci_datascience import DSCNotebookSession, OCIDataScienceMixin
 from ads.common.oci_logging import OCILog
@@ -782,7 +783,7 @@ class DataScienceJobRun(
         # Update runtime from job run
         from ads.jobs import Job
-        job = Job.from_dict(job_dict)
+        job = Job(**self.auth).from_dict(job_dict)
         envs = job.runtime.envs
         run_config_override = run_dict.get("jobConfigurationOverrideDetails", {})
         envs.update(run_config_override.get("environmentVariables", {}))
@@ -811,7 +812,7 @@ class DataScienceJobRun(
         """
         from ads.jobs import Job
-        return Job.from_datascience_job(self.job_id)
+        return Job(**self.auth).from_datascience_job(self.job_id)
     def download(self, to_dir):
         """Downloads files from job run output URI to local.
@@ -953,9 +954,9 @@ class DataScienceJob(Infrastructure):
             if key not in attribute_map and key.lower() in snake_to_camel_map:
                 value = spec.pop(key)
                 if isinstance(value, dict):
-                    spec[
-                        snake_to_camel_map[key.lower()]
-                    ] = DataScienceJob.standardize_spec(value)
+                    spec[snake_to_camel_map[key.lower()]] = (
+                        DataScienceJob.standardize_spec(value)
+                    )
                 else:
                     spec[snake_to_camel_map[key.lower()]] = value
         return spec
@@ -971,6 +972,9 @@ class DataScienceJob(Infrastructure):
             Specification as keyword arguments.
             If spec contains the same key as the one in kwargs, the value from kwargs will be used.
         """
+        # Saves a copy of the auth object from the class to the instance.
+        # Future changes to the class level Job.auth will not affect the auth of existing instances.
+        self.auth = self.auth.copy()
         for key in ["config", "signer", "client_kwargs"]:
             if kwargs.get(key):
                 self.auth[key] = kwargs.pop(key)
@@ -1710,6 +1714,15 @@ class DataScienceJob(Infrastructure):
         """
         return cls.from_dsc_job(DSCJob(**cls.auth).from_ocid(job_id))
+    @class_or_instance_method
+    def from_dict(cls, obj_dict: dict):
+        """Initialize the object from a Python dictionary"""
+        if inspect.isclass(cls):
+            job_cls = cls
+        else:
+            job_cls = cls.__class__
+        return job_cls(spec=obj_dict.get("spec"), **cls.auth)
     @class_or_instance_method
     def list_jobs(cls, compartment_id: str = None, **kwargs) -> List[DataScienceJob]:
         """Lists all jobs in a compartment.

ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py CHANGED Viewed

@@ -29,7 +29,7 @@ _cwd = os.path.dirname(__file__)
 TESTS_PATH = os.path.join(_cwd, "resources", "tests.yaml")
 HTML_PATH = os.path.join(_cwd, "resources", "template.html")
 CONFIG_PATH = os.path.join(_cwd, "resources", "config.yaml")
-PYTHON_VER_PATTERN = "^([3])(\.[6-9])(\.\d+)?$"
+PYTHON_VER_PATTERN = "^([3])(\.([6-9]|1[0-2]))(\.\d+)?$"
 PAR_URL = "https://objectstorage.us-ashburn-1.oraclecloud.com/p/WyjtfVIG0uda-P3-2FmAfwaLlXYQZbvPZmfX1qg0-sbkwEQO6jpwabGr2hMDBmBp/n/ociodscdev/b/service-conda-packs/o/service_pack/index.json"
 TESTS = {

ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py CHANGED Viewed

@@ -10,7 +10,6 @@ from ads.opctl.operator.lowcode.common.utils import (
     merge_category_columns,
 )
 from ads.opctl.operator.lowcode.common.data import AbstractData
-from ads.opctl.operator.lowcode.common.data import AbstractData
 from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
 from ads.opctl import logger
 import pandas as pd
@@ -56,6 +55,10 @@ class AnomalyDatasets:
             self.X_valid_dict = self.valid_data.X_valid_dict
             self.y_valid_dict = self.valid_data.y_valid_dict
+    # Returns raw data based on the series_id i.e; the merged target_category_column value
+    def get_raw_data_by_cat(self, category):
+        return self._data.get_raw_data_by_cat(category)
 class AnomalyOutput:
     def __init__(self, date_column):
@@ -94,38 +97,28 @@ class AnomalyOutput:
             outliers = pd.merge(outliers, scores, on=self.date_column, how="inner")
         return outliers
-    def get_inliers(self, data):
+    def get_inliers(self, datasets):
         inliers = pd.DataFrame()
         for category in self.list_categories():
             inliers = pd.concat(
                 [
                     inliers,
-                    self.get_inliers_by_cat(
-                        category,
-                        data[data[OutputColumns.Series] == category]
-                        .reset_index(drop=True)
-                        .drop(OutputColumns.Series, axis=1),
-                    ),
+                    self.get_inliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
                 ],
                 axis=0,
                 ignore_index=True,
             )
         return inliers
-    def get_outliers(self, data):
+    def get_outliers(self, datasets):
         outliers = pd.DataFrame()
         for category in self.list_categories():
             outliers = pd.concat(
                 [
                     outliers,
-                    self.get_outliers_by_cat(
-                        category,
-                        data[data[OutputColumns.Series] == category]
-                        .reset_index(drop=True)
-                        .drop(OutputColumns.Series, axis=1),
-                    ),
+                    self.get_outliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
                 ],
                 axis=0,
                 ignore_index=True,

ads/opctl/operator/lowcode/anomaly/model/automlx.py CHANGED Viewed

@@ -26,8 +26,9 @@ class AutoMLXOperatorModel(AnomalyOperatorBaseModel):
     )
     def _build_model(self) -> pd.DataFrame:
         from automlx import init
+        import logging
         try:
-            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
+            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}}, loglevel=logging.CRITICAL)
         except Exception as e:
             logger.info("Ray already initialized")
         date_column = self.spec.datetime_column.name

ads/opctl/operator/lowcode/anomaly/model/base_model.py CHANGED Viewed

@@ -272,7 +272,7 @@ class AnomalyOperatorBaseModel(ABC):
                     f2.write(f1.read())
         if self.spec.generate_inliers:
-            inliers = anomaly_output.get_inliers(self.datasets.data)
+            inliers = anomaly_output.get_inliers(self.datasets)
             write_data(
                 data=inliers,
                 filename=os.path.join(unique_output_dir, self.spec.inliers_filename),
@@ -280,7 +280,7 @@ class AnomalyOperatorBaseModel(ABC):
                 storage_options=storage_options,
             )
-        outliers = anomaly_output.get_outliers(self.datasets.data)
+        outliers = anomaly_output.get_outliers(self.datasets)
         write_data(
             data=outliers,
             filename=os.path.join(unique_output_dir, self.spec.outliers_filename),

ads/opctl/operator/lowcode/anomaly/operator_config.py CHANGED Viewed

@@ -36,6 +36,21 @@ class TestData(InputData):
     """Class representing operator specification test data details."""
+@dataclass(repr=True)
+class PreprocessingSteps(DataClassSerializable):
+    """Class representing preprocessing steps for operator."""
+    missing_value_imputation: bool = True
+    outlier_treatment: bool = False
+@dataclass(repr=True)
+class DataPreprocessor(DataClassSerializable):
+    """Class representing operator specification preprocessing details."""
+    enabled: bool = True
+    steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)
 @dataclass(repr=True)
 class AnomalyOperatorSpec(DataClassSerializable):
     """Class representing operator specification."""
@@ -74,7 +89,9 @@ class AnomalyOperatorSpec(DataClassSerializable):
             self.generate_inliers if self.generate_inliers is not None else False
         )
         self.model_kwargs = self.model_kwargs or dict()
+        self.preprocessing = (
+            self.preprocessing if self.preprocessing is not None else DataPreprocessor(enabled=True)
+        )
 @dataclass(repr=True)
 class AnomalyOperatorConfig(OperatorConfig):

ads/opctl/operator/lowcode/anomaly/schema.yaml CHANGED Viewed

@@ -307,11 +307,23 @@ spec:
         description: "When provided, target_category_columns [list] indexes the data into multiple related datasets for anomaly detection"
     preprocessing:
-      type: boolean
+      type: dict
       required: false
-      default: true
-      meta:
-        description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
+      schema:
+        enabled:
+          type: boolean
+          required: false
+          default: true
+          meta:
+            description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
+        steps:
+          type: dict
+          required: false
+          schema:
+            missing_value_imputation:
+              type: boolean
+              required: false
+              default: true
     generate_report:
       type: boolean

ads/opctl/operator/lowcode/common/data.py CHANGED Viewed

@@ -16,6 +16,7 @@ from ads.opctl.operator.lowcode.common.errors import (
     DataMismatchError,
 )
 from abc import ABC
+import pandas as pd
 class AbstractData(ABC):
@@ -26,6 +27,19 @@ class AbstractData(ABC):
         self.name = name
         self.load_transform_ingest_data(spec)
+    def get_raw_data_by_cat(self, category):
+        mapping = self._data_transformer.get_target_category_columns_map()
+        # For given category, mapping gives the target_category_columns and it's values.
+        # condition filters raw_data based on the values of target_category_columns for the given category
+        condition = pd.Series(True, index=self.raw_data.index)
+        if category in mapping:
+            for col, val in mapping[category].items():
+                condition &= (self.raw_data[col] == val)
+        data_by_cat = self.raw_data[condition].reset_index(drop=True)
+        data_by_cat = self._data_transformer._format_datetime_col(data_by_cat)
+        return data_by_cat
     def get_dict_by_series(self):
         if not self._data_dict:
             for s_id in self.list_series_ids():
@@ -73,8 +87,8 @@ class AbstractData(ABC):
         return data
     def load_transform_ingest_data(self, spec):
-        raw_data = self._load_data(getattr(spec, self.name))
-        self.data = self._transform_data(spec, raw_data)
+        self.raw_data = self._load_data(getattr(spec, self.name))
+        self.data = self._transform_data(spec, self.raw_data)
         self._ingest_data(spec)
     def _ingest_data(self, spec):

oracle-ads 2.11.5__py3-none-any.whl → 2.11.7__py3-none-any.whl

oracle-ads 2.11.5__py3-none-any.whl → 2.11.7__py3-none-any.whl