oracle-ads 2.11.14__py3-none-any.whl → 2.11.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/common/utils.py +77 -20
- ads/aqua/constants.py +30 -17
- ads/aqua/evaluation/evaluation.py +118 -107
- ads/aqua/extension/evaluation_handler.py +4 -7
- ads/aqua/extension/evaluation_ws_msg_handler.py +0 -4
- ads/aqua/model/entities.py +6 -8
- ads/aqua/modeldeployment/constants.py +0 -16
- ads/aqua/modeldeployment/deployment.py +45 -67
- ads/opctl/operator/common/operator_config.py +1 -0
- ads/opctl/operator/lowcode/anomaly/README.md +3 -3
- ads/opctl/operator/lowcode/anomaly/__main__.py +5 -6
- ads/opctl/operator/lowcode/anomaly/const.py +8 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +6 -2
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +29 -20
- ads/opctl/operator/lowcode/anomaly/model/factory.py +41 -13
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +79 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +79 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +12 -2
- ads/opctl/operator/lowcode/anomaly/utils.py +16 -13
- ads/opctl/operator/lowcode/common/data.py +2 -1
- ads/opctl/operator/lowcode/common/transformations.py +37 -9
- ads/opctl/operator/lowcode/common/utils.py +32 -10
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +14 -18
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +4 -2
- ads/opctl/operator/lowcode/forecast/schema.yaml +9 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +25 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +198 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +58 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +88 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.15.dist-info}/METADATA +6 -1
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.15.dist-info}/RECORD +44 -28
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.15.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.15.dist-info}/WHEEL +0 -0
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.15.dist-info}/entry_points.txt +0 -0
ads/aqua/modeldeployment/deployment.py

```diff
@@ -1,31 +1,27 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright (c) 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
-import json
 import logging
 from typing import Dict, List, Union
 
-from oci.data_science.models import ModelDeployment
-
 from ads.aqua.app import AquaApp, logger
 from ads.aqua.common.enums import (
-    Tags,
-    InferenceContainerParamType,
-    InferenceContainerType,
     InferenceContainerTypeFamily,
+    Tags,
 )
 from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
 from ads.aqua.common.utils import (
+    get_combined_params,
     get_container_config,
     get_container_image,
+    get_container_params_type,
     get_model_by_reference_paths,
     get_ocid_substring,
-    get_combined_params,
     get_params_dict,
     get_params_list,
     get_resource_name,
+    get_restricted_params_by_container,
     load_config,
 )
 from ads.aqua.constants import (
@@ -43,10 +39,6 @@ from ads.aqua.modeldeployment.entities import (
     AquaDeploymentDetail,
     ContainerSpec,
 )
-from ads.aqua.modeldeployment.constants import (
-    VLLMInferenceRestrictedParams,
-    TGIInferenceRestrictedParams,
-)
 from ads.common.object_storage_details import ObjectStorageDetails
 from ads.common.utils import get_log_links
 from ads.config import (
@@ -187,24 +179,24 @@ class AquaDeploymentApp(AquaApp):
             model_name = aqua_model.custom_metadata_list.get(
                 FineTuneCustomMetadata.FINE_TUNE_SOURCE_NAME
             ).value
-        except:
+        except ValueError as err:
             raise AquaValueError(
                 f"Either {FineTuneCustomMetadata.FINE_TUNE_SOURCE} or {FineTuneCustomMetadata.FINE_TUNE_SOURCE_NAME} is missing "
                 f"from custom metadata for the model {config_source_id}"
-            )
+            ) from err
 
         # set up env vars
         if not env_var:
-            env_var =
+            env_var = {}
 
         try:
             model_path_prefix = aqua_model.custom_metadata_list.get(
                 MODEL_BY_REFERENCE_OSS_PATH_KEY
             ).value.rstrip("/")
-        except ValueError:
+        except ValueError as err:
             raise AquaValueError(
                 f"{MODEL_BY_REFERENCE_OSS_PATH_KEY} key is not available in the custom metadata field."
-            )
+            ) from err
 
         if ObjectStorageDetails.is_oci_path(model_path_prefix):
             os_path = ObjectStorageDetails.from_path(model_path_prefix)
@@ -219,7 +211,7 @@ class AquaDeploymentApp(AquaApp):
 
         if not fine_tune_output_path:
             raise AquaValueError(
-
+                "Fine tuned output path is not available in the model artifact."
             )
 
         os_path = ObjectStorageDetails.from_path(fine_tune_output_path)
@@ -232,7 +224,7 @@ class AquaDeploymentApp(AquaApp):
             container_type_key = aqua_model.custom_metadata_list.get(
                 AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME
             ).value
-        except ValueError:
+        except ValueError as err:
             message = (
                 f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field "
                 f"for model {aqua_model.id}."
@@ -242,7 +234,7 @@ class AquaDeploymentApp(AquaApp):
                 raise AquaValueError(
                     f"{message}. For unverified Aqua models, container_family parameter should be "
                     f"set and value can be one of {', '.join(InferenceContainerTypeFamily.values())}."
-                )
+                ) from err
             container_type_key = container_family
         try:
             # Check if the container override flag is set. If set, then the user has chosen custom image
@@ -282,11 +274,12 @@ class AquaDeploymentApp(AquaApp):
         )  # Give precendece to the input parameter
 
         deployment_config = self.get_deployment_config(config_source_id)
-
+
+        config_params = (
             deployment_config.get("configuration", UNKNOWN_DICT)
             .get(instance_shape, UNKNOWN_DICT)
             .get("parameters", UNKNOWN_DICT)
-            .get(
+            .get(get_container_params_type(container_type_key), UNKNOWN)
         )
 
         # validate user provided params
@@ -301,7 +294,7 @@ class AquaDeploymentApp(AquaApp):
                 f"and cannot be overridden or are invalid."
             )
 
-        deployment_params = get_combined_params(
+        deployment_params = get_combined_params(config_params, user_params)
 
         if deployment_params:
             params = f"{params} {deployment_params}"
@@ -429,7 +422,7 @@ class AquaDeploymentApp(AquaApp):
         # tracks unique deployments that were listed in the user compartment
         # we arbitrarily choose last 8 characters of OCID to identify MD in telemetry
         self.telemetry.record_event_async(
-            category=
+            category="aqua/deployment",
             action="list",
             detail=get_ocid_substring(deployment_id, key_len=8),
             value=state,
@@ -570,32 +563,27 @@ class AquaDeploymentApp(AquaApp):
                 f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
             )
 
-        if
-        container_type_key
-
-
-
-
-
-
+        if (
+            container_type_key
+            and container_type_key in InferenceContainerTypeFamily.values()
+        ):
+            deployment_config = self.get_deployment_config(model_id)
+            config_params = (
+                deployment_config.get("configuration", UNKNOWN_DICT)
+                .get(instance_shape, UNKNOWN_DICT)
+                .get("parameters", UNKNOWN_DICT)
+                .get(get_container_params_type(container_type_key), UNKNOWN)
+            )
+            if config_params:
+                params_list = get_params_list(config_params)
+                restricted_params_set = get_restricted_params_by_container(
+                    container_type_key
                 )
-
-
-
-            )
-
-            params = config_parameters.get(
-                InferenceContainerParamType.PARAM_TYPE_TGI, UNKNOWN
-            )
-        else:
-            params = UNKNOWN
-            logger.debug(
-                f"Default inference parameters are not available for the model {model_id} and "
-                f"instance {instance_shape}."
-            )
-        if params:
-            # account for param that can have --arg but no values, e.g. --trust-remote-code
-            default_params.extend(get_params_list(params))
+
+                # remove restricted params from the list as user cannot override them during deployment
+                for params in params_list:
+                    if params.split()[0] not in restricted_params_set:
+                        default_params.append(params)
 
         return default_params
 
@@ -629,7 +617,7 @@ class AquaDeploymentApp(AquaApp):
             container_type_key = model.custom_metadata_list.get(
                 AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME
             ).value
-        except ValueError:
+        except ValueError as err:
             message = (
                 f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field "
                 f"for model {model_id}."
@@ -640,7 +628,7 @@ class AquaDeploymentApp(AquaApp):
                 raise AquaValueError(
                     f"{message}. For unverified Aqua models, container_family parameter should be "
                     f"set and value can be one of {', '.join(InferenceContainerTypeFamily.values())}."
-                )
+                ) from err
             container_type_key = container_family
 
         container_config = get_container_config()
@@ -658,7 +646,7 @@ class AquaDeploymentApp(AquaApp):
                 f"Parameters {restricted_params} are set by Aqua "
                 f"and cannot be overridden or are invalid."
             )
-        return
+        return {"valid": True}
 
     @staticmethod
     def _find_restricted_params(
@@ -667,8 +655,7 @@ class AquaDeploymentApp(AquaApp):
         container_family: str,
     ) -> List[str]:
         """Returns a list of restricted params that user chooses to override when creating an Aqua deployment.
-        The default parameters coming from the container index json file cannot be overridden.
-        a set of parameters maintained in
+        The default parameters coming from the container index json file cannot be overridden.
 
         Parameters
         ----------
@@ -689,18 +676,9 @@ class AquaDeploymentApp(AquaApp):
         default_params_dict = get_params_dict(default_params)
         user_params_dict = get_params_dict(user_params)
 
-
-
-
-
-                InferenceContainerType.CONTAINER_TYPE_VLLM in container_family
-                and key in VLLMInferenceRestrictedParams
-            )
-            or (
-                InferenceContainerType.CONTAINER_TYPE_TGI in container_family
-                and key in TGIInferenceRestrictedParams
-            )
-        ):
-            restricted_params.append(key.lstrip("--"))
+        restricted_params_set = get_restricted_params_by_container(container_family)
+        for key, _items in user_params_dict.items():
+            if key in default_params_dict or key in restricted_params_set:
+                restricted_params.append(key.lstrip("-"))
 
         return restricted_params
```
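The refactor above replaces the hard-coded `VLLMInferenceRestrictedParams`/`TGIInferenceRestrictedParams` checks with a single `get_restricted_params_by_container` lookup. A minimal sketch of that pattern, where the family names and flag sets are illustrative placeholders rather than the real values in `ads.aqua.common.utils`:

```python
# Illustrative sketch of the centralized restricted-params lookup; the
# family names and flag sets below are placeholders, not the real ads values.
from typing import Dict, List, Set

RESTRICTED_PARAMS_BY_FAMILY: Dict[str, Set[str]] = {
    "odsc-vllm-serving": {"--port", "--host", "--served-model-name"},
    "odsc-tgi-serving": {"--port", "--hostname", "--num-shard"},
}


def get_restricted_params_by_container(container_family: str) -> Set[str]:
    """Return the flags a user may not override for a container family."""
    for family, flags in RESTRICTED_PARAMS_BY_FAMILY.items():
        if family in container_family:
            return flags
    return set()


def filter_default_params(default_params: List[str], container_family: str) -> List[str]:
    """Keep only overridable defaults, mirroring the loop in get_deployment_default_params."""
    restricted = get_restricted_params_by_container(container_family)
    return [p for p in default_params if p.split()[0] not in restricted]


print(filter_default_params(["--max-model-len 4096", "--port 8080"], "odsc-vllm-serving"))
# -> ['--max-model-len 4096']
```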
ads/opctl/operator/lowcode/anomaly/README.md

```diff
@@ -58,7 +58,7 @@ The operator will run in your local environment without requiring any additional
 
 ## 4. Running anomaly detection on the local container
 
-To run the anomaly detection
+To run the anomaly detection operator within a local container, follow these steps:
 
 Use the command below to build the anomaly detection container.
 
@@ -106,7 +106,7 @@ ads operator run -f ~/anomaly/anomaly.yaml --backend-config ~/anomaly/backend_op
 
 ## 5. Running anomaly detection in the Data Science job within container runtime
 
-To execute the anomaly detection
+To execute the anomaly detection operator within a Data Science job using container runtime, please follow the steps outlined below:
 
 You can use the following command to build the anomaly detection container. This step can be skipped if you have already done this for running the operator within a local container.
 
@@ -155,7 +155,7 @@ ads opctl watch <OCID>
 
 ## 6. Running anomaly detection in the Data Science job within conda runtime
 
-To execute the anomaly detection
+To execute the anomaly detection operator within a Data Science job using conda runtime, please follow the steps outlined below:
 
 You can use the following command to build the anomaly detection conda environment.
 
```
ads/opctl/operator/lowcode/anomaly/__main__.py

```diff
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*--
 
 # Copyright (c) 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
@@ -15,7 +14,7 @@ from ads.opctl import logger
 from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
 from ads.opctl.operator.common.utils import _parse_input_args
 
-from .model.anomaly_dataset import AnomalyDatasets
+from .model.anomaly_dataset import AnomalyDatasets
 from .operator_config import AnomalyOperatorConfig
 
 
@@ -34,7 +33,7 @@ def operate(operator_config: AnomalyOperatorConfig) -> None:
                 f"Failed to forecast with error {e.args}. Trying again with model `autots`."
             )
             operator_config.spec.model = "autots"
-            operator_config.spec.model_kwargs =
+            operator_config.spec.model_kwargs = {}
             datasets = AnomalyDatasets(operator_config.spec)
             try:
                 AnomalyOperatorModelFactory.get_model(
@@ -44,12 +43,12 @@ def operate(operator_config: AnomalyOperatorConfig) -> None:
                 logger.debug(
                     f"Failed to backup forecast with error {ee.args}. Raising original error."
                 )
-
+                raise ee
         else:
             raise e
 
 
-def verify(spec: Dict
+def verify(spec: Dict) -> bool:
     """Verifies the anomaly detection operator config."""
     operator = AnomalyOperatorConfig.from_dict(spec)
     msg_header = (
@@ -83,7 +82,7 @@ def main(raw_args: List[str]):
         yaml_string = yaml.safe_dump(json.loads(operator_spec_str))
     except json.JSONDecodeError:
         yaml_string = yaml.safe_dump(yaml.safe_load(operator_spec_str))
-    except:
+    except Exception:
         yaml_string = operator_spec_str
 
     operator_config = AnomalyOperatorConfig.from_yaml(
```
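Several hunks above narrow bare `except:` clauses and chain re-raised errors with `from err`. A standalone illustration of what both fixes buy, in plain Python; `DemoAquaValueError` and the dict-backed metadata are stand-ins, not the ads implementation:

```python
# Narrowing `except:` avoids silently swallowing KeyboardInterrupt/SystemExit,
# and `raise ... from err` preserves the original failure as __cause__.
class DemoAquaValueError(Exception):
    """Stand-in for ads.aqua.common.errors.AquaValueError."""


def read_required(metadata: dict, key: str) -> str:
    try:
        return metadata[key]
    except KeyError as err:  # narrow exception type, not a bare `except:`
        raise DemoAquaValueError(
            f"{key} is not available in the custom metadata field."
        ) from err


try:
    read_required({}, "deployment-container")
except DemoAquaValueError as e:
    print(type(e.__cause__).__name__)  # KeyError - the chained original cause
```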
ads/opctl/operator/lowcode/anomaly/const.py

```diff
@@ -16,6 +16,14 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta):
     Auto = "auto"
     # TODS = "tods"
 
+class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
+    """Supported non time-based anomaly detection models."""
+
+    OneClassSVM = "oneclasssvm"
+    IsolationForest = "isolationforest"
+    # TODO : Add DBScan
+    # DBScan = "dbscan"
+
 
 class TODSSubModels(str, metaclass=ExtendedEnumMeta):
     """Supported TODS sub models."""
```
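`NonTimeADSupportedModels` keys the models that run on data without a datetime column. A quick sketch of how such a string-valued enum behaves, using the standard library since `ExtendedEnumMeta` is internal to ADS:

```python
# Standard-library approximation; ads' ExtendedEnumMeta additionally exposes
# helpers such as keys()/values() used elsewhere in the operator.
from enum import Enum


class NonTimeADSupportedModels(str, Enum):
    OneClassSVM = "oneclasssvm"
    IsolationForest = "isolationforest"


# The str mixin lets members compare equal to raw strings from YAML configs.
assert NonTimeADSupportedModels.OneClassSVM == "oneclasssvm"
print([m.value for m in NonTimeADSupportedModels])
# -> ['oneclasssvm', 'isolationforest']
```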
ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py

```diff
@@ -84,8 +84,10 @@ class AnomalyOutput:
         scores = self.get_scores_by_cat(category)
         inlier_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 0]
         inliers = data.iloc[inlier_indices]
-        if scores is not None and not scores.empty:
+        if scores is not None and not scores.empty and self.date_column != "index":
             inliers = pd.merge(inliers, scores, on=self.date_column, how="inner")
+        else:
+            inliers = pd.merge(inliers, anomaly, left_index=True, right_index=True, how="inner")
         return inliers
 
     def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
@@ -93,8 +95,10 @@ class AnomalyOutput:
         scores = self.get_scores_by_cat(category)
         outliers_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 1]
         outliers = data.iloc[outliers_indices]
-        if scores is not None and not scores.empty:
+        if scores is not None and not scores.empty and self.date_column != "index":
             outliers = pd.merge(outliers, scores, on=self.date_column, how="inner")
+        else:
+            outliers = pd.merge(outliers, anomaly, left_index=True, right_index=True, how="inner")
         return outliers
 
     def get_inliers(self, datasets):
```
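The new `date_column != "index"` guard selects between two join strategies: time-indexed outputs are merged on the shared date column, while non-time outputs are aligned positionally on the frame index. A small pandas illustration with made-up frames:

```python
import pandas as pd

data = pd.DataFrame({"ds": ["2024-01-01", "2024-01-02"], "y": [1.0, 9.9]})
scores = pd.DataFrame({"ds": ["2024-01-01", "2024-01-02"], "score": [0.1, 0.8]})
anomaly = pd.DataFrame({"anomaly": [0, 1]})

# Time-based case: rows are joined on the shared date column.
by_date = pd.merge(data, scores, on="ds", how="inner")

# Non-time case (date_column == "index"): rows are aligned by position.
by_index = pd.merge(data, anomaly, left_index=True, right_index=True, how="inner")

print(by_date)
print(by_index)
```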
ads/opctl/operator/lowcode/anomaly/model/base_model.py

```diff
@@ -1,32 +1,33 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*--
 
 # Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
-import fsspec
-import numpy as np
 import os
-import pandas as pd
 import tempfile
 import time
 from abc import ABC, abstractmethod
-from sklearn import linear_model
 from typing import Tuple
 
+import fsspec
+import numpy as np
+import pandas as pd
+from sklearn import linear_model
+
 from ads.common.object_storage_details import ObjectStorageDetails
 from ads.opctl import logger
 from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics
 from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
 from ads.opctl.operator.lowcode.common.utils import (
-    human_time_friendly,
-    enable_print,
     disable_print,
+    enable_print,
+    human_time_friendly,
     write_data,
 )
-
-from ..const import SupportedModels
+
+from ..const import NonTimeADSupportedModels, SupportedModels
 from ..operator_config import AnomalyOperatorConfig, AnomalyOperatorSpec
+from .anomaly_dataset import AnomalyDatasets, AnomalyOutput, TestData
 
 
 class AnomalyOperatorBaseModel(ABC):
@@ -53,15 +54,18 @@ class AnomalyOperatorBaseModel(ABC):
 
     def generate_report(self):
         """Generates the report."""
-        import report_creator as rc
         import matplotlib.pyplot as plt
+        import report_creator as rc
 
         start_time = time.time()
         # fallback using sklearn oneclasssvm when the sub model _build_model fails
         try:
             anomaly_output = self._build_model()
         except Exception as e:
-
+            logger.warn(f"Found exception: {e}")
+            if self.spec.datetime_column:
+                anomaly_output = self._fallback_build_model()
+            raise e
 
         elapsed_time = time.time() - start_time
 
@@ -79,7 +83,9 @@ class AnomalyOperatorBaseModel(ABC):
             for col, df in self.datasets.full_data_dict.items()
         ]
         data_table = rc.Select(blocks=table_blocks)
-        date_column =
+        date_column = (
+            self.spec.datetime_column.name if self.spec.datetime_column else "index"
+        )
 
         blocks = []
         for target, df in self.datasets.full_data_dict.items():
@@ -95,7 +101,7 @@ class AnomalyOperatorBaseModel(ABC):
                 ax.grid()
                 ax.plot(time_col, y, color="black")
                 for i, index in enumerate(anomaly_col):
-                    if
+                    if index == 1:
                         ax.scatter(time_col[i], y[i], color="red", marker="o")
                 plt.xlabel(date_column)
                 plt.ylabel(col)
@@ -114,7 +120,7 @@ class AnomalyOperatorBaseModel(ABC):
             rc.Text(f"You selected the **`{self.spec.model}`** model."),
             rc.Text(
                 "Based on your dataset, you could have also selected "
-                f"any of the models: `{'`, `'.join(SupportedModels.keys())}`."
+                f"any of the models: `{'`, `'.join(SupportedModels.keys() if self.spec.datetime_column else NonTimeADSupportedModels.keys())}`."
             ),
             rc.Metric(
                 heading="Analysis was completed in ",
@@ -170,7 +176,9 @@ class AnomalyOperatorBaseModel(ABC):
 
         for cat in anomaly_output.list_categories():
             output = anomaly_output.category_map[cat][0]
-            date_col =
+            date_col = (
+                self.spec.datetime_column.name if self.spec.datetime_column else "index"
+            )
 
             test_data_i = test_data.get_data_for_series(cat)
 
@@ -247,7 +255,7 @@ class AnomalyOperatorBaseModel(ABC):
         if ObjectStorageDetails.is_oci_path(unique_output_dir):
             storage_options = default_signer()
         else:
-            storage_options =
+            storage_options = {}
 
         # report-creator html report
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -301,12 +309,11 @@ class AnomalyOperatorBaseModel(ABC):
         Fallback method for the sub model _build_model method.
         """
         logger.warn(
-            "The build_model method has failed for the model: {}. "
-            "A fallback model will be built."
+            f"The build_model method has failed for the model: {self.spec.model}. "
+            "A fallback model will be built."
         )
 
         date_column = self.spec.datetime_column.name
-        dataset = self.datasets
 
         anomaly_output = AnomalyOutput(date_column=date_column)
 
@@ -320,7 +327,9 @@ class AnomalyOperatorBaseModel(ABC):
             y_pred = np.vectorize(self.outlier_map.get)(
                 est.predict(df[self.spec.target_column].fillna(0).values.reshape(-1, 1))
             )
-            scores = est.score_samples(
+            scores = est.score_samples(
+                df[self.spec.target_column].fillna(0).values.reshape(-1, 1)
+            )
 
             anomaly = pd.DataFrame(
                 {date_column: df[date_column], OutputColumns.ANOMALY_COL: y_pred}
```
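The fallback path above maps scikit-learn's `{1, -1}` one-class predictions onto the operator's `{0: inlier, 1: outlier}` convention with `np.vectorize`. A toy run of the same idea; `SGDOneClassSVM` is assumed here as the `linear_model` estimator, and results on data this small are indicative only:

```python
import numpy as np
from sklearn.linear_model import SGDOneClassSVM  # assumed fallback estimator

outlier_map = {1: 0, -1: 1}  # sklearn {1, -1} -> operator {0, 1}

rng = np.random.RandomState(0)
X = np.append(rng.normal(1.0, 0.1, 100), 25.0).reshape(-1, 1)  # planted outlier

est = SGDOneClassSVM(nu=0.05, random_state=42).fit(X)
y_pred = np.vectorize(outlier_map.get)(est.predict(X))
scores = est.score_samples(X)

print(y_pred[-1])   # the extreme value is typically flagged as 1
print(scores[-1])   # and receives a comparatively low score
```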
ads/opctl/operator/lowcode/anomaly/model/factory.py

```diff
@@ -1,26 +1,41 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*--
 
-# Copyright (c) 2023 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
-from
+from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
+
+from ..const import NonTimeADSupportedModels, SupportedModels
 from ..operator_config import AnomalyOperatorConfig
+from .anomaly_dataset import AnomalyDatasets
 from .automlx import AutoMLXOperatorModel
 from .autots import AutoTSOperatorModel
-from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
 
 # from .tods import TODSOperatorModel
 from .base_model import AnomalyOperatorBaseModel
-from .
+from .isolationforest import IsolationForestOperatorModel
+from .oneclasssvm import OneClassSVMOperatorModel
 
 
 class UnSupportedModelError(Exception):
-
-
-
-
+    """Exception raised when the model is not supported.
+
+    Attributes:
+        operator_config (AnomalyOperatorConfig): The operator configuration.
+        model_type (str): The type of the unsupported model.
+    """
+
+    def __init__(self, operator_config: AnomalyOperatorConfig, model_type: str):
+        supported_models = (
+            SupportedModels.values
+            if operator_config.spec.datetime_column
+            else NonTimeADSupportedModels.values
         )
+        message = (
+            f"Model: `{model_type}` is not supported. "
+            f"Supported models: {supported_models}"
+        )
+        super().__init__(message)
 
 
 class AnomalyOperatorModelFactory:
@@ -34,6 +49,13 @@ class AnomalyOperatorModelFactory:
         SupportedModels.AutoTS: AutoTSOperatorModel,
     }
 
+    _NonTime_MAP = {
+        NonTimeADSupportedModels.OneClassSVM: OneClassSVMOperatorModel,
+        NonTimeADSupportedModels.IsolationForest: IsolationForestOperatorModel,
+        # TODO: Add DBScan model for non time based anomaly
+        # NonTimeADSupportedModels.DBScan: DBScanOperatorModel,
+    }
+
     @classmethod
     def get_model(
         cls, operator_config: AnomalyOperatorConfig, datasets: AnomalyDatasets
@@ -61,7 +83,13 @@ class AnomalyOperatorModelFactory:
         """
         model_type = operator_config.spec.model
         if model_type == "auto":
-            model_type = select_auto_model(
-
-
-
+            model_type = select_auto_model(operator_config)
+
+        model_map = (
+            cls._MAP if operator_config.spec.datetime_column else cls._NonTime_MAP
+        )
+
+        if model_type not in model_map:
+            raise UnSupportedModelError(operator_config, model_type)
+
+        return model_map[model_type](config=operator_config, datasets=datasets)
```
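The factory now selects its registry based on whether the dataset carries a datetime column. The dispatch shape, reduced to a standalone sketch with stub classes in place of the operator models:

```python
# Stub classes stand in for the ads operator models.
class TimeSeriesModel: ...
class TabularModel: ...

TIME_MAP = {"automlx": TimeSeriesModel, "autots": TimeSeriesModel}
NON_TIME_MAP = {"oneclasssvm": TabularModel, "isolationforest": TabularModel}


def get_model(model_type: str, has_datetime_column: bool):
    model_map = TIME_MAP if has_datetime_column else NON_TIME_MAP
    if model_type not in model_map:
        raise ValueError(
            f"Model: `{model_type}` is not supported. "
            f"Supported models: {list(model_map)}"
        )
    return model_map[model_type]()


print(type(get_model("isolationforest", has_datetime_column=False)).__name__)
# -> TabularModel
```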
ads/opctl/operator/lowcode/anomaly/model/isolationforest.py (new file)

```diff
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import numpy as np
+import pandas as pd
+
+from ads.common.decorator.runtime_dependency import runtime_dependency
+
+from .base_model import AnomalyOperatorBaseModel
+from .anomaly_dataset import AnomalyOutput
+from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
+
+
+class IsolationForestOperatorModel(AnomalyOperatorBaseModel):
+    """Class representing OneClassSVM Anomaly Detection operator model."""
+
+    @runtime_dependency(
+        module="sklearn",
+        err_msg=(
+            "Please run `pip3 install scikit-learn` to "
+            "install the required dependencies for OneClassSVM."
+        ),
+    )
+    def _build_model(self) -> AnomalyOutput:
+        from sklearn.ensemble import IsolationForest
+
+        model_kwargs = self.spec.model_kwargs
+        # map the output as per anomaly dataset class, 1: outlier, 0: inlier
+        self.outlier_map = {1: 0, -1: 1}
+
+        anomaly_output = AnomalyOutput(date_column="index")
+
+        for target, df in self.datasets.full_data_dict.items():
+            model = IsolationForest(**model_kwargs)
+            model.fit(df)
+            y_pred = np.vectorize(self.outlier_map.get)(
+                model.predict(df)
+            )
+
+            scores = model.score_samples(
+                df
+            )
+
+            index_col = df.columns[0]
+
+            anomaly = pd.DataFrame(
+                {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
+            ).reset_index(drop=True)
+            score = pd.DataFrame(
+                {"index": df[index_col], OutputColumns.SCORE_COL: scores}
+            ).reset_index(drop=True)
+
+            anomaly_output.add_output(target, anomaly, score)
+
+        return anomaly_output
+
+    def _generate_report(self):
+        """Generates the report."""
+        import report_creator as rc
+
+        other_sections = [
+            rc.Heading("Selected Models Overview", level=2),
+            rc.Text(
+                "The following tables provide information regarding the chosen model."
+            ),
+        ]
+
+        model_description = rc.Text(
+            "The Isolation Forest is an ensemble of “Isolation Trees” that “isolate” observations by recursive random partitioning"
+            " which can be represented by a tree structure. The number of splittings required to isolate a sample is lower for outliers and higher for inliers."
+        )
+
+        return (
+            model_description,
+            other_sections,
+        )
```