oracle-ads 2.12.2__py3-none-any.whl → 2.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/common/enums.py +9 -0
- ads/aqua/common/utils.py +83 -6
- ads/aqua/config/config.py +0 -16
- ads/aqua/constants.py +2 -0
- ads/aqua/extension/deployment_handler.py +2 -0
- ads/aqua/extension/finetune_handler.py +1 -2
- ads/aqua/extension/ui_handler.py +22 -3
- ads/aqua/finetuning/entities.py +5 -4
- ads/aqua/finetuning/finetuning.py +13 -8
- ads/aqua/model/constants.py +1 -0
- ads/aqua/model/entities.py +2 -0
- ads/aqua/model/model.py +223 -138
- ads/aqua/modeldeployment/deployment.py +106 -62
- ads/aqua/modeldeployment/entities.py +10 -2
- ads/aqua/ui.py +29 -16
- ads/config.py +3 -8
- ads/llm/deploy.py +6 -0
- ads/llm/guardrails/base.py +0 -1
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +118 -41
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +18 -14
- ads/llm/templates/score_chain.jinja2 +0 -1
- ads/model/datascience_model.py +519 -16
- ads/model/deployment/model_deployment.py +13 -0
- ads/model/deployment/model_deployment_infrastructure.py +34 -0
- ads/model/generic_model.py +10 -0
- ads/model/model_properties.py +1 -0
- ads/model/service/oci_datascience_model.py +28 -0
- ads/opctl/operator/lowcode/anomaly/const.py +66 -1
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +161 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +30 -15
- ads/opctl/operator/lowcode/anomaly/model/factory.py +15 -3
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +1 -1
- ads/opctl/operator/lowcode/anomaly/schema.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +3 -0
- {oracle_ads-2.12.2.dist-info → oracle_ads-2.12.3.dist-info}/METADATA +2 -1
- {oracle_ads-2.12.2.dist-info → oracle_ads-2.12.3.dist-info}/RECORD +39 -40
- ads/aqua/config/deployment_config_defaults.json +0 -38
- ads/aqua/config/resource_limit_names.json +0 -9
- {oracle_ads-2.12.2.dist-info → oracle_ads-2.12.3.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.12.2.dist-info → oracle_ads-2.12.3.dist-info}/WHEEL +0 -0
- {oracle_ads-2.12.2.dist-info → oracle_ads-2.12.3.dist-info}/entry_points.txt +0 -0
@@ -57,6 +57,8 @@ class ModelDeploymentInfrastructure(Builder):
|
|
57
57
|
The web concurrency of model deployment
|
58
58
|
subnet_id: str
|
59
59
|
The subnet id of model deployment
|
60
|
+
private_endpoint_id: str
|
61
|
+
The private endpoint id of model deployment
|
60
62
|
|
61
63
|
Methods
|
62
64
|
-------
|
@@ -84,6 +86,8 @@ class ModelDeploymentInfrastructure(Builder):
|
|
84
86
|
Sets the web concurrency of model deployment
|
85
87
|
with_subnet_id(subnet_id)
|
86
88
|
Sets the subnet id of model deployment
|
89
|
+
with_private_endpoint_id(private_endpoint_id)
|
90
|
+
Sets the private endpoint id of model deployment
|
87
91
|
|
88
92
|
Example
|
89
93
|
-------
|
@@ -100,6 +104,7 @@ class ModelDeploymentInfrastructure(Builder):
|
|
100
104
|
... .with_bandwidth_mbps(10)
|
101
105
|
... .with_web_concurrency(10)
|
102
106
|
... .with_subnet_id(<subnet_id>)
|
107
|
+
... .with_private_endpoint_id(<private_endpoint_id>)
|
103
108
|
... .with_access_log(
|
104
109
|
... log_group_id=<log_group_id>,
|
105
110
|
... log_id=<log_id>
|
@@ -143,6 +148,7 @@ class ModelDeploymentInfrastructure(Builder):
|
|
143
148
|
CONST_WEB_CONCURRENCY = "webConcurrency"
|
144
149
|
CONST_STREAM_CONFIG_DETAILS = "streamConfigurationDetails"
|
145
150
|
CONST_SUBNET_ID = "subnetId"
|
151
|
+
CONST_PRIVATE_ENDPOINT_ID = "privateEndpointId"
|
146
152
|
|
147
153
|
attribute_map = {
|
148
154
|
CONST_PROJECT_ID: "project_id",
|
@@ -159,6 +165,7 @@ class ModelDeploymentInfrastructure(Builder):
|
|
159
165
|
CONST_LOG_GROUP_ID: "log_group_id",
|
160
166
|
CONST_WEB_CONCURRENCY: "web_concurrency",
|
161
167
|
CONST_SUBNET_ID: "subnet_id",
|
168
|
+
CONST_PRIVATE_ENDPOINT_ID: "private_endpoint_id",
|
162
169
|
}
|
163
170
|
|
164
171
|
shape_config_details_attribute_map = {
|
@@ -186,6 +193,7 @@ class ModelDeploymentInfrastructure(Builder):
|
|
186
193
|
CONST_SHAPE_NAME: f"{MODEL_CONFIG_DETAILS_PATH}.instance_configuration.instance_shape_name",
|
187
194
|
CONST_SHAPE_CONFIG_DETAILS: f"{MODEL_CONFIG_DETAILS_PATH}.instance_configuration.model_deployment_instance_shape_config_details",
|
188
195
|
CONST_SUBNET_ID: f"{MODEL_CONFIG_DETAILS_PATH}.instance_configuration.subnet_id",
|
196
|
+
CONST_PRIVATE_ENDPOINT_ID: f"{MODEL_CONFIG_DETAILS_PATH}.instance_configuration.private_endpoint_id",
|
189
197
|
CONST_REPLICA: f"{MODEL_CONFIG_DETAILS_PATH}.scaling_policy.instance_count",
|
190
198
|
CONST_BANDWIDTH_MBPS: f"{MODEL_CONFIG_DETAILS_PATH}.bandwidth_mbps",
|
191
199
|
CONST_ACCESS_LOG: "category_log_details.access",
|
@@ -613,6 +621,32 @@ class ModelDeploymentInfrastructure(Builder):
|
|
613
621
|
"""
|
614
622
|
return self.get_spec(self.CONST_SUBNET_ID, None)
|
615
623
|
|
624
|
+
def with_private_endpoint_id(self, private_endpoint_id: str) -> "ModelDeploymentInfrastructure":
|
625
|
+
"""Sets the private endpoint id of model deployment.
|
626
|
+
|
627
|
+
Parameters
|
628
|
+
----------
|
629
|
+
private_endpoint_id : str
|
630
|
+
The private endpoint id of model deployment.
|
631
|
+
|
632
|
+
Returns
|
633
|
+
-------
|
634
|
+
ModelDeploymentInfrastructure
|
635
|
+
The ModelDeploymentInfrastructure instance (self).
|
636
|
+
"""
|
637
|
+
return self.set_spec(self.CONST_PRIVATE_ENDPOINT_ID, private_endpoint_id)
|
638
|
+
|
639
|
+
@property
|
640
|
+
def private_endpoint_id(self) -> str:
|
641
|
+
"""The model deployment private endpoint id.
|
642
|
+
|
643
|
+
Returns
|
644
|
+
-------
|
645
|
+
str
|
646
|
+
The model deployment private endpoint id.
|
647
|
+
"""
|
648
|
+
return self.get_spec(self.CONST_PRIVATE_ENDPOINT_ID, None)
|
649
|
+
|
616
650
|
def init(self, **kwargs) -> "ModelDeploymentInfrastructure":
|
617
651
|
"""Initializes a starter specification for the ModelDeploymentInfrastructure.
|
618
652
|
|
ads/model/generic_model.py
CHANGED
@@ -2262,6 +2262,7 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
|
|
2262
2262
|
description: Optional[str] = None,
|
2263
2263
|
deployment_instance_shape: Optional[str] = None,
|
2264
2264
|
deployment_instance_subnet_id: Optional[str] = None,
|
2265
|
+
deployment_instance_private_endpoint_id: Optional[str] = None,
|
2265
2266
|
deployment_instance_count: Optional[int] = None,
|
2266
2267
|
deployment_bandwidth_mbps: Optional[int] = None,
|
2267
2268
|
deployment_log_group_id: Optional[str] = None,
|
@@ -2312,6 +2313,8 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
|
|
2312
2313
|
The shape of the instance used for deployment.
|
2313
2314
|
deployment_instance_subnet_id: (str, optional). Default to None.
|
2314
2315
|
The subnet id of the instance used for deployment.
|
2316
|
+
deployment_instance_private_endpoint_id: (str, optional). Default to None.
|
2317
|
+
The private endpoint id of the instance used for deployment.
|
2315
2318
|
deployment_instance_count: (int, optional). Defaults to 1.
|
2316
2319
|
The number of instances used for deployment.
|
2317
2320
|
deployment_bandwidth_mbps: (int, optional). Defaults to 10.
|
@@ -2432,6 +2435,8 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
|
|
2432
2435
|
or self.properties.deployment_image,
|
2433
2436
|
deployment_instance_subnet_id=existing_infrastructure.subnet_id
|
2434
2437
|
or self.properties.deployment_instance_subnet_id,
|
2438
|
+
deployment_instance_private_endpoint_id=existing_infrastructure.private_endpoint_id
|
2439
|
+
or self.properties.deployment_instance_private_endpoint_id,
|
2435
2440
|
).to_dict()
|
2436
2441
|
|
2437
2442
|
property_dict.update(override_properties)
|
@@ -2465,6 +2470,7 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
|
|
2465
2470
|
.with_shape_name(self.properties.deployment_instance_shape)
|
2466
2471
|
.with_replica(self.properties.deployment_instance_count)
|
2467
2472
|
.with_subnet_id(self.properties.deployment_instance_subnet_id)
|
2473
|
+
.with_private_endpoint_id(self.properties.deployment_instance_private_endpoint_id)
|
2468
2474
|
)
|
2469
2475
|
|
2470
2476
|
web_concurrency = (
|
@@ -2611,6 +2617,7 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
|
|
2611
2617
|
deployment_description: Optional[str] = None,
|
2612
2618
|
deployment_instance_shape: Optional[str] = None,
|
2613
2619
|
deployment_instance_subnet_id: Optional[str] = None,
|
2620
|
+
deployment_instance_private_endpoint_id: Optional[str] = None,
|
2614
2621
|
deployment_instance_count: Optional[int] = None,
|
2615
2622
|
deployment_bandwidth_mbps: Optional[int] = None,
|
2616
2623
|
deployment_log_group_id: Optional[str] = None,
|
@@ -2701,6 +2708,8 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
|
|
2701
2708
|
The shape of the instance used for deployment.
|
2702
2709
|
deployment_instance_subnet_id: (str, optional). Default to None.
|
2703
2710
|
The subnet id of the instance used for deployment.
|
2711
|
+
deployment_instance_private_endpoint_id: (str, optional). Default to None.
|
2712
|
+
The private endpoint id of the instance used for deployment.
|
2704
2713
|
deployment_instance_count: (int, optional). Defaults to 1.
|
2705
2714
|
The number of instances used for deployment.
|
2706
2715
|
deployment_bandwidth_mbps: (int, optional). Defaults to 10.
|
@@ -2846,6 +2855,7 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
|
|
2846
2855
|
description=deployment_description,
|
2847
2856
|
deployment_instance_shape=self.properties.deployment_instance_shape,
|
2848
2857
|
deployment_instance_subnet_id=self.properties.deployment_instance_subnet_id,
|
2858
|
+
deployment_instance_private_endpoint_id=self.properties.deployment_instance_private_endpoint_id,
|
2849
2859
|
deployment_instance_count=self.properties.deployment_instance_count,
|
2850
2860
|
deployment_bandwidth_mbps=self.properties.deployment_bandwidth_mbps,
|
2851
2861
|
deployment_log_group_id=self.properties.deployment_log_group_id,
|
ads/model/model_properties.py
CHANGED
@@ -29,6 +29,7 @@ class ModelProperties(BaseProperties):
|
|
29
29
|
overwrite_existing_artifact: bool = None
|
30
30
|
deployment_instance_shape: str = None
|
31
31
|
deployment_instance_subnet_id: str = None
|
32
|
+
deployment_instance_private_endpoint_id: str = None
|
32
33
|
deployment_instance_count: int = None
|
33
34
|
deployment_bandwidth_mbps: int = None
|
34
35
|
deployment_log_group_id: str = None
|
@@ -278,6 +278,34 @@ class OCIDataScienceModel(
|
|
278
278
|
raise ModelArtifactNotFoundError()
|
279
279
|
return {}
|
280
280
|
|
281
|
+
@check_for_model_id(
|
282
|
+
msg="Model needs to be restored before the archived artifact content can be accessed."
|
283
|
+
)
|
284
|
+
def restore_archived_model_artifact(
|
285
|
+
self, restore_model_for_hours_specified: Optional[int] = None
|
286
|
+
) -> None:
|
287
|
+
"""Restores the archived model artifact.
|
288
|
+
|
289
|
+
Parameters
|
290
|
+
----------
|
291
|
+
model_id : str
|
292
|
+
The unique identifier of the model to restore.
|
293
|
+
restore_model_for_hours_specified : Optional[int]
|
294
|
+
The duration (in hours) for which the model should be restored.
|
295
|
+
|
296
|
+
Returns
|
297
|
+
-------
|
298
|
+
str: The value of the "opc-work-request-id" response header returned by the restore request.
|
299
|
+
|
300
|
+
Raises
|
301
|
+
------
|
302
|
+
ModelArtifactNotFoundError
|
303
|
+
If model artifact not found.
|
304
|
+
"""
|
305
|
+
return self.client.restore_archived_model_artifact(
|
306
|
+
model_id=self.id,
|
307
|
+
restore_model_for_hours_specified=restore_model_for_hours_specified).headers["opc-work-request-id"]
|
308
|
+
|
281
309
|
@check_for_model_id(
|
282
310
|
msg="Model needs to be saved to the Model Catalog before the artifact content can be read."
|
283
311
|
)
|
@@ -21,6 +21,23 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta):
|
|
21
21
|
EE = "ee"
|
22
22
|
ISOLATIONFOREST = "isolationforest"
|
23
23
|
|
24
|
+
# point anomaly
|
25
|
+
DAGMM = "dagmm"
|
26
|
+
DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector"
|
27
|
+
LSTM_ED = "lstm_ed"
|
28
|
+
SPECTRAL_RESIDUAL = "spectral_residual"
|
29
|
+
VAE = "vae"
|
30
|
+
|
31
|
+
# forecast_based
|
32
|
+
ARIMA = "arima"
|
33
|
+
ETS = "ets"
|
34
|
+
PROPHET = "prophet"
|
35
|
+
SARIMA = "sarima"
|
36
|
+
|
37
|
+
# changepoint
|
38
|
+
BOCPD = "bocpd"
|
39
|
+
|
40
|
+
|
24
41
|
class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
|
25
42
|
"""Supported non time-based anomaly detection models."""
|
26
43
|
|
@@ -29,7 +46,7 @@ class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
|
|
29
46
|
RandomCutForest = "randomcutforest"
|
30
47
|
# TODO : Add DBScan
|
31
48
|
# DBScan = "dbscan"
|
32
|
-
|
49
|
+
|
33
50
|
|
34
51
|
class TODSSubModels(str, metaclass=ExtendedEnumMeta):
|
35
52
|
"""Supported TODS sub models."""
|
@@ -61,6 +78,54 @@ TODS_MODEL_MAP = {
|
|
61
78
|
}
|
62
79
|
|
63
80
|
|
81
|
+
class MerlionADModels(str, metaclass=ExtendedEnumMeta):
|
82
|
+
"""Supported Merlion AD sub models."""
|
83
|
+
|
84
|
+
# point anomaly
|
85
|
+
DAGMM = "dagmm"
|
86
|
+
DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector"
|
87
|
+
LSTM_ED = "lstm_ed"
|
88
|
+
SPECTRAL_RESIDUAL = "spectral_residual"
|
89
|
+
VAE = "vae"
|
90
|
+
|
91
|
+
# forecast_based
|
92
|
+
ARIMA = "arima"
|
93
|
+
ETS = "ets"
|
94
|
+
PROPHET = "prophet"
|
95
|
+
SARIMA = "sarima"
|
96
|
+
|
97
|
+
# changepoint
|
98
|
+
BOCPD = "bocpd"
|
99
|
+
|
100
|
+
|
101
|
+
MERLIONAD_IMPORT_MODEL_MAP = {
|
102
|
+
MerlionADModels.DAGMM: ".dagmm",
|
103
|
+
MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: ".deep_point_anomaly_detector",
|
104
|
+
MerlionADModels.LSTM_ED: ".lstm_ed",
|
105
|
+
MerlionADModels.SPECTRAL_RESIDUAL: ".spectral_residual",
|
106
|
+
MerlionADModels.VAE: ".vae",
|
107
|
+
MerlionADModels.ARIMA: ".forecast_based.arima",
|
108
|
+
MerlionADModels.ETS: ".forecast_based.ets",
|
109
|
+
MerlionADModels.PROPHET: ".forecast_based.prophet",
|
110
|
+
MerlionADModels.SARIMA: ".forecast_based.sarima",
|
111
|
+
MerlionADModels.BOCPD: ".change_point.bocpd",
|
112
|
+
}
|
113
|
+
|
114
|
+
|
115
|
+
MERLIONAD_MODEL_MAP = {
|
116
|
+
MerlionADModels.DAGMM: "DAGMM",
|
117
|
+
MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: "DeepPointAnomalyDetector",
|
118
|
+
MerlionADModels.LSTM_ED: "LSTMED",
|
119
|
+
MerlionADModels.SPECTRAL_RESIDUAL: "SpectralResidual",
|
120
|
+
MerlionADModels.VAE: "VAE",
|
121
|
+
MerlionADModels.ARIMA: "ArimaDetector",
|
122
|
+
MerlionADModels.ETS: "ETSDetector",
|
123
|
+
MerlionADModels.PROPHET: "ProphetDetector",
|
124
|
+
MerlionADModels.SARIMA: "SarimaDetector",
|
125
|
+
MerlionADModels.BOCPD: "BOCPD",
|
126
|
+
}
|
127
|
+
|
128
|
+
|
64
129
|
class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
|
65
130
|
UNSUPERVISED_UNIFY95 = "unsupervised_unify95"
|
66
131
|
UNSUPERVISED_UNIFY95_LOG_LOSS = "unsupervised_unify95_log_loss"
|
@@ -0,0 +1,161 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
4
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5
|
+
|
6
|
+
import importlib
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import pandas as pd
|
10
|
+
from merlion.post_process.threshold import AggregateAlarms
|
11
|
+
from merlion.utils import TimeSeries
|
12
|
+
|
13
|
+
from ads.common.decorator.runtime_dependency import runtime_dependency
|
14
|
+
from ads.opctl.operator.lowcode.anomaly.const import (
|
15
|
+
MERLIONAD_IMPORT_MODEL_MAP,
|
16
|
+
MERLIONAD_MODEL_MAP,
|
17
|
+
OutputColumns,
|
18
|
+
SupportedModels,
|
19
|
+
)
|
20
|
+
|
21
|
+
from .anomaly_dataset import AnomalyOutput
|
22
|
+
from .base_model import AnomalyOperatorBaseModel
|
23
|
+
|
24
|
+
|
25
|
+
class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel):
|
26
|
+
"""Class representing Merlion Anomaly Detection operator model."""
|
27
|
+
|
28
|
+
@runtime_dependency(
|
29
|
+
module="merlion",
|
30
|
+
err_msg=(
|
31
|
+
"Please run `pip3 install salesforce-merlion[all]` to "
|
32
|
+
"install the required packages."
|
33
|
+
),
|
34
|
+
)
|
35
|
+
def _get_config_model(self, model_name):
|
36
|
+
"""
|
37
|
+
Returns a dictionary with model names as keys and a list of model config and model object as values.
|
38
|
+
|
39
|
+
Parameters
|
40
|
+
----------
|
41
|
+
model_name : str
|
42
|
+
model name from the Merlion model list.
|
43
|
+
|
44
|
+
Returns
|
45
|
+
-------
|
46
|
+
dict
|
47
|
+
A dictionary with model names as keys and a list of model config and model object as values.
|
48
|
+
"""
|
49
|
+
model_config_map = {}
|
50
|
+
model_module = importlib.import_module(
|
51
|
+
name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name),
|
52
|
+
package="merlion.models.anomaly",
|
53
|
+
)
|
54
|
+
model_config = getattr(
|
55
|
+
model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config"
|
56
|
+
)
|
57
|
+
model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name))
|
58
|
+
model_config_map[model_name] = [model_config, model]
|
59
|
+
return model_config_map
|
60
|
+
|
61
|
+
def _build_model(self) -> AnomalyOutput:
|
62
|
+
"""
|
63
|
+
Builds a Merlion anomaly detection model and trains it using the given data.
|
64
|
+
|
65
|
+
Parameters
|
66
|
+
----------
|
67
|
+
None
|
68
|
+
|
69
|
+
Returns
|
70
|
+
-------
|
71
|
+
AnomalyOutput
|
72
|
+
An AnomalyOutput object containing the anomaly detection results.
|
73
|
+
"""
|
74
|
+
model_kwargs = self.spec.model_kwargs
|
75
|
+
anomaly_output = AnomalyOutput(date_column="index")
|
76
|
+
anomaly_threshold = model_kwargs.get("anomaly_threshold", 95)
|
77
|
+
model_config_map = {}
|
78
|
+
model_config_map = self._get_config_model(self.spec.model)
|
79
|
+
|
80
|
+
date_column = self.spec.datetime_column.name
|
81
|
+
|
82
|
+
anomaly_output = AnomalyOutput(date_column=date_column)
|
83
|
+
# model_objects = defaultdict(list)
|
84
|
+
for target, df in self.datasets.full_data_dict.items():
|
85
|
+
data = df.set_index(date_column)
|
86
|
+
data = TimeSeries.from_pd(data)
|
87
|
+
for model_name, (model_config, model) in model_config_map.items():
|
88
|
+
if self.spec.model == SupportedModels.BOCPD:
|
89
|
+
model_config = model_config(**self.spec.model_kwargs)
|
90
|
+
else:
|
91
|
+
model_config = model_config(
|
92
|
+
**{
|
93
|
+
**self.spec.model_kwargs,
|
94
|
+
"threshold": AggregateAlarms(
|
95
|
+
alm_threshold=model_kwargs.get("alm_threshold")
|
96
|
+
if model_kwargs.get("alm_threshold")
|
97
|
+
else None
|
98
|
+
),
|
99
|
+
}
|
100
|
+
)
|
101
|
+
if hasattr(model_config, "target_seq_index"):
|
102
|
+
model_config.target_seq_index = df.columns.get_loc(
|
103
|
+
self.spec.target_column
|
104
|
+
)
|
105
|
+
model = model(model_config)
|
106
|
+
|
107
|
+
scores = model.train(train_data=data, anomaly_labels=None)
|
108
|
+
scores = scores.to_pd().reset_index()
|
109
|
+
scores["anom_score"] = (
|
110
|
+
scores["anom_score"] - scores["anom_score"].min()
|
111
|
+
) / (scores["anom_score"].max() - scores["anom_score"].min())
|
112
|
+
|
113
|
+
try:
|
114
|
+
y_pred = model.get_anomaly_label(data)
|
115
|
+
y_pred = (y_pred.to_pd().reset_index()["anom_score"] > 0).astype(
|
116
|
+
int
|
117
|
+
)
|
118
|
+
except Exception as e:
|
119
|
+
y_pred = (
|
120
|
+
scores["anom_score"]
|
121
|
+
> np.percentile(
|
122
|
+
scores["anom_score"],
|
123
|
+
anomaly_threshold,
|
124
|
+
)
|
125
|
+
).astype(int)
|
126
|
+
|
127
|
+
index_col = df.columns[0]
|
128
|
+
|
129
|
+
anomaly = pd.DataFrame(
|
130
|
+
{index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
|
131
|
+
).reset_index(drop=True)
|
132
|
+
score = pd.DataFrame(
|
133
|
+
{
|
134
|
+
index_col: df[index_col],
|
135
|
+
OutputColumns.SCORE_COL: scores["anom_score"],
|
136
|
+
}
|
137
|
+
).reset_index(drop=True)
|
138
|
+
# model_objects[model_name].append(model)
|
139
|
+
|
140
|
+
anomaly_output.add_output(target, anomaly, score)
|
141
|
+
return anomaly_output
|
142
|
+
|
143
|
+
def _generate_report(self):
|
144
|
+
"""Generates a report for the model."""
|
145
|
+
import report_creator as rc
|
146
|
+
|
147
|
+
other_sections = [
|
148
|
+
rc.Heading("Selected Models Overview", level=2),
|
149
|
+
rc.Text(
|
150
|
+
"The following tables provide information regarding the chosen model."
|
151
|
+
),
|
152
|
+
]
|
153
|
+
|
154
|
+
model_description = rc.Text(
|
155
|
+
"The Merlion anomaly detection model is a full-stack automated machine learning system for anomaly detection."
|
156
|
+
)
|
157
|
+
|
158
|
+
return (
|
159
|
+
model_description,
|
160
|
+
other_sections,
|
161
|
+
)
|
@@ -5,15 +5,17 @@
|
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
from ads.common.decorator.runtime_dependency import runtime_dependency
|
8
|
+
from ads.opctl import logger
|
8
9
|
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
|
10
|
+
|
11
|
+
from ..const import SupportedModels
|
9
12
|
from .anomaly_dataset import AnomalyOutput
|
10
13
|
from .base_model import AnomalyOperatorBaseModel
|
11
|
-
from ..const import SupportedModels
|
12
|
-
from ads.opctl import logger
|
13
14
|
|
14
15
|
|
15
16
|
class AutoTSOperatorModel(AnomalyOperatorBaseModel):
|
16
17
|
"""Class representing AutoTS Anomaly Detection operator model."""
|
18
|
+
|
17
19
|
model_mapping = {
|
18
20
|
"isolationforest": "IsolationForest",
|
19
21
|
"lof": "LOF",
|
@@ -22,30 +24,43 @@ class AutoTSOperatorModel(AnomalyOperatorBaseModel):
|
|
22
24
|
"rolling_zscore": "rolling_zscore",
|
23
25
|
"mad": "mad",
|
24
26
|
"minmax": "minmax",
|
25
|
-
"iqr": "IQR"
|
27
|
+
"iqr": "IQR",
|
26
28
|
}
|
27
29
|
|
28
30
|
@runtime_dependency(
|
29
31
|
module="autots",
|
30
32
|
err_msg=(
|
31
|
-
|
32
|
-
|
33
|
+
"Please run `pip3 install autots` to "
|
34
|
+
"install the required dependencies for AutoTS."
|
33
35
|
),
|
34
36
|
)
|
35
37
|
def _build_model(self) -> AnomalyOutput:
|
36
38
|
from autots.evaluator.anomaly_detector import AnomalyDetector
|
37
39
|
|
38
|
-
method =
|
39
|
-
|
40
|
-
|
41
|
-
|
40
|
+
method = (
|
41
|
+
SupportedModels.ISOLATIONFOREST
|
42
|
+
if self.spec.model == SupportedModels.AutoTS
|
43
|
+
else self.spec.model
|
44
|
+
)
|
45
|
+
model_params = {
|
46
|
+
"method": self.model_mapping[method],
|
47
|
+
"transform_dict": self.spec.model_kwargs.get("transform_dict", {}),
|
48
|
+
"output": self.spec.model_kwargs.get("output", "univariate"),
|
49
|
+
"method_params": {},
|
50
|
+
}
|
42
51
|
# Supported methods with contamination param
|
43
|
-
if method in [
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
52
|
+
if method in [
|
53
|
+
SupportedModels.ISOLATIONFOREST,
|
54
|
+
SupportedModels.LOF,
|
55
|
+
SupportedModels.EE,
|
56
|
+
]:
|
57
|
+
model_params["method_params"]["contamination"] = (
|
58
|
+
self.spec.contamination if self.spec.contamination else 0.01
|
59
|
+
)
|
60
|
+
elif self.spec.contamination:
|
61
|
+
raise ValueError(
|
62
|
+
f'The contamination parameter is not supported for the selected model "{method}"'
|
63
|
+
)
|
49
64
|
logger.info(f"model params: {model_params}")
|
50
65
|
|
51
66
|
model = AnomalyDetector(**model_params)
|
@@ -4,14 +4,16 @@
|
|
4
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5
5
|
|
6
6
|
from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
|
7
|
+
|
8
|
+
from ..const import NonTimeADSupportedModels, SupportedModels
|
9
|
+
from ..operator_config import AnomalyOperatorConfig
|
7
10
|
from .anomaly_dataset import AnomalyDatasets
|
11
|
+
from .anomaly_merlion import AnomalyMerlionOperatorModel
|
8
12
|
from .autots import AutoTSOperatorModel
|
9
13
|
from .base_model import AnomalyOperatorBaseModel
|
10
14
|
from .isolationforest import IsolationForestOperatorModel
|
11
15
|
from .oneclasssvm import OneClassSVMOperatorModel
|
12
16
|
from .randomcutforest import RandomCutForestOperatorModel
|
13
|
-
from ..const import NonTimeADSupportedModels, SupportedModels
|
14
|
-
from ..operator_config import AnomalyOperatorConfig
|
15
17
|
|
16
18
|
|
17
19
|
class UnSupportedModelError(Exception):
|
@@ -48,7 +50,17 @@ class AnomalyOperatorModelFactory:
|
|
48
50
|
SupportedModels.ZSCORE: AutoTSOperatorModel,
|
49
51
|
SupportedModels.ROLLING_ZSCORE: AutoTSOperatorModel,
|
50
52
|
SupportedModels.EE: AutoTSOperatorModel,
|
51
|
-
SupportedModels.MAD: AutoTSOperatorModel
|
53
|
+
SupportedModels.MAD: AutoTSOperatorModel,
|
54
|
+
SupportedModels.DAGMM: AnomalyMerlionOperatorModel,
|
55
|
+
SupportedModels.DEEP_POINT_ANOMALY_DETECTOR: AnomalyMerlionOperatorModel,
|
56
|
+
SupportedModels.LSTM_ED: AnomalyMerlionOperatorModel,
|
57
|
+
SupportedModels.SPECTRAL_RESIDUAL: AnomalyMerlionOperatorModel,
|
58
|
+
SupportedModels.VAE: AnomalyMerlionOperatorModel,
|
59
|
+
SupportedModels.ARIMA: AnomalyMerlionOperatorModel,
|
60
|
+
SupportedModels.ETS: AnomalyMerlionOperatorModel,
|
61
|
+
SupportedModels.PROPHET: AnomalyMerlionOperatorModel,
|
62
|
+
SupportedModels.SARIMA: AnomalyMerlionOperatorModel,
|
63
|
+
SupportedModels.BOCPD: AnomalyMerlionOperatorModel,
|
52
64
|
}
|
53
65
|
|
54
66
|
_NonTime_MAP = {
|
@@ -36,7 +36,7 @@ class RandomCutForestOperatorModel(AnomalyOperatorBaseModel):
|
|
36
36
|
# Set tree parameters
|
37
37
|
num_trees = model_kwargs.get("num_trees", 200)
|
38
38
|
shingle_size = model_kwargs.get("shingle_size", None)
|
39
|
-
anomaly_threshold = model_kwargs.get("
|
39
|
+
anomaly_threshold = model_kwargs.get("anomaly_threshold", 95)
|
40
40
|
|
41
41
|
for target, df in self.datasets.full_data_dict.items():
|
42
42
|
try:
|
@@ -370,6 +370,16 @@ spec:
|
|
370
370
|
- rolling_zscore
|
371
371
|
- mad
|
372
372
|
- ee
|
373
|
+
- dagmm
|
374
|
+
- deep_point_anomaly_detector
|
375
|
+
- lstm_ed
|
376
|
+
- spectral_residual
|
377
|
+
- vae
|
378
|
+
- arima
|
379
|
+
- ets
|
380
|
+
- sarima
|
381
|
+
- bocpd
|
382
|
+
- prophet
|
373
383
|
meta:
|
374
384
|
description: "The model to be used for anomaly detection"
|
375
385
|
|
@@ -5,6 +5,7 @@
|
|
5
5
|
|
6
6
|
import os
|
7
7
|
|
8
|
+
import numpy as np
|
8
9
|
import pandas as pd
|
9
10
|
|
10
11
|
from ads.opctl import logger
|
@@ -27,6 +28,8 @@ def _build_metrics_df(y_true, y_pred, column_name):
|
|
27
28
|
)
|
28
29
|
|
29
30
|
metrics = {}
|
31
|
+
np.nan_to_num(y_true, copy=False)
|
32
|
+
np.nan_to_num(y_pred, copy=False)
|
30
33
|
metrics[SupportedMetrics.RECALL] = recall_score(y_true, y_pred)
|
31
34
|
metrics[SupportedMetrics.PRECISION] = precision_score(y_true, y_pred)
|
32
35
|
metrics[SupportedMetrics.ACCURACY] = accuracy_score(y_true, y_pred)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: oracle_ads
|
3
|
-
Version: 2.12.2
|
3
|
+
Version: 2.12.3
|
4
4
|
Summary: Oracle Accelerated Data Science SDK
|
5
5
|
Keywords: Oracle Cloud Infrastructure,OCI,Machine Learning,ML,Artificial Intelligence,AI,Data Science,Cloud,Oracle
|
6
6
|
Author: Oracle Data Science
|
@@ -40,6 +40,7 @@ Requires-Dist: oracledb ; extra == "anomaly"
|
|
40
40
|
Requires-Dist: report-creator==1.0.9 ; extra == "anomaly"
|
41
41
|
Requires-Dist: rrcf==0.4.4 ; extra == "anomaly"
|
42
42
|
Requires-Dist: scikit-learn ; extra == "anomaly"
|
43
|
+
Requires-Dist: salesforce-merlion[all]==2.0.4 ; extra == "anomaly"
|
43
44
|
Requires-Dist: jupyter_server ; extra == "aqua"
|
44
45
|
Requires-Dist: hdfs[kerberos] ; extra == "bds"
|
45
46
|
Requires-Dist: ibis-framework[impala] ; extra == "bds"
|