mlrun 1.7.0rc25__py3-none-any.whl → 1.7.0rc29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +7 -7
- mlrun/alerts/alert.py +13 -1
- mlrun/artifacts/manager.py +5 -0
- mlrun/common/constants.py +2 -2
- mlrun/common/formatters/base.py +9 -9
- mlrun/common/schemas/alert.py +4 -8
- mlrun/common/schemas/api_gateway.py +7 -0
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +27 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/config.py +16 -9
- mlrun/datastore/azure_blob.py +2 -1
- mlrun/datastore/base.py +1 -5
- mlrun/datastore/datastore.py +3 -3
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/inmem.py +1 -1
- mlrun/datastore/snowflake_utils.py +3 -1
- mlrun/datastore/sources.py +26 -11
- mlrun/datastore/store_resources.py +2 -0
- mlrun/datastore/targets.py +60 -25
- mlrun/db/base.py +11 -0
- mlrun/db/httpdb.py +47 -33
- mlrun/db/nopdb.py +11 -1
- mlrun/errors.py +4 -0
- mlrun/execution.py +18 -10
- mlrun/feature_store/retrieval/spark_merger.py +2 -1
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +30 -0
- mlrun/model_monitoring/api.py +6 -52
- mlrun/model_monitoring/applications/histogram_data_drift.py +4 -1
- mlrun/model_monitoring/db/stores/__init__.py +21 -9
- mlrun/model_monitoring/db/stores/base/store.py +39 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +4 -2
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +34 -79
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -27
- mlrun/model_monitoring/db/tsdb/__init__.py +19 -14
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +4 -2
- mlrun/model_monitoring/helpers.py +9 -5
- mlrun/model_monitoring/writer.py +1 -5
- mlrun/projects/operations.py +1 -0
- mlrun/projects/project.py +76 -76
- mlrun/render.py +10 -5
- mlrun/run.py +2 -2
- mlrun/runtimes/daskjob.py +7 -1
- mlrun/runtimes/local.py +24 -7
- mlrun/runtimes/nuclio/function.py +20 -0
- mlrun/runtimes/pod.py +5 -29
- mlrun/serving/routers.py +75 -59
- mlrun/serving/server.py +1 -0
- mlrun/serving/v2_serving.py +8 -1
- mlrun/utils/helpers.py +46 -2
- mlrun/utils/logger.py +36 -2
- mlrun/utils/notifications/notification/base.py +4 -0
- mlrun/utils/notifications/notification/git.py +21 -0
- mlrun/utils/notifications/notification/slack.py +8 -0
- mlrun/utils/notifications/notification/webhook.py +41 -1
- mlrun/utils/notifications/notification_pusher.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc25.dist-info → mlrun-1.7.0rc29.dist-info}/METADATA +11 -6
- {mlrun-1.7.0rc25.dist-info → mlrun-1.7.0rc29.dist-info}/RECORD +67 -67
- {mlrun-1.7.0rc25.dist-info → mlrun-1.7.0rc29.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc25.dist-info → mlrun-1.7.0rc29.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc25.dist-info → mlrun-1.7.0rc29.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc25.dist-info → mlrun-1.7.0rc29.dist-info}/top_level.txt +0 -0
mlrun/__main__.py
CHANGED
@@ -50,12 +50,12 @@ from .run import (
 from .runtimes import RemoteRuntime, RunError, RuntimeKinds, ServingRuntime
 from .secrets import SecretsStore
 from .utils import (
+    RunKeys,
     dict_to_yaml,
     get_in,
     is_relative_path,
     list2dict,
     logger,
-    run_keys,
     update_in,
 )
 from .utils.version import Version
@@ -380,15 +380,15 @@ def run(
     set_item(runobj.spec.hyper_param_options, hyper_param_strategy, "strategy")
     set_item(runobj.spec.hyper_param_options, selector, "selector")

-    set_item(runobj.spec, inputs, run_keys.inputs, list2dict(inputs))
+    set_item(runobj.spec, inputs, RunKeys.inputs, list2dict(inputs))
     set_item(
-        runobj.spec, returns, run_keys.returns, [py_eval(value) for value in returns]
+        runobj.spec, returns, RunKeys.returns, [py_eval(value) for value in returns]
     )
-    set_item(runobj.spec, in_path, run_keys.input_path)
-    set_item(runobj.spec, out_path, run_keys.output_path)
-    set_item(runobj.spec, outputs, run_keys.outputs, list(outputs))
+    set_item(runobj.spec, in_path, RunKeys.input_path)
+    set_item(runobj.spec, out_path, RunKeys.output_path)
+    set_item(runobj.spec, outputs, RunKeys.outputs, list(outputs))
     set_item(
-        runobj.spec, secrets, run_keys.secrets, line2keylist(secrets, "kind", "source")
+        runobj.spec, secrets, RunKeys.secrets, line2keylist(secrets, "kind", "source")
     )
     set_item(runobj.spec, verbose, "verbose")
     set_item(runobj.spec, scrape_metrics, "scrape_metrics")
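The `run_keys` module-level constants were replaced by a `RunKeys` class; the same rename shows up again in mlrun/datastore/datastore.py below. A rough sketch of the shape the call sites imply — the member names come from this diff, the string values are assumptions:

# Illustrative sketch only -- member names inferred from the call sites in this diff;
# the string values are assumptions, not taken from mlrun source.
class RunKeys:
    inputs = "inputs"
    returns = "returns"
    input_path = "input_path"
    output_path = "output_path"
    outputs = "outputs"
    secrets = "secret_sources"  # assumed key name
    data_stores = "data_stores"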
mlrun/alerts/alert.py
CHANGED
@@ -26,7 +26,6 @@ class AlertConfig(ModelObj):
         "description",
         "summary",
         "severity",
-        "criteria",
         "reset_policy",
         "state",
     ]
@@ -34,6 +33,7 @@ class AlertConfig(ModelObj):
         "entities",
         "notifications",
         "trigger",
+        "criteria",
     ]

     def __init__(
@@ -104,6 +104,14 @@ class AlertConfig(ModelObj):
                 else self.trigger
             )
             return None
+        if field_name == "criteria":
+            if self.criteria:
+                return (
+                    self.criteria.dict()
+                    if not isinstance(self.criteria, dict)
+                    else self.criteria
+                )
+            return None
         return super()._serialize_field(struct, field_name, strip)

     def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
@@ -137,6 +145,10 @@ class AlertConfig(ModelObj):
         trigger_obj = alert_objects.AlertTrigger.parse_obj(trigger_data)
         new_obj.trigger = trigger_obj

+        criteria_data = struct.get("criteria")
+        if criteria_data:
+            criteria_obj = alert_objects.AlertCriteria.parse_obj(criteria_data)
+            new_obj.criteria = criteria_obj
         return new_obj

     def with_notifications(self, notifications: list[alert_objects.AlertNotification]):
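With `criteria` moved to the object-serialized fields and parsed back in `from_dict`, alert configs now round-trip it like `entities`, `notifications`, and `trigger`. A minimal sketch, assuming the `AlertCriteria(count=..., period=...)` shape from `mlrun.common.schemas.alert`:

from mlrun.alerts.alert import AlertConfig
import mlrun.common.schemas.alert as alert_objects

config = AlertConfig(
    project="my-project",
    name="failure-alert",
    summary="Job failed 3 times within 10 minutes",
    severity=alert_objects.AlertSeverity.HIGH,
    trigger=alert_objects.AlertTrigger(events=[alert_objects.EventKind.FAILED]),
    # criteria previously fell out of to_dict()/from_dict(); now it survives the round trip
    criteria=alert_objects.AlertCriteria(count=3, period="10m"),
)

restored = AlertConfig.from_dict(config.to_dict())
assert restored.criteria.count == 3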
mlrun/artifacts/manager.py
CHANGED
@@ -100,6 +100,11 @@ class ArtifactProducer:

 def dict_to_artifact(struct: dict) -> Artifact:
     kind = struct.get("kind", "")
+
+    # TODO: remove this in 1.8.0
+    if mlrun.utils.is_legacy_artifact(struct):
+        return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
+
     artifact_class = artifact_types[kind]
     return artifact_class.from_dict(struct)

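Callers of `dict_to_artifact` no longer need to convert pre-1.x artifact dicts themselves. A sketch, assuming legacy structs are recognized by their flat, top-level layout (no nested `metadata`/`spec`):

import mlrun.artifacts.manager as manager

# flat, legacy-style struct (illustrative); new-format structs nest metadata/spec
legacy_struct = {"kind": "", "key": "my-data", "target_path": "s3://bucket/data.csv"}
artifact = manager.dict_to_artifact(legacy_struct)  # converted to the new format first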
mlrun/common/constants.py
CHANGED
@@ -64,12 +64,12 @@ class MLRunInternalLabels:
     username = f"{MLRUN_LABEL_PREFIX}username"
     username_domain = f"{MLRUN_LABEL_PREFIX}username_domain"
     task_name = f"{MLRUN_LABEL_PREFIX}task-name"
+    resource_name = f"{MLRUN_LABEL_PREFIX}resource_name"
+    created = f"{MLRUN_LABEL_PREFIX}created"
     host = "host"
     job_type = "job-type"
     kind = "kind"
     component = "component"
-    resource_name = "resource_name"
-    created = "mlrun-created"

     owner = "owner"
     v3io_user = "v3io_user"
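The net effect is that both labels now carry the mlrun prefix like the surrounding ones, and `created` loses its old `mlrun-created` spelling. A quick check, assuming `MLRUN_LABEL_PREFIX = "mlrun/"`:

MLRUN_LABEL_PREFIX = "mlrun/"  # assumed; defined earlier in mlrun/common/constants.py

assert f"{MLRUN_LABEL_PREFIX}resource_name" == "mlrun/resource_name"  # was "resource_name"
assert f"{MLRUN_LABEL_PREFIX}created" == "mlrun/created"              # was "mlrun-created"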
mlrun/common/formatters/base.py
CHANGED
@@ -28,42 +28,42 @@ class ObjectFormat:
     full = "full"

     @staticmethod
-    def format_method(_format: str) -> typing.Optional[typing.Callable]:
+    def format_method(format_: str) -> typing.Optional[typing.Callable]:
         """
         Get the formatting method for the provided format.
         A `None` value signifies a pass-through formatting method (no formatting).
-        :param _format: The format as a string representation.
+        :param format_: The format as a string representation.
         :return: The formatting method.
         """
         return {
             ObjectFormat.full: None,
-        }[_format]
+        }[format_]

     @classmethod
     def format_obj(
         cls,
         obj: typing.Any,
-        _format: str,
+        format_: str,
         exclude_formats: typing.Optional[list[str]] = None,
     ) -> typing.Any:
         """
         Format the provided object based on the provided format.
         :param obj: The object to format.
-        :param _format: The format as a string representation.
+        :param format_: The format as a string representation.
         :param exclude_formats: A list of formats to exclude from the formatting process. If the provided format is in
                                 this list, an invalid format exception will be raised.
         """
         exclude_formats = exclude_formats or []
-        _format = _format or cls.full
+        format_ = format_ or cls.full
         invalid_format_exc = mlrun.errors.MLRunBadRequestError(
-            f"Provided format is not supported. format={_format}"
+            f"Provided format is not supported. format={format_}"
         )

-        if _format in exclude_formats:
+        if format_ in exclude_formats:
             raise invalid_format_exc

         try:
-            format_method = cls.format_method(_format)
+            format_method = cls.format_method(format_)
         except KeyError:
             raise invalid_format_exc

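Only the keyword spelling changed; behavior is the same. A minimal usage sketch:

import mlrun.common.formatters.base as base

obj = {"name": "my-run", "state": "completed"}

# "full" maps to None, i.e. a pass-through: the object is returned unformatted
assert base.ObjectFormat.format_obj(obj, "full") == obj

# formats listed in exclude_formats (or unknown formats) raise MLRunBadRequestError
base.ObjectFormat.format_obj(obj, "full", exclude_formats=["full"])  # raises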
mlrun/common/schemas/alert.py
CHANGED
@@ -39,8 +39,8 @@ class EventKind(StrEnum):
     CONCEPT_DRIFT_SUSPECTED = "concept_drift_suspected"
     MODEL_PERFORMANCE_DETECTED = "model_performance_detected"
     MODEL_PERFORMANCE_SUSPECTED = "model_performance_suspected"
-    MODEL_SERVING_PERFORMANCE_DETECTED = "model_serving_performance_detected"
-    MODEL_SERVING_PERFORMANCE_SUSPECTED = "model_serving_performance_suspected"
+    SYSTEM_PERFORMANCE_DETECTED = "system_performance_detected"
+    SYSTEM_PERFORMANCE_SUSPECTED = "system_performance_suspected"
     MM_APP_ANOMALY_DETECTED = "mm_app_anomaly_detected"
     MM_APP_ANOMALY_SUSPECTED = "mm_app_anomaly_suspected"
     FAILED = "failed"
@@ -53,12 +53,8 @@ _event_kind_entity_map = {
     EventKind.CONCEPT_DRIFT_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MODEL_PERFORMANCE_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MODEL_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
-    EventKind.MODEL_SERVING_PERFORMANCE_DETECTED: [
-        EventEntityKind.MODEL_ENDPOINT_RESULT
-    ],
-    EventKind.MODEL_SERVING_PERFORMANCE_SUSPECTED: [
-        EventEntityKind.MODEL_ENDPOINT_RESULT
-    ],
+    EventKind.SYSTEM_PERFORMANCE_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.SYSTEM_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MM_APP_ANOMALY_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MM_APP_ANOMALY_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.FAILED: [EventEntityKind.JOB],
mlrun/common/schemas/api_gateway.py
CHANGED
@@ -102,6 +102,13 @@ class APIGateway(_APIGatewayBaseModel):
             if upstream.nucliofunction.get("name")
         ]

+    def get_invoke_url(self):
+        return (
+            self.spec.host + self.spec.path
+            if self.spec.path and self.spec.host
+            else self.spec.host
+        )
+
     def enrich_mlrun_names(self):
         self._enrich_api_gateway_mlrun_name()
         self._enrich_mlrun_function_names()
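Restating the new helper's concatenation rule standalone (illustrative function, not mlrun API):

def invoke_url(host: str, path: str) -> str:
    # mirrors APIGateway.get_invoke_url: join host and path, fall back to host alone
    return host + path if path and host else host

assert invoke_url("gateway.example.com", "/v1/predict") == "gateway.example.com/v1/predict"
assert invoke_url("gateway.example.com", "") == "gateway.example.com"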
mlrun/common/schemas/constants.py
CHANGED
@@ -120,10 +120,13 @@ class FeatureStorePartitionByField(mlrun.common.types.StrEnum):

 class RunPartitionByField(mlrun.common.types.StrEnum):
     name = "name"  # Supported for runs objects
+    project_and_name = "project_and_name"  # Supported for runs objects

     def to_partition_by_db_field(self, db_cls):
         if self.value == RunPartitionByField.name:
             return db_cls.name
+        elif self.value == RunPartitionByField.project_and_name:
+            return db_cls.project, db_cls.name
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Unknown group by field: {self.value}"
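Unlike `name`, the new member resolves to a tuple of two columns, so the SQL layer can partition runs by project and name in one query. An illustrative call with a stand-in model class:

from mlrun.common.schemas.constants import RunPartitionByField

class FakeRunModel:  # stand-in for the SQLAlchemy run model
    project = "runs.project"
    name = "runs.name"

field = RunPartitionByField.project_and_name
assert field.to_partition_by_db_field(FakeRunModel) == ("runs.project", "runs.name")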
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -78,8 +78,6 @@ class EventFieldType:
     FEATURE_SET_URI = "monitoring_feature_set_uri"
     ALGORITHM = "algorithm"
     VALUE = "value"
-    DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
-    POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
     TIME = "time"
     TABLE_COLUMN = "table_column"
@@ -158,19 +156,42 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"


-class ModelEndpointTarget:
+class ModelEndpointTarget(MonitoringStrEnum):
     V3IO_NOSQL = "v3io-nosql"
     SQL = "sql"


+class StreamKind(MonitoringStrEnum):
+    V3IO_STREAM = "v3io_stream"
+    KAFKA = "kafka"
+
+
+class TSDBTarget(MonitoringStrEnum):
+    V3IO_TSDB = "v3io-tsdb"
+    TDEngine = "tdengine"
+    PROMETHEUS = "prometheus"
+
+
 class ProjectSecretKeys:
     ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
-    PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
-    KAFKA_BROKERS = "KAFKA_BROKERS"
     STREAM_PATH = "STREAM_PATH"
     TSDB_CONNECTION = "TSDB_CONNECTION"

+    @classmethod
+    def mandatory_secrets(cls):
+        return [
+            cls.ENDPOINT_STORE_CONNECTION,
+            cls.STREAM_PATH,
+            cls.TSDB_CONNECTION,
+        ]
+
+
+class ModelEndpointTargetSchemas(MonitoringStrEnum):
+    V3IO = "v3io"
+    MYSQL = "mysql"
+    SQLITE = "sqlite"
+

 class ModelMonitoringStoreKinds:
     ENDPOINTS = "endpoints"
@@ -318,7 +339,7 @@ class ResultKindApp(Enum):
     concept_drift = 1
     model_performance = 2
     system_performance = 3
-
+    mm_app_anomaly = 4


 class ResultStatusApp(IntEnum):
@@ -344,12 +365,6 @@ class ControllerPolicy:
     BASE_PERIOD = "base_period"


-class TSDBTarget:
-    V3IO_TSDB = "v3io-tsdb"
-    TDEngine = "tdengine"
-    PROMETHEUS = "prometheus"
-
-
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"
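Turning `ModelEndpointTarget` (and the new `StreamKind`/`TSDBTarget`) into `MonitoringStrEnum`s means the valid values can be enumerated and validated rather than compared against bare strings. A sketch, assuming `MonitoringStrEnum` provides a `list()` helper returning member values:

from mlrun.common.schemas.model_monitoring.constants import StreamKind, TSDBTarget

TSDBTarget.list()       # e.g. ["v3io-tsdb", "tdengine", "prometheus"] (assumed helper)
StreamKind("kafka")     # ok
StreamKind("rabbitmq")  # raises ValueError -- not a supported stream kind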
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -103,18 +103,6 @@ class ModelEndpointSpec(ObjectSpec):
             json_parse_values=json_parse_values,
         )

-    @validator("monitor_configuration")
-    @classmethod
-    def set_name(cls, monitor_configuration):
-        return monitor_configuration or {
-            EventFieldType.DRIFT_DETECTED_THRESHOLD: (
-                mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected
-            ),
-            EventFieldType.POSSIBLE_DRIFT_THRESHOLD: (
-                mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift
-            ),
-        }
-
     @validator("model_uri")
     @classmethod
     def validate_model_uri(cls, model_uri):
mlrun/common/schemas/schedule.py
CHANGED
@@ -96,7 +96,7 @@ class ScheduleUpdate(BaseModel):
     scheduled_object: Optional[Any]
     cron_trigger: Optional[Union[str, ScheduleCronTrigger]]
     desired_state: Optional[str]
-    labels: Optional[dict] = {}
+    labels: Optional[dict] = None
     concurrency_limit: Optional[int]
     credentials: Credentials = Credentials()
mlrun/config.py
CHANGED
@@ -64,11 +64,15 @@ default_config = {
     "api_base_version": "v1",
     "version": "",  # will be set to current version
     "images_tag": "",  # tag to use with mlrun images e.g. mlrun/mlrun (defaults to version)
-    "images_registry": "",  # registry to use with mlrun images e.g. quay.io/ (defaults to empty, for dockerhub)
+    # registry to use with mlrun images that start with "mlrun/" e.g. quay.io/ (defaults to empty, for dockerhub)
+    "images_registry": "",
+    # registry to use with non-mlrun images (don't start with "mlrun/") specified in 'images_to_enrich_registry'
+    # defaults to empty, for dockerhub
+    "vendor_images_registry": "",
     # comma separated list of images that are in the specified images_registry, and therefore will be enriched with this
     # registry when used. default to mlrun/* which means any image which is of the mlrun repository (mlrun/mlrun,
     # mlrun/ml-base, etc...)
-    "images_to_enrich_registry": "^mlrun/*",
+    "images_to_enrich_registry": "^mlrun/*,python:3.9",
     "kfp_url": "",
     "kfp_ttl": "14400",  # KFP ttl in sec, after that completed PODs will be deleted
     "kfp_image": "mlrun/mlrun",  # image to use for KFP runner (defaults to mlrun/mlrun)
@@ -250,7 +254,7 @@ default_config = {
         "remote": "mlrun/mlrun",
         "dask": "mlrun/ml-base",
         "mpijob": "mlrun/mlrun",
-        "application": "python:3.9
+        "application": "python:3.9",
     },
     # see enrich_function_preemption_spec for more info,
     # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -504,13 +508,12 @@ default_config = {
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
         "application_stream_args": {"shard_count": 1, "retention_period_hours": 24},
-        "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
         # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
         # stream, and endpoints.
         "store_prefixes": {
             "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
             "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
-            "stream": "",
+            "stream": "",  # TODO: Delete in 1.9.0
             "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
         },
         # Offline storage path can be either relative or a full path. This path is used for general offline data
@@ -523,11 +526,12 @@ default_config = {
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
         # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
-        "store_type": "v3io-nosql",
+        "store_type": "v3io-nosql",  # TODO: Delete in 1.9.0
         "endpoint_store_connection": "",
         # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
-        "tsdb_connector_type": "v3io-tsdb",
         "tsdb_connection": "",
+        # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
+        "stream_connection": "",
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -660,7 +664,9 @@ default_config = {
     "failed_runs_grace_period": 3600,
     "verbose": True,
     # the number of workers which will be used to trigger the start log collection
-    "concurrent_start_logs_workers":
+    "concurrent_start_logs_workers": 50,
+    # the number of runs for which to start logs on api startup
+    "start_logs_startup_run_limit": 150,
     # the time in hours in which to start log collection from.
     # after upgrade, we might have runs which completed in the mean time or still in non-terminal state and
     # we want to collect their logs in the new log collection method (sidecar)
@@ -708,6 +714,8 @@ default_config = {
     # maximum number of alerts we allow to be configured.
     # user will get an error when exceeding this
     "max_allowed": 10000,
+    # maximum allowed value for count in criteria field inside AlertConfig
+    "max_criteria_count": 100,
 },
 "auth_with_client_id": {
     "enabled": False,
@@ -1118,7 +1126,6 @@ class Config:
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
             return store_prefix_dict[kind].format(project=project, **kwargs)
-
         if (
             function_name
             and function_name
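The new keys (`vendor_images_registry`, `stream_connection`, `max_criteria_count`, ...) follow mlrun's usual override scheme, where nested config paths map to `MLRUN_`-prefixed environment variables with `__` as the separator. A hedged sketch:

import os

# assumed env mapping for mlrun.mlconf.model_endpoint_monitoring.stream_connection
os.environ["MLRUN_MODEL_ENDPOINT_MONITORING__STREAM_CONNECTION"] = "kafka://broker:9092"

import mlrun  # config is populated from MLRUN_* env vars on load

print(mlrun.mlconf.model_endpoint_monitoring.stream_connection)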
mlrun/datastore/azure_blob.py
CHANGED
mlrun/datastore/base.py
CHANGED
@@ -319,11 +319,7 @@ class DataStore:
             dfs.append(df_module.read_csv(*updated_args, **kwargs))
             return df_module.concat(dfs)

-        elif (
-            file_url.endswith(".parquet")
-            or file_url.endswith(".pq")
-            or format == "parquet"
-        ):
+        elif mlrun.utils.helpers.is_parquet_file(file_url, format):
             if columns:
                 kwargs["columns"] = columns

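The suffix checks previously copy-pasted here and in mlrun/datastore/inmem.py now live in one helper. Its body is not part of this diff; a plausible sketch consistent with the old inline condition:

# plausible sketch only -- the real helper lives in mlrun/utils/helpers.py
def is_parquet_file(file_url: str, format: str = None) -> bool:
    return (
        file_url.endswith(".parquet")
        or file_url.endswith(".pq")
        or format == "parquet"
    )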
mlrun/datastore/datastore.py
CHANGED
@@ -21,7 +21,7 @@ from mlrun.datastore.datastore_profile import datastore_profile_read
 from mlrun.errors import err_to_str
 from mlrun.utils.helpers import get_local_file_schema

-from ..utils import DB_SCHEMA, run_keys
+from ..utils import DB_SCHEMA, RunKeys
 from .base import DataItem, DataStore, HttpStore
 from .filestore import FileStore
 from .inmem import InMemoryStore
@@ -133,7 +133,7 @@ class StoreManager:
         return self._db

     def from_dict(self, struct: dict):
-        stor_list = struct.get(run_keys.data_stores)
+        stor_list = struct.get(RunKeys.data_stores)
         if stor_list and isinstance(stor_list, list):
             for stor in stor_list:
                 schema, endpoint, parsed_url = parse_url(stor.get("url"))
@@ -145,7 +145,7 @@ class StoreManager:
             self._stores[stor["name"]] = new_stor

     def to_dict(self, struct):
-        struct[run_keys.data_stores] = [
+        struct[RunKeys.data_stores] = [
             stor.to_dict() for stor in self._stores.values() if stor.from_spec
         ]

mlrun/datastore/google_cloud_storage.py
CHANGED
@@ -55,8 +55,12 @@ class GoogleCloudStorageStore(DataStore):
         ) or self._get_secret_or_env("GOOGLE_APPLICATION_CREDENTIALS")
         if credentials:
             try:
-                # Try to handle credentials as a json connection string
-                token = json.loads(credentials)
+                # Try to handle credentials as a json connection string or do nothing if already a dict
+                token = (
+                    credentials
+                    if isinstance(credentials, dict)
+                    else json.loads(credentials)
+                )
             except json.JSONDecodeError:
                 # If it's not json, handle it as a filename
                 token = credentials
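Restated standalone, the new token resolution accepts a dict, a JSON string, or a file path (illustrative helper, not mlrun API):

import json

def resolve_gcs_token(credentials):
    try:
        # pass dicts through unchanged, parse JSON strings
        return credentials if isinstance(credentials, dict) else json.loads(credentials)
    except json.JSONDecodeError:
        # not JSON: treat the value as a credentials file name
        return credentials

assert resolve_gcs_token({"type": "service_account"}) == {"type": "service_account"}
assert resolve_gcs_token('{"type": "service_account"}') == {"type": "service_account"}
assert resolve_gcs_token("/path/to/creds.json") == "/path/to/creds.json"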
mlrun/datastore/inmem.py
CHANGED
@@ -72,7 +72,7 @@ class InMemoryStore(DataStore):
             if columns:
                 kwargs["usecols"] = columns
             reader = df_module.read_csv
-        elif url.endswith(".parquet") or url.endswith(".pq") or format == "parquet":
+        elif mlrun.utils.helpers.is_parquet_file(url, format):
             if columns:
                 kwargs["columns"] = columns
             reader = df_module.read_parquet
mlrun/datastore/snowflake_utils.py
CHANGED
@@ -30,13 +30,15 @@ def get_snowflake_password():


 def get_snowflake_spark_options(attributes):
+    if not attributes:
+        return {}
     return {
         "format": "net.snowflake.spark.snowflake",
         "sfURL": attributes.get("url"),
         "sfUser": attributes.get("user"),
         "sfPassword": get_snowflake_password(),
         "sfDatabase": attributes.get("database"),
-        "sfSchema": attributes.get("schema"),
+        "sfSchema": attributes.get("db_schema"),
         "sfWarehouse": attributes.get("warehouse"),
         "application": "iguazio_platform",
         "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
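Callers now get an empty dict back for missing attributes, and must supply the schema under the `db_schema` key. A sketch with illustrative connection values:

from mlrun.datastore.snowflake_utils import get_snowflake_spark_options

assert get_snowflake_spark_options(None) == {}  # new guard instead of an AttributeError

# the password still comes from get_snowflake_password(), i.e. the Snowflake secret/env var
options = get_snowflake_spark_options(
    {
        "url": "myaccount.snowflakecomputing.com",
        "user": "analyst",
        "database": "ANALYTICS",
        "db_schema": "PUBLIC",  # read from the "schema" key before this change
        "warehouse": "COMPUTE_WH",
    }
)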
mlrun/datastore/sources.py
CHANGED
@@ -747,7 +747,7 @@ class SnowflakeSource(BaseSourceDriver):
             url="...",
             user="...",
             database="...",
-            schema="...",
+            db_schema="...",
             warehouse="...",
         )

@@ -762,7 +762,8 @@ class SnowflakeSource(BaseSourceDriver):
     :parameter url: URL of the snowflake cluster
     :parameter user: snowflake user
     :parameter database: snowflake database
-    :parameter schema: snowflake schema
+    :parameter schema: snowflake schema - deprecated, use db_schema
+    :parameter db_schema: snowflake schema
     :parameter warehouse: snowflake warehouse
     """

@@ -774,6 +775,7 @@ class SnowflakeSource(BaseSourceDriver):
         self,
         name: str = "",
         key_field: str = None,
+        attributes: dict[str, object] = None,
         time_field: str = None,
         schedule: str = None,
         start_time=None,
@@ -783,21 +785,34 @@ class SnowflakeSource(BaseSourceDriver):
         user: str = None,
         database: str = None,
         schema: str = None,
+        db_schema: str = None,
         warehouse: str = None,
         **kwargs,
     ):
-        attrs = {
-            "url": url,
-            "user": user,
-            "database": database,
-            "schema": schema,
-            "warehouse": warehouse,
-            "query": query,
-        }
+        # TODO: Remove in 1.9.0
+        if schema:
+            warnings.warn(
+                "schema is deprecated in 1.7.0, and will be removed in 1.9.0, please use db_schema"
+            )
+        db_schema = db_schema or schema  # TODO: Remove in 1.9.0
+
+        attributes = attributes or {}
+        if url:
+            attributes["url"] = url
+        if user:
+            attributes["user"] = user
+        if database:
+            attributes["database"] = database
+        if db_schema:
+            attributes["db_schema"] = db_schema
+        if warehouse:
+            attributes["warehouse"] = warehouse
+        if query:
+            attributes["query"] = query

         super().__init__(
             name,
-            attributes=attrs,
+            attributes=attributes,
             key_field=key_field,
             time_field=time_field,
             schedule=schedule,
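During the deprecation window both spellings work, with `schema` funneled into `db_schema`. A usage sketch with illustrative connection values:

from mlrun.datastore.sources import SnowflakeSource

source = SnowflakeSource(
    "snowflake_source",
    query="SELECT * FROM events",
    url="myaccount.snowflakecomputing.com",
    user="analyst",
    database="ANALYTICS",
    db_schema="PUBLIC",  # passing schema="PUBLIC" still works but emits a warning
    warehouse="COMPUTE_WH",
)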