mlrun 1.7.0rc26__py3-none-any.whl → 1.7.0rc31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__main__.py +7 -7
- mlrun/alerts/alert.py +13 -1
- mlrun/artifacts/manager.py +5 -0
- mlrun/common/constants.py +3 -3
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/formatters/base.py +9 -9
- mlrun/common/schemas/alert.py +4 -8
- mlrun/common/schemas/api_gateway.py +7 -0
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +32 -13
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
- mlrun/common/schemas/project.py +10 -9
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/config.py +37 -11
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +48 -16
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/azure_blob.py +2 -1
- mlrun/datastore/base.py +21 -13
- mlrun/datastore/datastore.py +7 -5
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -0
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/s3.py +2 -0
- mlrun/datastore/snowflake_utils.py +3 -1
- mlrun/datastore/sources.py +40 -11
- mlrun/datastore/store_resources.py +2 -0
- mlrun/datastore/targets.py +71 -26
- mlrun/db/base.py +11 -0
- mlrun/db/httpdb.py +50 -31
- mlrun/db/nopdb.py +11 -1
- mlrun/errors.py +4 -0
- mlrun/execution.py +18 -10
- mlrun/feature_store/retrieval/spark_merger.py +4 -32
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +27 -1
- mlrun/model_monitoring/api.py +9 -55
- mlrun/model_monitoring/applications/histogram_data_drift.py +4 -1
- mlrun/model_monitoring/controller.py +57 -73
- mlrun/model_monitoring/db/stores/__init__.py +21 -9
- mlrun/model_monitoring/db/stores/base/store.py +39 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +4 -2
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +41 -80
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +22 -27
- mlrun/model_monitoring/db/tsdb/__init__.py +19 -14
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +4 -2
- mlrun/model_monitoring/helpers.py +15 -17
- mlrun/model_monitoring/writer.py +2 -7
- mlrun/projects/operations.py +1 -0
- mlrun/projects/project.py +87 -75
- mlrun/render.py +10 -5
- mlrun/run.py +7 -7
- mlrun/runtimes/base.py +1 -1
- mlrun/runtimes/daskjob.py +7 -1
- mlrun/runtimes/local.py +24 -7
- mlrun/runtimes/nuclio/function.py +20 -0
- mlrun/runtimes/pod.py +5 -29
- mlrun/serving/routers.py +75 -59
- mlrun/serving/server.py +1 -0
- mlrun/serving/v2_serving.py +8 -1
- mlrun/utils/helpers.py +46 -2
- mlrun/utils/logger.py +36 -2
- mlrun/utils/notifications/notification/base.py +4 -0
- mlrun/utils/notifications/notification/git.py +21 -0
- mlrun/utils/notifications/notification/slack.py +8 -0
- mlrun/utils/notifications/notification/webhook.py +41 -1
- mlrun/utils/notifications/notification_pusher.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/METADATA +13 -8
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/RECORD +76 -78
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/top_level.txt +0 -0
mlrun/__main__.py
CHANGED

@@ -50,12 +50,12 @@ from .run import (
 from .runtimes import RemoteRuntime, RunError, RuntimeKinds, ServingRuntime
 from .secrets import SecretsStore
 from .utils import (
+    RunKeys,
     dict_to_yaml,
     get_in,
     is_relative_path,
     list2dict,
     logger,
-    run_keys,
     update_in,
 )
 from .utils.version import Version

@@ -380,15 +380,15 @@ def run(
     set_item(runobj.spec.hyper_param_options, hyper_param_strategy, "strategy")
     set_item(runobj.spec.hyper_param_options, selector, "selector")

-    set_item(runobj.spec, inputs, run_keys.inputs, list2dict(inputs))
+    set_item(runobj.spec, inputs, RunKeys.inputs, list2dict(inputs))
     set_item(
-        runobj.spec, returns, run_keys.returns, [py_eval(value) for value in returns]
+        runobj.spec, returns, RunKeys.returns, [py_eval(value) for value in returns]
     )
-    set_item(runobj.spec, in_path, run_keys.input_path)
-    set_item(runobj.spec, out_path, run_keys.output_path)
-    set_item(runobj.spec, outputs, run_keys.outputs, list(outputs))
+    set_item(runobj.spec, in_path, RunKeys.input_path)
+    set_item(runobj.spec, out_path, RunKeys.output_path)
+    set_item(runobj.spec, outputs, RunKeys.outputs, list(outputs))
     set_item(
-        runobj.spec, secrets, run_keys.secrets, line2keylist(secrets, "kind", "source")
+        runobj.spec, secrets, RunKeys.secrets, line2keylist(secrets, "kind", "source")
     )
     set_item(runobj.spec, verbose, "verbose")
     set_item(runobj.spec, scrape_metrics, "scrape_metrics")
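The substance of this change is a rename of the run-spec key constants from the lowercase `run_keys` helper to the `RunKeys` class now exported from `mlrun.utils` (its definition moves in `mlrun/utils/helpers.py`, listed above). A minimal sketch, assuming the attribute values mirror the attribute names:

```python
# Hedged sketch: RunKeys attributes name the RunSpec fields that the CLI
# options above are stored into; the string values are an assumption.
from mlrun.utils import RunKeys

for key in (RunKeys.inputs, RunKeys.outputs, RunKeys.input_path, RunKeys.output_path):
    print(key)  # e.g. "inputs", "outputs", "input_path", "output_path"
```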
mlrun/alerts/alert.py
CHANGED

@@ -26,7 +26,6 @@ class AlertConfig(ModelObj):
         "description",
         "summary",
         "severity",
-        "criteria",
         "reset_policy",
         "state",
     ]

@@ -34,6 +33,7 @@ class AlertConfig(ModelObj):
         "entities",
         "notifications",
         "trigger",
+        "criteria",
     ]

     def __init__(

@@ -104,6 +104,14 @@ class AlertConfig(ModelObj):
                     else self.trigger
                 )
             return None
+        if field_name == "criteria":
+            if self.criteria:
+                return (
+                    self.criteria.dict()
+                    if not isinstance(self.criteria, dict)
+                    else self.criteria
+                )
+            return None
         return super()._serialize_field(struct, field_name, strip)

     def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):

@@ -137,6 +145,10 @@ class AlertConfig(ModelObj):
         trigger_obj = alert_objects.AlertTrigger.parse_obj(trigger_data)
         new_obj.trigger = trigger_obj

+        criteria_data = struct.get("criteria")
+        if criteria_data:
+            criteria_obj = alert_objects.AlertCriteria.parse_obj(criteria_data)
+            new_obj.criteria = criteria_obj
         return new_obj

     def with_notifications(self, notifications: list[alert_objects.AlertNotification]):
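With `criteria` moved from the plain fields to the object fields and handled in both `_serialize_field` and `from_dict`, an alert's criteria now survives a dict round trip as a parsed object rather than a raw dict. A minimal sketch, assuming `AlertCriteria` accepts `count` and `period` and that a partial config can be serialized (a real config also needs notifications, entities, etc. before it can be stored):

```python
import mlrun.common.schemas.alert as alert_objects
from mlrun.alerts.alert import AlertConfig

config = AlertConfig(
    project="my-project",  # illustrative values
    name="drift-alert",
    criteria=alert_objects.AlertCriteria(count=3, period="10m"),
)
restored = AlertConfig.from_dict(config.to_dict())
# criteria is re-parsed into an AlertCriteria object as of rc31
assert restored.criteria.count == 3
```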
mlrun/artifacts/manager.py
CHANGED

@@ -100,6 +100,11 @@ class ArtifactProducer:

 def dict_to_artifact(struct: dict) -> Artifact:
     kind = struct.get("kind", "")
+
+    # TODO: remove this in 1.8.0
+    if mlrun.utils.is_legacy_artifact(struct):
+        return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
+
     artifact_class = artifact_types[kind]
     return artifact_class.from_dict(struct)
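The added guard restores backward compatibility for artifact dicts stored in the pre-`metadata`/`spec` layout. A sketch of the behavior, assuming `is_legacy_artifact` keys off the absence of the `metadata` section (the exact heuristic lives in `mlrun.utils`):

```python
from mlrun.artifacts.manager import dict_to_artifact

# current layout: kind plus metadata/spec sections
new_style = {"kind": "model", "metadata": {"key": "my-model"}, "spec": {}}
# legacy flat layout, still convertible until 1.8.0 per the TODO above
legacy = {"kind": "model", "key": "my-model"}

for struct in (new_style, legacy):
    artifact = dict_to_artifact(struct)  # both yield a new-format Artifact
```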
mlrun/common/constants.py
CHANGED

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
 MLRUN_SERVING_CONF = "serving-conf"

@@ -64,12 +63,13 @@ class MLRunInternalLabels:
     username = f"{MLRUN_LABEL_PREFIX}username"
     username_domain = f"{MLRUN_LABEL_PREFIX}username_domain"
     task_name = f"{MLRUN_LABEL_PREFIX}task-name"
+    resource_name = f"{MLRUN_LABEL_PREFIX}resource_name"
+    created = f"{MLRUN_LABEL_PREFIX}created"
     host = "host"
     job_type = "job-type"
     kind = "kind"
     component = "component"
-
-    created = "mlrun-created"
+    mlrun_type = "mlrun__type"

     owner = "owner"
     v3io_user = "v3io_user"
mlrun/common/formatters/base.py
CHANGED

@@ -28,42 +28,42 @@ class ObjectFormat:
     full = "full"

     @staticmethod
-    def format_method(
+    def format_method(format_: str) -> typing.Optional[typing.Callable]:
         """
         Get the formatting method for the provided format.
         A `None` value signifies a pass-through formatting method (no formatting).
-        :param
+        :param format_: The format as a string representation.
         :return: The formatting method.
         """
         return {
             ObjectFormat.full: None,
-        }[
+        }[format_]

     @classmethod
     def format_obj(
         cls,
         obj: typing.Any,
-
+        format_: str,
         exclude_formats: typing.Optional[list[str]] = None,
     ) -> typing.Any:
         """
         Format the provided object based on the provided format.
         :param obj: The object to format.
-        :param
+        :param format_: The format as a string representation.
         :param exclude_formats: A list of formats to exclude from the formatting process. If the provided format is in
                                 this list, an invalid format exception will be raised.
         """
         exclude_formats = exclude_formats or []
-
+        format_ = format_ or cls.full
         invalid_format_exc = mlrun.errors.MLRunBadRequestError(
-            f"Provided format is not supported. format={
+            f"Provided format is not supported. format={format_}"
        )

-        if
+        if format_ in exclude_formats:
             raise invalid_format_exc

         try:
-            format_method = cls.format_method(
+            format_method = cls.format_method(format_)
         except KeyError:
             raise invalid_format_exc

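The truncated removed lines hide the old parameter name, but the new contract is clear: the format argument is `format_`, it falls back to `full` when empty, and `full` maps to a pass-through (`None`) method. A short sketch, assuming pass-through returns the object unchanged as the docstring states:

```python
import mlrun.common.formatters.base as base

obj = {"name": "demo"}

assert base.ObjectFormat.format_method("full") is None  # "full" = no formatting
assert base.ObjectFormat.format_obj(obj, "full") == obj  # pass-through (assumed)
# an unrecognized format raises mlrun.errors.MLRunBadRequestError via the
# KeyError handler shown above
```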
mlrun/common/schemas/alert.py
CHANGED

@@ -39,8 +39,8 @@ class EventKind(StrEnum):
     CONCEPT_DRIFT_SUSPECTED = "concept_drift_suspected"
     MODEL_PERFORMANCE_DETECTED = "model_performance_detected"
     MODEL_PERFORMANCE_SUSPECTED = "model_performance_suspected"
-    MODEL_SERVING_PERFORMANCE_DETECTED = "model_serving_performance_detected"
-    MODEL_SERVING_PERFORMANCE_SUSPECTED = "model_serving_performance_suspected"
+    SYSTEM_PERFORMANCE_DETECTED = "system_performance_detected"
+    SYSTEM_PERFORMANCE_SUSPECTED = "system_performance_suspected"
     MM_APP_ANOMALY_DETECTED = "mm_app_anomaly_detected"
     MM_APP_ANOMALY_SUSPECTED = "mm_app_anomaly_suspected"
     FAILED = "failed"

@@ -53,12 +53,8 @@ _event_kind_entity_map = {
     EventKind.CONCEPT_DRIFT_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MODEL_PERFORMANCE_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MODEL_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
-    EventKind.MODEL_SERVING_PERFORMANCE_DETECTED: [
-        EventEntityKind.MODEL_ENDPOINT_RESULT
-    ],
-    EventKind.MODEL_SERVING_PERFORMANCE_SUSPECTED: [
-        EventEntityKind.MODEL_ENDPOINT_RESULT
-    ],
+    EventKind.SYSTEM_PERFORMANCE_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.SYSTEM_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MM_APP_ANOMALY_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MM_APP_ANOMALY_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.FAILED: [EventEntityKind.JOB],
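The rename drops the `MODEL_SERVING_` prefix in favor of `SYSTEM_`, matching the `system_performance` result kind in the model-monitoring constants further down. Sketch of the new members, with values taken directly from the diff:

```python
from mlrun.common.schemas.alert import EventKind

print(EventKind.SYSTEM_PERFORMANCE_DETECTED.value)   # "system_performance_detected"
print(EventKind.SYSTEM_PERFORMANCE_SUSPECTED.value)  # "system_performance_suspected"
```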
mlrun/common/schemas/api_gateway.py
CHANGED

@@ -102,6 +102,13 @@ class APIGateway(_APIGatewayBaseModel):
             if upstream.nucliofunction.get("name")
         ]

+    def get_invoke_url(self):
+        return (
+            self.spec.host + self.spec.path
+            if self.spec.path and self.spec.host
+            else self.spec.host
+        )
+
     def enrich_mlrun_names(self):
         self._enrich_api_gateway_mlrun_name()
         self._enrich_mlrun_function_names()
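The new helper concatenates host and path only when both are set. A standalone sketch of the same logic (illustrative values; the real method reads them from `self.spec`):

```python
def invoke_url(host: str, path: str) -> str:
    # mirrors APIGateway.get_invoke_url: host + path when both exist, else host
    return host + path if path and host else host

assert invoke_url("gateway.example.com", "/v1/predict") == "gateway.example.com/v1/predict"
assert invoke_url("gateway.example.com", "") == "gateway.example.com"
```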
mlrun/common/schemas/constants.py
CHANGED

@@ -120,10 +120,13 @@ class FeatureStorePartitionByField(mlrun.common.types.StrEnum):

 class RunPartitionByField(mlrun.common.types.StrEnum):
     name = "name"  # Supported for runs objects
+    project_and_name = "project_and_name"  # Supported for runs objects

     def to_partition_by_db_field(self, db_cls):
         if self.value == RunPartitionByField.name:
             return db_cls.name
+        elif self.value == RunPartitionByField.project_and_name:
+            return db_cls.project, db_cls.name
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Unknown group by field: {self.value}"
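`project_and_name` lets run listings be partitioned by the (project, name) pair rather than name alone; note the method returns a tuple of DB columns in that case. A sketch with a stand-in ORM class (the real `db_cls` is the server-side SQLAlchemy run model):

```python
from mlrun.common.schemas.constants import RunPartitionByField

class FakeRunModel:  # hypothetical stand-in with project/name columns
    project = "runs.project"
    name = "runs.name"

field = RunPartitionByField.project_and_name
assert field.to_partition_by_db_field(FakeRunModel) == ("runs.project", "runs.name")
```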
mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -17,6 +17,7 @@ from dataclasses import dataclass
 from enum import Enum, IntEnum
 from typing import Optional

+import mlrun.common.constants
 import mlrun.common.helpers
 from mlrun.common.types import StrEnum

@@ -78,8 +79,6 @@ class EventFieldType:
     FEATURE_SET_URI = "monitoring_feature_set_uri"
     ALGORITHM = "algorithm"
     VALUE = "value"
-    DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
-    POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
     TIME = "time"
     TABLE_COLUMN = "table_column"

@@ -158,19 +157,42 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"


-class ModelEndpointTarget:
+class ModelEndpointTarget(MonitoringStrEnum):
     V3IO_NOSQL = "v3io-nosql"
     SQL = "sql"


+class StreamKind(MonitoringStrEnum):
+    V3IO_STREAM = "v3io_stream"
+    KAFKA = "kafka"
+
+
+class TSDBTarget(MonitoringStrEnum):
+    V3IO_TSDB = "v3io-tsdb"
+    TDEngine = "tdengine"
+    PROMETHEUS = "prometheus"
+
+
 class ProjectSecretKeys:
     ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
-    PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
-    KAFKA_BROKERS = "KAFKA_BROKERS"
     STREAM_PATH = "STREAM_PATH"
     TSDB_CONNECTION = "TSDB_CONNECTION"

+    @classmethod
+    def mandatory_secrets(cls):
+        return [
+            cls.ENDPOINT_STORE_CONNECTION,
+            cls.STREAM_PATH,
+            cls.TSDB_CONNECTION,
+        ]
+
+
+class ModelEndpointTargetSchemas(MonitoringStrEnum):
+    V3IO = "v3io"
+    MYSQL = "mysql"
+    SQLITE = "sqlite"
+

 class ModelMonitoringStoreKinds:
     ENDPOINTS = "endpoints"

@@ -318,7 +340,7 @@ class ResultKindApp(Enum):
     concept_drift = 1
     model_performance = 2
     system_performance = 3
-
+    mm_app_anomaly = 4


 class ResultStatusApp(IntEnum):

@@ -333,7 +355,7 @@ class ResultStatusApp(IntEnum):


 class ModelMonitoringAppLabel:
-    KEY =
+    KEY = mlrun.common.constants.MLRunInternalLabels.mlrun_type
     VAL = "mlrun__model-monitoring-application"

     def __str__(self) -> str:

@@ -344,12 +366,6 @@ class ControllerPolicy:
     BASE_PERIOD = "base_period"


-class TSDBTarget:
-    V3IO_TSDB = "v3io-tsdb"
-    TDEngine = "tdengine"
-    PROMETHEUS = "prometheus"
-
-
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"

@@ -362,3 +378,6 @@ class PredictionsQueryConstants:

 class SpecialApps:
     MLRUN_INFRA = "mlrun-infra"
+
+
+_RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]
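Several plain constant classes become `MonitoringStrEnum`s here, which (as the `_RESERVED_FUNCTION_NAMES` line shows) provides a `.list()` helper for enumerating valid values; `ProjectSecretKeys` gains `mandatory_secrets()` now that the Kafka/pipelines keys are gone. A quick sketch, with expected output taken from the members above:

```python
from mlrun.common.schemas.model_monitoring.constants import (
    ModelEndpointTarget,
    ProjectSecretKeys,
    StreamKind,
    TSDBTarget,
)

print(TSDBTarget.list())           # ["v3io-tsdb", "tdengine", "prometheus"]
print(StreamKind.list())           # ["v3io_stream", "kafka"]
print(ModelEndpointTarget.list())  # ["v3io-nosql", "sql"]
print(ProjectSecretKeys.mandatory_secrets())
# ["MODEL_MONITORING_ENDPOINT_STORE_CONNECTION", "STREAM_PATH", "TSDB_CONNECTION"]
```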
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED

@@ -103,18 +103,6 @@ class ModelEndpointSpec(ObjectSpec):
             json_parse_values=json_parse_values,
         )

-    @validator("monitor_configuration")
-    @classmethod
-    def set_name(cls, monitor_configuration):
-        return monitor_configuration or {
-            EventFieldType.DRIFT_DETECTED_THRESHOLD: (
-                mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected
-            ),
-            EventFieldType.POSSIBLE_DRIFT_THRESHOLD: (
-                mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift
-            ),
-        }
-
     @validator("model_uri")
     @classmethod
     def validate_model_uri(cls, model_uri):
mlrun/common/schemas/project.py
CHANGED

@@ -114,18 +114,19 @@ class ProjectOwner(pydantic.BaseModel):

 class ProjectSummary(pydantic.BaseModel):
     name: str
-    files_count: int
-    feature_sets_count: int
-    models_count: int
-    runs_completed_recent_count: int
-    runs_failed_recent_count: int
-    runs_running_count: int
-    distinct_schedules_count: int
-    distinct_scheduled_jobs_pending_count: int
-    distinct_scheduled_pipelines_pending_count: int
+    files_count: int = 0
+    feature_sets_count: int = 0
+    models_count: int = 0
+    runs_completed_recent_count: int = 0
+    runs_failed_recent_count: int = 0
+    runs_running_count: int = 0
+    distinct_schedules_count: int = 0
+    distinct_scheduled_jobs_pending_count: int = 0
+    distinct_scheduled_pipelines_pending_count: int = 0
     pipelines_completed_recent_count: typing.Optional[int] = None
     pipelines_failed_recent_count: typing.Optional[int] = None
     pipelines_running_count: typing.Optional[int] = None
+    updated: typing.Optional[datetime.datetime] = None


 class IguazioProject(pydantic.BaseModel):
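With defaults of 0 on the count fields, a `ProjectSummary` can now be constructed from `name` alone and filled in lazily, which fits the new summaries cache (`cache_interval`) added in `mlrun/config.py` below; `updated` presumably records when the cached summary was last refreshed. Sketch:

```python
from mlrun.common.schemas.project import ProjectSummary

summary = ProjectSummary(name="my-project")  # counts default to 0 as of rc31
assert summary.files_count == 0
assert summary.updated is None  # set when the summary cache refreshes (assumed)
```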
mlrun/common/schemas/schedule.py
CHANGED

@@ -96,7 +96,7 @@ class ScheduleUpdate(BaseModel):
     scheduled_object: Optional[Any]
     cron_trigger: Optional[Union[str, ScheduleCronTrigger]]
     desired_state: Optional[str]
-    labels: Optional[dict] =
+    labels: Optional[dict] = None
     concurrency_limit: Optional[int]
     credentials: Credentials = Credentials()
mlrun/config.py
CHANGED

@@ -52,6 +52,11 @@ default_config = {
     "kubernetes": {
         "kubeconfig_path": "",  # local path to kubeconfig file (for development purposes),
         # empty by default as the API already running inside k8s cluster
+        "pagination": {
+            # pagination config for interacting with k8s API
+            "list_pods_limit": 200,
+            "list_crd_objects_limit": 200,
+        },
     },
     "dbpath": "",  # db/api url
     # url to nuclio dashboard api (can be with user & token, e.g. https://username:password@dashboard-url.com)

@@ -64,11 +69,15 @@ default_config = {
     "api_base_version": "v1",
     "version": "",  # will be set to current version
     "images_tag": "",  # tag to use with mlrun images e.g. mlrun/mlrun (defaults to version)
-
+    # registry to use with mlrun images that start with "mlrun/" e.g. quay.io/ (defaults to empty, for dockerhub)
+    "images_registry": "",
+    # registry to use with non-mlrun images (don't start with "mlrun/") specified in 'images_to_enrich_registry'
+    # defaults to empty, for dockerhub
+    "vendor_images_registry": "",
     # comma separated list of images that are in the specified images_registry, and therefore will be enriched with this
     # registry when used. default to mlrun/* which means any image which is of the mlrun repository (mlrun/mlrun,
     # mlrun/ml-base, etc...)
-    "images_to_enrich_registry": "^mlrun
+    "images_to_enrich_registry": "^mlrun/*,python:3.9",
     "kfp_url": "",
     "kfp_ttl": "14400",  # KFP ttl in sec, after that completed PODs will be deleted
     "kfp_image": "mlrun/mlrun",  # image to use for KFP runner (defaults to mlrun/mlrun)

@@ -104,7 +113,12 @@ default_config = {
             # max number of parallel abort run jobs in runs monitoring
             "concurrent_abort_stale_runs_workers": 10,
             "list_runs_time_period_in_days": 7,  # days
-        }
+        },
+        "projects": {
+            "summaries": {
+                "cache_interval": "30",
+            },
+        },
     },
     "crud": {
         "runs": {

@@ -250,7 +264,7 @@ default_config = {
         "remote": "mlrun/mlrun",
         "dask": "mlrun/ml-base",
         "mpijob": "mlrun/mlrun",
-        "application": "python:3.9
+        "application": "python:3.9",
     },
     # see enrich_function_preemption_spec for more info,
     # and mlrun.common.schemas.function.PreemptionModes for available options

@@ -265,6 +279,16 @@ default_config = {
             "url": "",
             "service": "mlrun-api-chief",
             "port": 8080,
+            "feature_gates": {
+                "scheduler": "enabled",
+                "project_sync": "enabled",
+                "cleanup": "enabled",
+                "runs_monitoring": "enabled",
+                "pagination_cache": "enabled",
+                "project_summaries": "enabled",
+                "start_logs": "enabled",
+                "stop_logs": "enabled",
+            },
         },
         "worker": {
             "sync_with_chief": {

@@ -433,7 +457,6 @@ default_config = {
         "followers": "",
         # This is used as the interval for the sync loop both when mlrun is leader and follower
         "periodic_sync_interval": "1 minute",
-        "counters_cache_ttl": "2 minutes",
         "project_owners_cache_ttl": "30 seconds",
         # access key to be used when the leader is iguazio and polling is done from it
         "iguazio_access_key": "",

@@ -504,13 +527,12 @@ default_config = {
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
         "application_stream_args": {"shard_count": 1, "retention_period_hours": 24},
-        "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
         # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
         # stream, and endpoints.
         "store_prefixes": {
             "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
             "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
-            "stream": "",
+            "stream": "",  # TODO: Delete in 1.9.0
             "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
         },
         # Offline storage path can be either relative or a full path. This path is used for general offline data

@@ -523,11 +545,12 @@ default_config = {
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
         # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
-        "store_type": "v3io-nosql",
+        "store_type": "v3io-nosql",  # TODO: Delete in 1.9.0
         "endpoint_store_connection": "",
         # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
-        "tsdb_connector_type": "v3io-tsdb",
         "tsdb_connection": "",
+        # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
+        "stream_connection": "",
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory

@@ -660,7 +683,9 @@ default_config = {
         "failed_runs_grace_period": 3600,
         "verbose": True,
         # the number of workers which will be used to trigger the start log collection
-        "concurrent_start_logs_workers":
+        "concurrent_start_logs_workers": 50,
+        # the number of runs for which to start logs on api startup
+        "start_logs_startup_run_limit": 150,
         # the time in hours in which to start log collection from.
         # after upgrade, we might have runs which completed in the mean time or still in non-terminal state and
         # we want to collect their logs in the new log collection method (sidecar)

@@ -708,6 +733,8 @@ default_config = {
         # maximum number of alerts we allow to be configured.
         # user will get an error when exceeding this
         "max_allowed": 10000,
+        # maximum allowed value for count in criteria field inside AlertConfig
+        "max_criteria_count": 100,
     },
     "auth_with_client_id": {
         "enabled": False,

@@ -1118,7 +1145,6 @@ class Config:
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
             return store_prefix_dict[kind].format(project=project, **kwargs)
-
         if (
             function_name
             and function_name
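All of these keys surface as attributes on `mlrun.mlconf`. A sketch of reading the new defaults; the nesting (e.g. the chief block under `httpdb.clusterization`) is inferred from the hunk contexts, so treat the exact paths as assumptions:

```python
import mlrun

print(mlrun.mlconf.kubernetes.pagination.list_pods_limit)  # 200
print(mlrun.mlconf.alerts.max_criteria_count)              # 100
# chief-only background-task feature gates, all "enabled" by default (path assumed)
print(mlrun.mlconf.httpdb.clusterization.chief.feature_gates.scheduler)
```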
mlrun/data_types/spark.py
CHANGED

@@ -20,10 +20,10 @@ import pytz
 from pyspark.sql.functions import to_utc_timestamp
 from pyspark.sql.types import BooleanType, DoubleType, TimestampType

+from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
 from mlrun.utils import logger

 from .data_types import InferOptions, spark_to_value_type
-from .to_pandas import toPandas

 try:
     import pyspark.sql.functions as funcs

@@ -75,7 +75,7 @@ def get_df_preview_spark(df, preview_lines=20):
     """capture preview data from spark df"""
     df = df.limit(preview_lines)

-    result_dict =
+    result_dict = spark_df_to_pandas(df).to_dict(orient="split")
     return [result_dict["columns"], *result_dict["data"]]
mlrun/data_types/to_pandas.py
CHANGED

@@ -15,21 +15,11 @@
 import warnings
 from collections import Counter

-from pyspark.sql.types import (
-    BooleanType,
-    ByteType,
-    DoubleType,
-    FloatType,
-    IntegerType,
-    IntegralType,
-    LongType,
-    MapType,
-    ShortType,
-    TimestampType,
-)
-
-
-def toPandas(spark_df):
+import pandas as pd
+import semver
+
+
+def _toPandas(spark_df):
     """
     Modified version of spark DataFrame.toPandas() –
     https://github.com/apache/spark/blob/v3.2.3/python/pyspark/sql/pandas/conversion.py#L35

@@ -40,6 +30,12 @@ def _toPandas(spark_df):
     This modification adds the missing unit to the dtype.
     """
     from pyspark.sql.dataframe import DataFrame
+    from pyspark.sql.types import (
+        BooleanType,
+        IntegralType,
+        MapType,
+        TimestampType,
+    )

     assert isinstance(spark_df, DataFrame)

@@ -48,7 +44,6 @@ def _toPandas(spark_df):
     require_minimum_pandas_version()

     import numpy as np
-    import pandas as pd

     timezone = spark_df.sql_ctx._conf.sessionLocalTimeZone()

@@ -217,6 +212,16 @@ def _toPandas(spark_df):

 def _to_corrected_pandas_type(dt):
     import numpy as np
+    from pyspark.sql.types import (
+        BooleanType,
+        ByteType,
+        DoubleType,
+        FloatType,
+        IntegerType,
+        LongType,
+        ShortType,
+        TimestampType,
+    )

     if type(dt) == ByteType:
         return np.int8

@@ -236,3 +241,30 @@ def _to_corrected_pandas_type(dt):
         return "datetime64[ns]"
     else:
         return None
+
+
+def spark_df_to_pandas(spark_df):
+    # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
+    # when we upgrade pyspark, we should check whether this workaround is still necessary
+    # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
+    if semver.parse(pd.__version__)["major"] >= 2:
+        import pyspark.sql.functions as pyspark_functions
+
+        type_conversion_dict = {}
+        for field in spark_df.schema.fields:
+            if str(field.dataType) == "TimestampType":
+                spark_df = spark_df.withColumn(
+                    field.name,
+                    pyspark_functions.date_format(
+                        pyspark_functions.to_timestamp(field.name),
+                        "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
+                    ),
+                )
+                type_conversion_dict[field.name] = "datetime64[ns]"
+
+        df = _toPandas(spark_df)
+        if type_conversion_dict:
+            df = df.astype(type_conversion_dict)
+        return df
+    else:
+        return _toPandas(spark_df)
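Usage sketch for the new `spark_df_to_pandas` wrapper, assuming an active `SparkSession`: on pandas >= 2 it renders timestamp columns as strings in Spark and casts them back to `datetime64[ns]`, side-stepping the pyspark 3.2 conversion failure; on pandas 1.x it defers to `_toPandas` directly.

```python
from pyspark.sql import SparkSession
from mlrun.data_types.to_pandas import spark_df_to_pandas

spark = SparkSession.builder.getOrCreate()
spark_df = spark.createDataFrame(
    [("2024-01-01 12:00:00",)], ["ts"]
).selectExpr("to_timestamp(ts) as ts")

pandas_df = spark_df_to_pandas(spark_df)
print(pandas_df.dtypes)  # ts: datetime64[ns] under both pandas 1.x and 2.x
```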
mlrun/datastore/__init__.py
CHANGED

@@ -117,6 +117,7 @@ def get_stream_pusher(stream_path: str, **kwargs):
         return OutputStream(stream_path, **kwargs)
     elif stream_path.startswith("v3io"):
         endpoint, stream_path = parse_path(stream_path)
+        endpoint = kwargs.pop("endpoint", None) or endpoint
         return OutputStream(stream_path, endpoint=endpoint, **kwargs)
     elif stream_path.startswith("dummy://"):
         return _DummyStream(**kwargs)
mlrun/datastore/azure_blob.py
CHANGED