mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -109
- mlrun/{runtimes/mpijob/v1alpha1.py → alerts/__init__.py} +2 -16
- mlrun/alerts/alert.py +141 -0
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +20 -41
- mlrun/artifacts/model.py +8 -140
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/schemas/__init__.py +4 -2
- mlrun/common/schemas/alert.py +46 -4
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +40 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +73 -2
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +7 -4
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +54 -4
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/base.py +23 -3
- mlrun/db/httpdb.py +101 -47
- mlrun/db/nopdb.py +20 -2
- mlrun/errors.py +5 -0
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +4 -2
- mlrun/model.py +25 -11
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +104 -187
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +48 -213
- mlrun/model_monitoring/writer.py +101 -121
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +73 -45
- mlrun/render.py +11 -13
- mlrun/run.py +6 -41
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +6 -6
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +75 -9
- mlrun/runtimes/nuclio/function.py +9 -35
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +1 -39
- mlrun/utils/helpers.py +72 -71
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +12 -5
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +134 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +4 -3
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +105 -95
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/alert.py
CHANGED
@@ -26,10 +26,10 @@ class EventEntityKind(StrEnum):
     JOB = "job"
 
 
-class …
+class EventEntities(pydantic.BaseModel):
     kind: EventEntityKind
     project: str
-    …
+    ids: pydantic.conlist(str, min_items=1, max_items=1)
 
 
 class EventKind(StrEnum):
@@ -48,7 +48,7 @@ _event_kind_entity_map = {
 class Event(pydantic.BaseModel):
     kind: EventKind
     timestamp: Union[str, datetime] = None  # occurrence time
-    entity: …
+    entity: EventEntities
     value_dict: Optional[dict] = pydantic.Field(default_factory=dict)
 
     def is_valid(self):
@@ -71,6 +71,12 @@ class AlertTrigger(pydantic.BaseModel):
     events: list[EventKind] = []
     prometheus_alert: str = None
 
+    def __eq__(self, other):
+        return (
+            self.prometheus_alert == other.prometheus_alert
+            and self.events == other.events
+        )
+
 
 class AlertCriteria(pydantic.BaseModel):
     count: Annotated[
@@ -86,6 +92,9 @@ class AlertCriteria(pydantic.BaseModel):
         ),
     ] = None
 
+    def __eq__(self, other):
+        return self.count == other.count and self.period == other.period
+
 
 class ResetPolicy(StrEnum):
     MANUAL = "manual"
@@ -108,7 +117,7 @@ class AlertConfig(pydantic.BaseModel):
     ]
     created: Union[str, datetime] = None
     severity: AlertSeverity
-    …
+    entities: EventEntities
     trigger: AlertTrigger
     criteria: Optional[AlertCriteria]
     reset_policy: ResetPolicy = ResetPolicy.MANUAL
@@ -120,3 +129,36 @@
 class AlertsModes(StrEnum):
     enabled = "enabled"
     disabled = "disabled"
+
+
+class AlertTemplate(
+    pydantic.BaseModel
+):  # Template fields that are not shared with created configs
+    template_id: int = None
+    template_name: str
+    template_description: Optional[str] = (
+        "String explaining the purpose of this template"
+    )
+
+    # A property that identifies templates that were created by the system and cannot be modified/deleted by the user
+    system_generated: bool = False
+
+    # AlertConfig fields that are pre-defined
+    description: Optional[str] = (
+        "String to be sent in the generated notifications e.g. 'Model {{ $project }}/{{ $entity }} is drifting.'"
+    )
+    severity: AlertSeverity
+    trigger: AlertTrigger
+    criteria: Optional[AlertCriteria]
+    reset_policy: ResetPolicy = ResetPolicy.MANUAL
+
+    # This is slightly different than __eq__ as it doesn't compare everything
+    def templates_differ(self, other):
+        return (
+            self.template_description != other.template_description
+            or self.description != other.description
+            or self.severity != other.severity
+            or self.trigger != other.trigger
+            or self.reset_policy != other.reset_policy
+            or self.criteria != other.criteria
+        )
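For illustration, a minimal sketch of how the reworked alert schemas might be exercised. The class and field names and EventEntityKind.JOB come from this diff; the project name and id value are illustrative, and this is not code from the package itself.

from mlrun.common.schemas.alert import AlertTrigger, EventEntities, EventEntityKind

# `ids` is now a pydantic.conlist(str, min_items=1, max_items=1): exactly one id is expected
entities = EventEntities(kind=EventEntityKind.JOB, project="my-project", ids=["train"])

# AlertTrigger gained a field-wise __eq__, so independently built triggers with
# identical values now compare equal
assert AlertTrigger(events=[]) == AlertTrigger(events=[])
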
mlrun/common/schemas/api_gateway.py
CHANGED

@@ -23,6 +23,7 @@ import mlrun.common.types
 class APIGatewayAuthenticationMode(mlrun.common.types.StrEnum):
     basic = "basicAuth"
     none = "none"
+    access_key = "accessKey"
 
     @classmethod
     def from_str(cls, authentication_mode: str):
@@ -30,6 +31,8 @@ class APIGatewayAuthenticationMode(mlrun.common.types.StrEnum):
             return cls.none
         elif authentication_mode == "basicAuth":
             return cls.basic
+        elif authentication_mode == "accessKey":
+            return cls.access_key
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Authentication mode `{authentication_mode}` is not supported",
@@ -63,6 +66,7 @@ class APIGatewayUpstream(_APIGatewayBaseModel):
     kind: Optional[str] = "nucliofunction"
     nucliofunction: dict[str, str]
     percentage: Optional[int] = 0
+    port: Optional[int] = 0
 
 
 class APIGatewaySpec(_APIGatewayBaseModel):
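A short sketch of resolving the new access-key authentication mode; the enum values and from_str behavior are taken directly from the hunks above.

from mlrun.common.schemas.api_gateway import APIGatewayAuthenticationMode

mode = APIGatewayAuthenticationMode.from_str("accessKey")
assert mode is APIGatewayAuthenticationMode.access_key
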
mlrun/common/schemas/artifact.py
CHANGED
@@ -93,3 +93,18 @@ class Artifact(pydantic.BaseModel):
     metadata: ArtifactMetadata
     spec: ArtifactSpec
     status: ObjectStatus
+
+
+class ArtifactsDeletionStrategies(mlrun.common.types.StrEnum):
+    """Artifacts deletion strategies types."""
+
+    metadata_only = "metadata-only"
+    """Only removes the artifact db record, leaving all related artifact data in-place"""
+
+    data_optional = "data-optional"
+    """Delete the artifact data of the artifact as a best-effort.
+    If artifact data deletion fails still try to delete the artifact db record"""
+
+    data_force = "data-force"
+    """Delete the artifact data, and if cannot delete it fail the deletion
+    and don't delete the artifact db record"""
mlrun/common/schemas/auth.py
CHANGED
@@ -59,6 +59,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
     hub_source = "hub-source"
     workflow = "workflow"
     alert = "alert"
+    alert_templates = "alert-templates"
     event = "event"
     datastore_profile = "datastore-profile"
     api_gateway = "api-gateway"
@@ -87,6 +88,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
             AuthorizationResourceTypes.run: "/projects/{project_name}/runs/{resource_name}",
             AuthorizationResourceTypes.event: "/projects/{project_name}/events/{resource_name}",
             AuthorizationResourceTypes.alert: "/projects/{project_name}/alerts/{resource_name}",
+            AuthorizationResourceTypes.alert_templates: "/alert-templates/{resource_name}",
             # runtime resource doesn't have an identifier, we don't need any auth granularity behind project level
             AuthorizationResourceTypes.runtime_resource: "/projects/{project_name}/runtime-resources",
             AuthorizationResourceTypes.model_endpoint: "/projects/{project_name}/model-endpoints/{resource_name}",
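Unlike alerts, the new alert-templates resource is global rather than project scoped: its authorization path template (copied from the hunk above) has no {project_name} segment. The resource name below is illustrative.

alert_path = "/projects/{project_name}/alerts/{resource_name}"
template_path = "/alert-templates/{resource_name}"
print(template_path.format(resource_name="drift-detected"))  # -> /alert-templates/drift-detected
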
mlrun/common/schemas/model_monitoring/__init__.py
CHANGED

@@ -25,17 +25,22 @@ from .constants import (
     FeatureSetFeatures,
     FileTargetKind,
     FunctionURI,
+    MetricData,
     ModelEndpointTarget,
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    MonitoringTSDBTables,
     ProjectSecretKeys,
     PrometheusEndpoints,
     PrometheusMetric,
+    ResultData,
     SchedulingKeys,
-    …
+    TimeSeriesConnector,
+    TSDBTarget,
     VersionedModel,
     WriterEvent,
+    WriterEventKind,
 )
 from .grafana import (
     GrafanaColumn,
@@ -51,6 +56,8 @@ from .model_endpoints import (
     ModelEndpoint,
     ModelEndpointList,
     ModelEndpointMetadata,
+    ModelEndpointMonitoringMetric,
+    ModelEndpointMonitoringMetricType,
     ModelEndpointSpec,
     ModelEndpointStatus,
 )
mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -99,14 +99,17 @@ class FeatureSetFeatures(MonitoringStrEnum):
 
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
-    CURRENT_STATS = "current_stats"
-    FEATURE_STATS = "feature_stats"
-    SAMPLE_PARQUET_PATH = "sample_parquet_path"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
     LAST_REQUEST = "last_request"
     ENDPOINT_ID = "endpoint_id"
     OUTPUT_STREAM_URI = "output_stream_uri"
+    MLRUN_CONTEXT = "mlrun_context"
+
+    # Deprecated fields - TODO : delete in 1.9.0 (V1 app deprecation)
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    CURRENT_STATS = "current_stats"
+    FEATURE_STATS = "feature_stats"
 
 
 class WriterEvent(MonitoringStrEnum):
@@ -114,6 +117,21 @@ class WriterEvent(MonitoringStrEnum):
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
+    EVENT_KIND = "event_kind"  # metric or result
+    DATA = "data"
+
+
+class WriterEventKind(MonitoringStrEnum):
+    METRIC = "metric"
+    RESULT = "result"
+
+
+class MetricData(MonitoringStrEnum):
+    METRIC_NAME = "metric_name"
+    METRIC_VALUE = "metric_value"
+
+
+class ResultData(MonitoringStrEnum):
     RESULT_NAME = "result_name"
     RESULT_VALUE = "result_value"
     RESULT_KIND = "result_kind"
@@ -138,7 +156,7 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"
 
 
-class …
+class TimeSeriesConnector:
     TSDB = "tsdb"
 
 
@@ -176,6 +194,7 @@ class FileTargetKind:
     LOG_STREAM = "log_stream"
     APP_RESULTS = "app_results"
     MONITORING_SCHEDULES = "monitoring_schedules"
+    MONITORING_APPLICATION = "monitoring_application"
 
 
 class ModelMonitoringMode(str, Enum):
@@ -210,6 +229,12 @@ class MonitoringFunctionNames(MonitoringStrEnum):
     WRITER = "model-monitoring-writer"
 
 
+class MonitoringTSDBTables(MonitoringStrEnum):
+    APP_RESULTS = "app-results"
+    METRICS = "metrics"
+    EVENTS = "events"
+
+
 @dataclass
 class FunctionURI:
     project: str
@@ -303,11 +328,22 @@ class ModelMonitoringAppLabel:
     KEY = "mlrun__type"
     VAL = "mlrun__model-monitoring-application"
 
+    def __str__(self) -> str:
+        return f"{self.KEY}={self.VAL}"
+
 
 class ControllerPolicy:
     BASE_PERIOD = "base_period"
 
 
+class TSDBTarget:
+    V3IO_TSDB = "v3io-tsdb"
+    PROMETHEUS = "prometheus"
+    APP_RESULTS_TABLE = "app-results"
+    V3IO_BE = "tsdb"
+    V3IO_RATE = "1/s"
+
+
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"
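Illustrative sketch of how a writer event might be shaped with the new EVENT_KIND/DATA fields and the MetricData enum. The enum members are from the hunks above; the payload values and the way the monitoring writer actually consumes them are assumptions, not shown in this diff.

from mlrun.common.schemas.model_monitoring.constants import (
    MetricData,
    WriterEvent,
    WriterEventKind,
)

event = {
    WriterEvent.ENDPOINT_ID: "ep-1234",  # illustrative endpoint id
    WriterEvent.EVENT_KIND: WriterEventKind.METRIC,
    WriterEvent.DATA: {
        MetricData.METRIC_NAME: "latency_avg",  # illustrative metric name
        MetricData.METRIC_VALUE: 12.5,
    },
}
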
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED

@@ -11,16 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 import json
-…
+import re
+from datetime import datetime
+from typing import Any, NamedTuple, Optional
 
 from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra
 
 import mlrun.common.model_monitoring
+import mlrun.common.types
 
 from ..object import ObjectKind, ObjectSpec, ObjectStatus
 from .constants import (
@@ -29,6 +31,8 @@ from .constants import (
     EventKeyMetrics,
     EventLiveStats,
     ModelMonitoringMode,
+    ResultKindApp,
+    ResultStatusApp,
 )
 
 
@@ -292,6 +296,73 @@ class ModelEndpointList(BaseModel):
     endpoints: list[ModelEndpoint] = []
 
 
+class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
+    RESULT = "result"
+
+
+class ModelEndpointMonitoringMetric(BaseModel):
+    project: str
+    app: str
+    type: ModelEndpointMonitoringMetricType
+    name: str
+    full_name: str
+
+
+def _compose_full_name(
+    *,
+    project: str,
+    app: str,
+    name: str,
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT,
+) -> str:
+    return ".".join([project, app, type, name])
+
+
+_FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
+_FQN_PATTERN = (
+    rf"^(?P<project>{_FQN_PART_PATTERN})\."
+    rf"(?P<app>{_FQN_PART_PATTERN})\."
+    rf"(?P<type>{_FQN_PART_PATTERN})\."
+    rf"(?P<name>{_FQN_PART_PATTERN})$"
+)
+_FQN_REGEX = re.compile(_FQN_PATTERN)
+
+
+def _parse_metric_fqn_to_monitoring_metric(fqn: str) -> ModelEndpointMonitoringMetric:
+    match = _FQN_REGEX.fullmatch(fqn)
+    if match is None:
+        raise ValueError("The fully qualified name is not in the expected format")
+    return ModelEndpointMonitoringMetric.parse_obj(
+        match.groupdict() | {"full_name": fqn}
+    )
+
+
+class _ResultPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+    status: ResultStatusApp
+
+
+class _ModelEndpointMonitoringResultValuesBase(BaseModel):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool
+
+
+class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringResultValuesBase):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    result_kind: ResultKindApp
+    values: list[_ResultPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringResultNoData(_ModelEndpointMonitoringResultValuesBase):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool = False
+
+
 def _mapping_attributes(
     base_model: BaseModel,
     flattened_dictionary: dict,
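The new fully qualified metric name has the form <project>.<app>.<type>.<name>. A sketch using the module-private helpers added above; the project name is illustrative, while the app and result names reuse the histogram-data-drift constants from this diff.

from mlrun.common.schemas.model_monitoring.model_endpoints import (
    _compose_full_name,
    _parse_metric_fqn_to_monitoring_metric,
)

fqn = _compose_full_name(project="my-proj", app="histogram-data-drift", name="general_drift")
# -> "my-proj.histogram-data-drift.result.general_drift"

metric = _parse_metric_fqn_to_monitoring_metric(fqn)
assert metric.app == "histogram-data-drift"
assert metric.type == "result"  # ModelEndpointMonitoringMetricType.RESULT
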
mlrun/common/schemas/project.py
CHANGED
@@ -114,6 +114,8 @@ class ProjectSummary(pydantic.BaseModel):
     runs_failed_recent_count: int
     runs_running_count: int
     schedules_count: int
+    pipelines_completed_recent_count: typing.Optional[int] = None
+    pipelines_failed_recent_count: typing.Optional[int] = None
     pipelines_running_count: typing.Optional[int] = None
 
 
mlrun/config.py
CHANGED
@@ -361,7 +361,7 @@ default_config = {
         # is set to ClusterIP
         # ---------------------------------------------------------------------
         # Note: adding a mode requires special handling on
-        # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+        # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
@@ -503,6 +503,7 @@ default_config = {
             "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
            "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
            "stream": "",
+           "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
        },
        # Offline storage path can be either relative or a full path. This path is used for general offline data
        # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -516,6 +517,7 @@ default_config = {
        # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
        "store_type": "v3io-nosql",
        "endpoint_store_connection": "",
+       "tsdb_connector_type": "v3io-tsdb",
    },
    "secret_stores": {
        # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -554,7 +556,7 @@ default_config = {
            "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
            # "authority" is optional and generalizes [userinfo "@"] host [":" port]
            "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
-           "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/…
+           "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
        },
        "default_targets": "parquet,nosql",
        "default_job_image": "mlrun/mlrun",
@@ -692,7 +694,7 @@ default_config = {
    "grafana_url": "",
    "alerts": {
        # supported modes: "enabled", "disabled".
-       "mode": "…
+       "mode": "enabled"
    },
    "auth_with_client_id": {
        "enabled": False,
@@ -1088,6 +1090,7 @@ class Config:
        target: str = "online",
        artifact_path: str = None,
        function_name: str = None,
+       **kwargs,
    ) -> typing.Union[str, list[str]]:
        """Get the full path from the configuration based on the provided project and kind.
 
@@ -1114,7 +1117,7 @@
        )
        if store_prefix_dict.get(kind):
            # Target exist in store prefix and has a valid string value
-           return store_prefix_dict[kind].format(project=project)
+           return store_prefix_dict[kind].format(project=project, **kwargs)
 
        if (
            function_name
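The **kwargs pass-through means a store-prefix template can now fill placeholders beyond {project}; a minimal sketch of the resulting formatting, using the "default" prefix from the hunk above (the enclosing config method itself is not shown in this diff, and the project/kind values are illustrative).

store_prefix = "v3io:///users/pipelines/{project}/model-endpoints/{kind}"
path = store_prefix.format(project="my-proj", kind="events")
# -> "v3io:///users/pipelines/my-proj/model-endpoints/events"
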
mlrun/data_types/to_pandas.py
CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
            msg = (
                "toPandas attempted Arrow optimization because "
                "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-               "failed by the reason below:\n…
+               f"failed by the reason below:\n {e}\n"
                "Attempting non-optimization as "
                "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-               "true."
+               "true."
            )
            warnings.warn(msg)
            use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                "reached the error below and will not continue because automatic fallback "
                "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-               "false.\n…
+               f"false.\n {e}"
            )
            warnings.warn(msg)
            raise
@@ -144,7 +144,7 @@ def toPandas(spark_df):
                "reached the error below and can not continue. Note that "
                "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                "effect on failures in the middle of "
-               "computation.\n…
+               f"computation.\n {e}"
            )
            warnings.warn(msg)
            raise
mlrun/datastore/base.py
CHANGED
@@ -179,11 +179,23 @@ class DataStore:
         return {}
 
     @staticmethod
-    def _parquet_reader(…
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions
 
         def set_filters(
-            partitions_time_attributes,…
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -193,20 +205,23 @@ class DataStore:
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters
 
         def reader(*args, **kwargs):
-            if start_time or end_time:
-…
-…
-…
-…
-…
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
                 try:
@@ -217,6 +232,7 @@
                 ):
                     raise ex
 
+                # TODO: fix timezone issue (ML-6308)
                 if start_time.tzinfo:
                     start_time_inner = start_time.replace(tzinfo=None)
                     end_time_inner = end_time.replace(tzinfo=None)
@@ -228,6 +244,7 @@
                     partitions_time_attributes,
                     start_time_inner,
                     end_time_inner,
+                    additional_filters,
                     kwargs,
                 )
             return df_module.read_parquet(*args, **kwargs)
@@ -246,6 +263,7 @@
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -310,7 +328,13 @@
             kwargs["columns"] = columns
 
             reader = self._parquet_reader(
-                df_module,…
+                df_module,
+                url,
+                file_system,
+                time_column,
+                start_time,
+                end_time,
+                additional_filters,
             )
 
         elif file_url.endswith(".json") or format == "json":
@@ -539,6 +563,7 @@
         time_column=None,
         start_time=None,
         end_time=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return a dataframe object (generated from the dataitem).
@@ -550,6 +575,12 @@
         :param end_time:    filters out data after this time
         :param time_column: Store timestamp_key will be used if None.
                             The results will be filtered by this column and start_time & end_time.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                                   Each tuple should be in the format (column_name, operator, value).
+                                   Supported operators: "=", ">=", "<=", ">", "<".
+                                   Example: [("Product", "=", "Computer")]
+                                   For all supported filters, please see:
+                                   https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         """
         df = self._store.as_df(
             self._url,
@@ -560,6 +591,7 @@
             time_column=time_column,
             start_time=start_time,
             end_time=end_time,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return df