mlrun 1.7.0rc9__py3-none-any.whl → 1.7.0rc12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +1 -0
- mlrun/artifacts/manager.py +17 -6
- mlrun/artifacts/model.py +29 -25
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/alert.py +122 -0
- mlrun/common/schemas/auth.py +4 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +3 -1
- mlrun/config.py +6 -3
- mlrun/datastore/__init__.py +4 -3
- mlrun/datastore/base.py +6 -5
- mlrun/datastore/sources.py +9 -4
- mlrun/datastore/targets.py +11 -3
- mlrun/datastore/utils.py +16 -0
- mlrun/datastore/v3io.py +27 -50
- mlrun/db/base.py +44 -2
- mlrun/db/httpdb.py +192 -20
- mlrun/db/nopdb.py +36 -1
- mlrun/execution.py +21 -14
- mlrun/feature_store/api.py +6 -3
- mlrun/feature_store/feature_set.py +39 -23
- mlrun/feature_store/feature_vector.py +2 -1
- mlrun/feature_store/steps.py +30 -19
- mlrun/features.py +4 -13
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/model.py +2 -2
- mlrun/model_monitoring/application.py +11 -2
- mlrun/model_monitoring/applications/histogram_data_drift.py +3 -3
- mlrun/model_monitoring/controller.py +2 -3
- mlrun/model_monitoring/stream_processing.py +0 -1
- mlrun/model_monitoring/writer.py +32 -0
- mlrun/package/packagers_manager.py +1 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/other.py +1 -1
- mlrun/projects/operations.py +11 -4
- mlrun/projects/project.py +168 -62
- mlrun/run.py +72 -40
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/nuclio/function.py +9 -5
- mlrun/runtimes/nuclio/serving.py +12 -14
- mlrun/runtimes/pod.py +3 -3
- mlrun/secrets.py +6 -2
- mlrun/serving/routers.py +3 -1
- mlrun/serving/states.py +9 -35
- mlrun/serving/v2_serving.py +4 -4
- mlrun/utils/helpers.py +1 -1
- mlrun/utils/notifications/notification/base.py +12 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +41 -13
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/retryer.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/METADATA +1 -1
- {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/RECORD +67 -67
- mlrun/datastore/helpers.py +0 -18
- {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/top_level.txt +0 -0
mlrun/__init__.py
CHANGED
mlrun/artifacts/manager.py
CHANGED
@@ -180,11 +180,13 @@ class ArtifactManager:
         upload=None,
         labels=None,
         db_key=None,
+        project=None,
+        is_retained_producer=None,
         **kwargs,
     ) -> Artifact:
         """
         Log an artifact to the DB and upload it to the artifact store.
-        :param producer: The producer of the artifact, the producer depends
+        :param producer: The producer of the artifact, the producer depends on where the artifact is being logged.
         :param item: The artifact to log.
         :param body: The body of the artifact.
         :param target_path: The target path of the artifact. (cannot be a relative path)
@@ -202,6 +204,9 @@ class ArtifactManager:
         :param labels: Labels to add to the artifact.
         :param db_key: The key to use when logging the artifact to the DB.
             If not provided, will generate a key based on the producer name and the artifact key.
+        :param project: The project to log the artifact to. If not provided, will use the producer's project.
+        :param is_retained_producer: Whether the producer is retained or not. Relevant to register artifacts flow
+            where a project may log artifacts which were produced by another producer.
         :param kwargs: Arguments to pass to the artifact class.
         :return: The logged artifact.
         """
@@ -226,7 +231,7 @@ class ArtifactManager:

         if db_key is None:
             # set the default artifact db key
-            if producer.kind == "run":
+            if producer.kind == "run" and not is_retained_producer:
                 # When the producer's type is "run,"
                 # we generate a different db_key than the one we obtained in the request.
                 # As a result, a new artifact for the requested key will be created,
@@ -251,8 +256,11 @@ class ArtifactManager:
             item.labels.update({"workflow-id": item.producer.get("workflow")})

         item.iter = producer.iteration
-        project = producer.project
+        project = project or producer.project
         item.project = project
+        if is_retained_producer:
+            # if the producer is retained, we want to use the original target path
+            target_path = target_path or item.target_path

         # if target_path is provided and not relative, then no need to upload the artifact as it already exists
         if target_path:
@@ -260,7 +268,8 @@ class ArtifactManager:
                 raise ValueError(
                     f"target_path ({target_path}) param cannot be relative"
                 )
-            upload
+            if upload is None:
+                upload = False

         # if target_path wasn't provided, but src_path is not relative, then no need to upload the artifact as it
         # already exists. In this case set the target_path to the src_path and set upload to False
@@ -287,7 +296,9 @@ class ArtifactManager:

         if target_path and item.is_dir and not target_path.endswith("/"):
             target_path += "/"
-        target_path = template_artifact_path(
+        target_path = template_artifact_path(
+            artifact_path=target_path, project=producer.project
+        )
         item.target_path = target_path

         item.before_log()
@@ -303,7 +314,7 @@ class ArtifactManager:
             item.upload(artifact_path=artifact_path)

         if db_key:
-            self._log_to_db(db_key,
+            self._log_to_db(db_key, project, producer.inputs, item)
         size = str(item.size) or "?"
         db_str = "Y" if (self.artifact_db and db_key) else "N"
         logger.debug(
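Usage note: the new project and is_retained_producer parameters support the register-artifacts flow, where one project re-logs artifacts produced by another producer without regenerating their db_key or re-uploading the data. A hedged sketch of such a call, assuming an existing ArtifactManager instance, producer, and artifact item (the variable names below are placeholders, not taken from the diff):

    # hedged sketch only; "manager", "producer" and "artifact" are assumed to already exist
    logged = manager.log_artifact(
        producer,                   # retained producer from the original run
        artifact,                   # the artifact being registered
        project="target-project",   # overrides producer.project for the DB record
        is_retained_producer=True,  # keep the original db_key and target_path
        upload=False,               # the data already exists at its target path
    )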
mlrun/artifacts/model.py
CHANGED
@@ -11,9 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import tempfile
 from os import path
-from typing import Any
+from typing import Any, Optional

 import pandas as pd
 import yaml
@@ -69,8 +70,8 @@ class ModelArtifactSpec(ArtifactSpec):
         model_file=None,
         metrics=None,
         paraemeters=None,
-        inputs: list[Feature] = None,
-        outputs: list[Feature] = None,
+        inputs: Optional[list[Feature]] = None,
+        outputs: Optional[list[Feature]] = None,
         framework=None,
         algorithm=None,
         feature_vector=None,
@@ -92,8 +93,8 @@ class ModelArtifactSpec(ArtifactSpec):
         self.model_file = model_file
         self.metrics = metrics or {}
         self.parameters = paraemeters or {}
-        self.inputs
-        self.outputs
+        self.inputs = inputs or []
+        self.outputs = outputs or []
         self.framework = framework
         self.algorithm = algorithm
         self.feature_vector = feature_vector
@@ -102,21 +103,21 @@ class ModelArtifactSpec(ArtifactSpec):
         self.model_target_file = model_target_file

     @property
-    def inputs(self) ->
+    def inputs(self) -> ObjectList:
         """input feature list"""
         return self._inputs

     @inputs.setter
-    def inputs(self, inputs: list[Feature]):
+    def inputs(self, inputs: list[Feature]) -> None:
         self._inputs = ObjectList.from_list(Feature, inputs)

     @property
-    def outputs(self) ->
+    def outputs(self) -> ObjectList:
         """output feature list"""
         return self._outputs

     @outputs.setter
-    def outputs(self, outputs: list[Feature]):
+    def outputs(self, outputs: list[Feature]) -> None:
         self._outputs = ObjectList.from_list(Feature, outputs)


@@ -176,22 +177,22 @@ class ModelArtifact(Artifact):
         self._spec = self._verify_dict(spec, "spec", ModelArtifactSpec)

     @property
-    def inputs(self) ->
+    def inputs(self) -> ObjectList:
         """input feature list"""
         return self.spec.inputs

     @inputs.setter
-    def inputs(self, inputs: list[Feature]):
+    def inputs(self, inputs: list[Feature]) -> None:
         """input feature list"""
         self.spec.inputs = inputs

     @property
-    def outputs(self) ->
+    def outputs(self) -> ObjectList:
         """input feature list"""
         return self.spec.outputs

     @outputs.setter
-    def outputs(self, outputs: list[Feature]):
+    def outputs(self, outputs: list[Feature]) -> None:
         """input feature list"""
         self.spec.outputs = outputs

@@ -445,14 +446,14 @@ class LegacyModelArtifact(LegacyArtifact):
         **kwargs,
     ):
         super().__init__(key, body, format=format, target_path=target_path, **kwargs)
-        self._inputs: ObjectList = None
-        self._outputs: ObjectList = None
+        self._inputs: Optional[ObjectList] = None
+        self._outputs: Optional[ObjectList] = None

         self.model_file = model_file
         self.parameters = parameters or {}
         self.metrics = metrics or {}
-        self.inputs
-        self.outputs
+        self.inputs = inputs or []
+        self.outputs = outputs or []
         self.extra_data = extra_data or {}
         self.framework = framework
         self.algorithm = algorithm
@@ -462,21 +463,21 @@ class LegacyModelArtifact(LegacyArtifact):
         self.model_target_file = model_target_file

     @property
-    def inputs(self) ->
+    def inputs(self) -> Optional[ObjectList]:
         """input feature list"""
         return self._inputs

     @inputs.setter
-    def inputs(self, inputs: list[Feature]):
+    def inputs(self, inputs: list[Feature]) -> None:
         self._inputs = ObjectList.from_list(Feature, inputs)

     @property
-    def outputs(self) ->
+    def outputs(self) -> Optional[ObjectList]:
         """output feature list"""
         return self._outputs

     @outputs.setter
-    def outputs(self, outputs: list[Feature]):
+    def outputs(self, outputs: list[Feature]) -> None:
         self._outputs = ObjectList.from_list(Feature, outputs)

     def infer_from_df(self, df, label_columns=None, with_stats=True, num_bins=None):
@@ -552,9 +553,9 @@ def get_model(model_dir, suffix=""):

     example::

-        model_file, model_artifact, extra_data = get_model(models_path, suffix=
+        model_file, model_artifact, extra_data = get_model(models_path, suffix=".pkl")
         model = load(open(model_file, "rb"))
-        categories = extra_data[
+        categories = extra_data["categories"].as_df()

     :param model_dir: model dir or artifact path (store://..) or DataItem
     :param suffix: model filename suffix (when using a dir)
@@ -663,8 +664,11 @@ def update_model(

     example::

-        update_model(
-
+        update_model(
+            model_path,
+            metrics={"speed": 100},
+            extra_data={"my_data": b"some text", "file": "s3://mybucket/.."},
+        )

     :param model_artifact: model artifact object or path (store://..) or DataItem
     :param parameters: parameters dict
mlrun/common/schemas/__init__.py
CHANGED
mlrun/common/schemas/alert.py
ADDED
@@ -0,0 +1,122 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from datetime import datetime
+from typing import Annotated, Optional, Union
+
+import pydantic
+
+from mlrun.common.schemas.notification import Notification
+from mlrun.common.types import StrEnum
+
+
+class EventEntityKind(StrEnum):
+    MODEL = "model"
+    JOB = "job"
+
+
+class EventEntity(pydantic.BaseModel):
+    kind: EventEntityKind
+    project: str
+    id: str
+
+
+class EventKind(StrEnum):
+    DRIFT_DETECTED = "drift_detected"
+    DRIFT_SUSPECTED = "drift_suspected"
+    FAILED = "failed"
+
+
+_event_kind_entity_map = {
+    EventKind.DRIFT_SUSPECTED: [EventEntityKind.MODEL],
+    EventKind.DRIFT_DETECTED: [EventEntityKind.MODEL],
+    EventKind.FAILED: [EventEntityKind.JOB],
+}
+
+
+class Event(pydantic.BaseModel):
+    kind: EventKind
+    timestamp: Union[str, datetime] = None  # occurrence time
+    entity: EventEntity
+    value: Optional[Union[float, str]] = None
+
+    def is_valid(self):
+        return self.entity.kind in _event_kind_entity_map[self.kind]
+
+
+class AlertActiveState(StrEnum):
+    ACTIVE = "active"
+    INACTIVE = "inactive"
+
+
+class AlertSeverity(StrEnum):
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+
+
+# what should trigger the alert. must be either event (at least 1), or prometheus query
+class AlertTrigger(pydantic.BaseModel):
+    events: list[EventKind] = []
+    prometheus_alert: str = None
+
+
+class AlertCriteria(pydantic.BaseModel):
+    count: Annotated[
+        int,
+        pydantic.Field(
+            description="Number of events to wait until notification is sent"
+        ),
+    ] = 0
+    period: Annotated[
+        str,
+        pydantic.Field(
+            description="Time period during which event occurred. e.g. 1d, 3h, 5m, 15s"
+        ),
+    ] = None
+
+
+class ResetPolicy(StrEnum):
+    MANUAL = "manual"
+    AUTO = "auto"
+
+
+class AlertConfig(pydantic.BaseModel):
+    project: str
+    id: int = None
+    name: str
+    description: Optional[str] = ""
+    summary: Annotated[
+        str,
+        pydantic.Field(
+            description=(
+                "String to be sent in the notifications generated."
+                "e.g. 'Model {{ $project }}/{{ $entity }} is drifting.'"
+            )
+        ),
+    ]
+    created: Union[str, datetime] = None
+    severity: AlertSeverity
+    entity: EventEntity
+    trigger: AlertTrigger
+    criteria: Optional[AlertCriteria]
+    reset_policy: ResetPolicy = ResetPolicy.MANUAL
+    notifications: pydantic.conlist(Notification, min_items=1)
+    state: AlertActiveState = AlertActiveState.INACTIVE
+    count: Optional[int] = 0
+
+
+class AlertsModes(StrEnum):
+    enabled = "enabled"
+    disabled = "disabled"
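Usage note: the new schemas tie an alert to an entity, a trigger, optional criteria, and at least one notification. A hedged sketch of building an AlertConfig from them (the import path follows the alert.py file listed above; the Notification instance is assumed to be constructed elsewhere and the names/values are placeholders):

    from mlrun.common.schemas.alert import (
        AlertConfig,
        AlertCriteria,
        AlertSeverity,
        AlertTrigger,
        EventEntity,
        EventEntityKind,
        EventKind,
    )

    alert = AlertConfig(
        project="my-project",
        name="model-drift-alert",
        summary="Model {{ $project }}/{{ $entity }} is drifting.",
        severity=AlertSeverity.HIGH,
        entity=EventEntity(kind=EventEntityKind.MODEL, project="my-project", id="model-1"),
        trigger=AlertTrigger(events=[EventKind.DRIFT_DETECTED]),
        criteria=AlertCriteria(count=3, period="10m"),
        notifications=[a_notification],  # an mlrun.common.schemas.Notification built elsewhere
    )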
mlrun/common/schemas/auth.py
CHANGED
@@ -58,6 +58,8 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
     pipeline = "pipeline"
     hub_source = "hub-source"
     workflow = "workflow"
+    alert = "alert"
+    event = "event"
     datastore_profile = "datastore-profile"
     api_gateway = "api-gateway"

@@ -83,6 +85,8 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
             AuthorizationResourceTypes.schedule: "/projects/{project_name}/schedules/{resource_name}",
             AuthorizationResourceTypes.secret: "/projects/{project_name}/secrets/{resource_name}",
             AuthorizationResourceTypes.run: "/projects/{project_name}/runs/{resource_name}",
+            AuthorizationResourceTypes.event: "/projects/{project_name}/events/{resource_name}",
+            AuthorizationResourceTypes.alert: "/projects/{project_name}/alerts/{resource_name}",
             # runtime resource doesn't have an identifier, we don't need any auth granularity behind project level
             AuthorizationResourceTypes.runtime_resource: "/projects/{project_name}/runtime-resources",
             AuthorizationResourceTypes.model_endpoint: "/projects/{project_name}/model-endpoints/{resource_name}",
mlrun/config.py
CHANGED
@@ -549,10 +549,9 @@ default_config = {
     "feature_store": {
         "data_prefixes": {
             "default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
-            "nosql": "v3io:///projects/{project}/FeatureStore/{name}/
+            "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
             # "authority" is optional and generalizes [userinfo "@"] host [":" port]
-            "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/
-            "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
+            "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
         },
         "default_targets": "parquet,nosql",
         "default_job_image": "mlrun/mlrun",
@@ -688,6 +687,10 @@ default_config = {
         "access_key": "",
     },
     "grafana_url": "",
+    "alerts": {
+        # supported modes: "enabled", "disabled".
+        "mode": "disabled"
+    },
 }

 _is_running_as_api = None
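Usage note: the new alerts block is part of default_config, so it surfaces on the runtime config object. A hedged sketch of reading and toggling it through mlrun.mlconf (toggling is typically only meaningful on the service side or in tests):

    import mlrun

    print(mlrun.mlconf.alerts.mode)       # "disabled" by default
    mlrun.mlconf.alerts.mode = "enabled"  # matches the AlertsModes values "enabled"/"disabled"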
mlrun/datastore/__init__.py
CHANGED
@@ -64,7 +64,7 @@ from .store_resources import (
     parse_store_uri,
 )
 from .targets import CSVTarget, NoSqlTarget, ParquetTarget, StreamTarget
-from .utils import parse_kafka_url
+from .utils import get_kafka_brokers_from_dict, parse_kafka_url

 store_manager = StoreManager()

@@ -107,8 +107,9 @@ def get_stream_pusher(stream_path: str, **kwargs):
     :param stream_path: path/url of stream
     """

-
-
+    kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+    if stream_path.startswith("kafka://") or kafka_brokers:
+        topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
         return KafkaOutputStream(topic, brokers, kwargs.get("kafka_producer_options"))
     elif stream_path.startswith("http://") or stream_path.startswith("https://"):
         return HTTPOutputStream(stream_path=stream_path)
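Usage note: get_stream_pusher now resolves brokers through get_kafka_brokers_from_dict, so Kafka can be targeted either with a kafka:// URL or with the kafka_brokers keyword. A hedged sketch (broker address and topic are placeholders):

    from mlrun.datastore import get_stream_pusher

    # explicit kafka:// URL
    pusher = get_stream_pusher("kafka://broker-1:9092/my-topic")

    # or a plain topic path plus the kafka_brokers keyword
    pusher = get_stream_pusher("my-topic", kafka_brokers="broker-1:9092")
    pusher.push({"event": "example"})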
mlrun/datastore/base.py
CHANGED
@@ -389,14 +389,15 @@ class DataItem:


         # reading run results using DataItem (run.artifact())
-        train_run = train_iris_func.run(
-
+        train_run = train_iris_func.run(
+            inputs={"dataset": dataset}, params={"label_column": "label"}
+        )

-        train_run.artifact(
-        test_set = train_run.artifact(
+        train_run.artifact("confusion-matrix").show()
+        test_set = train_run.artifact("test_set").as_df()

         # create and use DataItem from uri
-        data = mlrun.get_dataitem(
+        data = mlrun.get_dataitem("http://xyz/data.json").get()
     """

     def __init__(
mlrun/datastore/sources.py
CHANGED
@@ -406,12 +406,17 @@ class BigQuerySource(BaseSourceDriver):

         # use sql query
         query_string = "SELECT * FROM `the-psf.pypi.downloads20210328` LIMIT 5000"
-        source = BigQuerySource(
-
-
+        source = BigQuerySource(
+            "bq1",
+            query=query_string,
+            gcp_project="my_project",
+            materialization_dataset="dataviews",
+        )

         # read a table
-        source = BigQuerySource(
+        source = BigQuerySource(
+            "bq2", table="the-psf.pypi.downloads20210328", gcp_project="my_project"
+        )


     :parameter name: source name
mlrun/datastore/targets.py
CHANGED
@@ -1532,15 +1532,23 @@ class KafkaTarget(BaseStoreTarget):
         **kwargs,
     ):
         attrs = {}
+
+        # TODO: Remove this in 1.9.0
         if bootstrap_servers:
+            if brokers:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "KafkaTarget cannot be created with both the 'brokers' parameter and the deprecated "
+                    "'bootstrap_servers' parameter. Please use 'brokers' only."
+                )
             warnings.warn(
                 "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
                 "use 'brokers' instead.",
-                # TODO: Remove this in 1.9.0
                 FutureWarning,
             )
-
-
+            brokers = bootstrap_servers
+
+        if brokers:
+            attrs["brokers"] = brokers
         if producer_options is not None:
             attrs["producer_options"] = producer_options

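Usage note: 'brokers' is now the canonical way to point a KafkaTarget at a cluster; 'bootstrap_servers' still works but warns, and passing both raises. A hedged sketch (topic and broker values are placeholders; other constructor parameters are unchanged):

    from mlrun.datastore.targets import KafkaTarget

    target = KafkaTarget(path="my-topic", brokers="broker-1:9092")            # preferred
    legacy = KafkaTarget(path="my-topic", bootstrap_servers="broker-1:9092")  # FutureWarning
    # KafkaTarget(path="my-topic", brokers="a:9092", bootstrap_servers="b:9092")
    # -> raises mlrun.errors.MLRunInvalidArgumentError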
mlrun/datastore/utils.py
CHANGED
@@ -15,6 +15,7 @@
 import tarfile
 import tempfile
 import typing
+import warnings
 from urllib.parse import parse_qs, urlparse

 import pandas as pd
@@ -164,3 +165,18 @@ def _generate_sql_query_with_time_filter(
         query = query.filter(getattr(table.c, time_column) <= end_time)

     return query, parse_dates
+
+
+def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str]:
+    get_or_pop = options.pop if pop else options.get
+    kafka_brokers = get_or_pop("kafka_brokers", None)
+    if kafka_brokers:
+        return kafka_brokers
+    kafka_bootstrap_servers = get_or_pop("kafka_bootstrap_servers", None)
+    if kafka_bootstrap_servers:
+        warnings.warn(
+            "The 'kafka_bootstrap_servers' parameter is deprecated and will be removed in "
+            "1.9.0. Please pass the 'kafka_brokers' parameter instead.",
+            FutureWarning,
+        )
+    return kafka_bootstrap_servers
mlrun/datastore/v3io.py
CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import mmap
-import os
 import time
 from datetime import datetime

@@ -22,7 +20,6 @@ import v3io
 from v3io.dataplane.response import HttpResponseError

 import mlrun
-from mlrun.datastore.helpers import ONE_GB, ONE_MB

 from ..platforms.iguazio import parse_path, split_path
 from .base import (
@@ -32,6 +29,7 @@ from .base import (
 )

 V3IO_LOCAL_ROOT = "v3io"
+V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 100


 class V3ioStore(DataStore):
@@ -98,46 +96,28 @@ class V3ioStore(DataStore):
         )
         return self._sanitize_storage_options(res)

-    def _upload(
+    def _upload(
+        self,
+        key: str,
+        src_path: str,
+        max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
+    ):
         """helper function for upload method, allows for controlling max_chunk_size in testing"""
         container, path = split_path(self._join(key))
-        file_size = os.path.getsize(src_path)  # in bytes
-        if file_size <= ONE_MB:
-            with open(src_path, "rb") as source_file:
-                data = source_file.read()
-            self._do_object_request(
-                self.object.put,
-                container=container,
-                path=path,
-                body=data,
-                append=False,
-            )
-            return
-        # chunk must be a multiple of the ALLOCATIONGRANULARITY
-        # https://docs.python.org/3/library/mmap.html
-        if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
-            # round down to the nearest multiple of ALLOCATIONGRANULARITY
-            max_chunk_size -= residue
-
         with open(src_path, "rb") as file_obj:
-
-            while
-
-
-
-
-
-
-
-
-
-
-                path=path,
-                body=mmap_obj,
-                append=append,
-            )
-            file_offset += chunk_size
+            append = False
+            while True:
+                data = memoryview(file_obj.read(max_chunk_size))
+                if not data:
+                    break
+                self._do_object_request(
+                    self.object.put,
+                    container=container,
+                    path=path,
+                    body=data,
+                    append=append,
+                )
+                append = True

     def upload(self, key, src_path):
         return self._upload(key, src_path)
@@ -152,19 +132,16 @@ class V3ioStore(DataStore):
             num_bytes=size,
         ).body

-    def _put(
+    def _put(
+        self,
+        key,
+        data,
+        append=False,
+        max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
+    ):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
         container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
-        if buffer_size <= ONE_MB:
-            self._do_object_request(
-                self.object.put,
-                container=container,
-                path=path,
-                body=data,
-                append=append,
-            )
-            return
         buffer_offset = 0
         try:
             data = memoryview(data)
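Usage note: the rewritten _upload/_put drop the mmap path and stream the source in fixed-size chunks, appending after the first put. The same pattern in isolation, sketched against a generic put_object(body, append) callable rather than the v3io client:

    def upload_in_chunks(src_path, put_object, max_chunk_size=1024 * 1024 * 100):
        with open(src_path, "rb") as file_obj:
            append = False
            while True:
                data = memoryview(file_obj.read(max_chunk_size))
                if not data:
                    break
                # the first request overwrites the object, subsequent ones append to it
                put_object(body=data, append=append)
                append = True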