mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +123 -25
- mlrun/artifacts/manager.py +0 -5
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +10 -1
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +14 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +3 -1
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +33 -11
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/datastore.py +9 -4
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +363 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +230 -65
- mlrun/datastore/model_provider/openai_provider.py +295 -42
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +47 -19
- mlrun/db/httpdb.py +120 -56
- mlrun/db/nopdb.py +38 -10
- mlrun/execution.py +70 -19
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +15 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +509 -117
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +116 -33
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +100 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
- mlrun/model_monitoring/helpers.py +54 -9
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +6 -1
- mlrun/projects/pipelines.py +46 -26
- mlrun/projects/project.py +166 -58
- mlrun/run.py +94 -17
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +7 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +149 -17
- mlrun/runtimes/nuclio/function.py +76 -27
- mlrun/runtimes/nuclio/serving.py +97 -15
- mlrun/runtimes/pod.py +234 -21
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +49 -11
- mlrun/secrets.py +54 -13
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +320 -80
- mlrun/serving/states.py +725 -157
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +200 -119
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +288 -88
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/hub.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
from datetime import datetime, timezone
|
|
16
16
|
from typing import Optional
|
|
17
17
|
|
|
18
|
+
import deepdiff
|
|
18
19
|
from pydantic.v1 import BaseModel, Extra, Field
|
|
19
20
|
|
|
20
21
|
import mlrun.common.types
|
|
@@ -36,9 +37,9 @@ class HubObjectMetadata(BaseModel):
|
|
|
36
37
|
extra = Extra.allow
|
|
37
38
|
|
|
38
39
|
|
|
39
|
-
# Currently only functions are supported. Will add more in the future.
|
|
40
40
|
class HubSourceType(mlrun.common.types.StrEnum):
|
|
41
41
|
functions = "functions"
|
|
42
|
+
modules = "modules"
|
|
42
43
|
|
|
43
44
|
|
|
44
45
|
# Sources-related objects
|
|
@@ -46,7 +47,6 @@ class HubSourceSpec(ObjectSpec):
|
|
|
46
47
|
path: str # URL to base directory, should include schema (s3://, etc...)
|
|
47
48
|
channel: str
|
|
48
49
|
credentials: Optional[dict] = {}
|
|
49
|
-
object_type: HubSourceType = Field(HubSourceType.functions, const=True)
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
class HubSource(BaseModel):
|
|
@@ -55,11 +55,11 @@ class HubSource(BaseModel):
|
|
|
55
55
|
spec: HubSourceSpec
|
|
56
56
|
status: Optional[ObjectStatus] = ObjectStatus(state="created")
|
|
57
57
|
|
|
58
|
-
def get_full_uri(self, relative_path):
|
|
59
|
-
return f"{self.spec.path}/{self.spec.object_type}/{self.spec.channel}/{relative_path}"
|
|
58
|
+
def get_full_uri(self, relative_path, object_type):
|
|
59
|
+
return f"{self.spec.path}/{object_type}/{self.spec.channel}/{relative_path}"
|
|
60
60
|
|
|
61
|
-
def get_catalog_uri(self):
|
|
62
|
-
return self.get_full_uri(mlrun.mlconf.hub.catalog_filename)
|
|
61
|
+
def get_catalog_uri(self, object_type):
|
|
62
|
+
return self.get_full_uri(mlrun.mlconf.hub.catalog_filename, object_type)
|
|
63
63
|
|
|
64
64
|
@classmethod
|
|
65
65
|
def generate_default_source(cls):
|
|
@@ -78,11 +78,23 @@ class HubSource(BaseModel):
|
|
|
78
78
|
spec=HubSourceSpec(
|
|
79
79
|
path=mlrun.mlconf.hub.default_source.url,
|
|
80
80
|
channel=mlrun.mlconf.hub.default_source.channel,
|
|
81
|
-
object_type=HubSourceType(mlrun.mlconf.hub.default_source.object_type),
|
|
82
81
|
),
|
|
83
82
|
status=ObjectStatus(state="created"),
|
|
84
83
|
)
|
|
85
84
|
|
|
85
|
+
def diff(self, another_source: "HubSource") -> dict:
|
|
86
|
+
"""
|
|
87
|
+
Compare this HubSource with another one.
|
|
88
|
+
Returns a dict of differences (metadata, spec, status).
|
|
89
|
+
"""
|
|
90
|
+
exclude_paths = [
|
|
91
|
+
"root['metadata']['updated']",
|
|
92
|
+
"root['metadata']['created']",
|
|
93
|
+
]
|
|
94
|
+
return deepdiff.DeepDiff(
|
|
95
|
+
self.dict(), another_source.dict(), exclude_paths=exclude_paths
|
|
96
|
+
)
|
|
97
|
+
|
|
86
98
|
|
|
87
99
|
last_source_index = -1
|
|
88
100
|
|
|
@@ -94,21 +106,16 @@ class IndexedHubSource(BaseModel):
|
|
|
94
106
|
|
|
95
107
|
# Item-related objects
|
|
96
108
|
class HubItemMetadata(HubObjectMetadata):
|
|
97
|
-
source: HubSourceType =
|
|
109
|
+
source: HubSourceType = HubSourceType.functions
|
|
98
110
|
version: str
|
|
99
111
|
tag: Optional[str]
|
|
100
112
|
|
|
101
113
|
def get_relative_path(self) -> str:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
return f"{modified_name}/{version}/"
|
|
108
|
-
else:
|
|
109
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
110
|
-
f"Bad source for hub item - {self.source}"
|
|
111
|
-
)
|
|
114
|
+
# This is needed since the hub deployment script modifies the paths to use _ instead of -.
|
|
115
|
+
modified_name = self.name.replace("-", "_")
|
|
116
|
+
# Prefer using the tag if exists. Otherwise, use version.
|
|
117
|
+
version = self.tag or self.version
|
|
118
|
+
return f"{modified_name}/{version}/"
|
|
112
119
|
|
|
113
120
|
|
|
114
121
|
class HubItemSpec(ObjectSpec):
|
|
@@ -127,3 +134,8 @@ class HubCatalog(BaseModel):
|
|
|
127
134
|
kind: ObjectKind = Field(ObjectKind.hub_catalog, const=True)
|
|
128
135
|
channel: str
|
|
129
136
|
catalog: list[HubItem]
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class HubModuleType(mlrun.common.types.StrEnum):
|
|
140
|
+
generic = "generic"
|
|
141
|
+
monitoring_app = "monitoring_application"
|
|
@@ -16,6 +16,7 @@ from .constants import (
|
|
|
16
16
|
INTERSECT_DICT_KEYS,
|
|
17
17
|
ApplicationEvent,
|
|
18
18
|
DriftStatus,
|
|
19
|
+
EndpointMode,
|
|
19
20
|
EndpointType,
|
|
20
21
|
EndpointUID,
|
|
21
22
|
EventFieldType,
|
|
@@ -29,6 +30,7 @@ from .constants import (
|
|
|
29
30
|
ModelEndpointMonitoringMetricType,
|
|
30
31
|
ModelEndpointSchema,
|
|
31
32
|
ModelMonitoringAppLabel,
|
|
33
|
+
ModelMonitoringInfraLabel,
|
|
32
34
|
ModelMonitoringMode,
|
|
33
35
|
MonitoringFunctionNames,
|
|
34
36
|
PredictionsQueryConstants,
|
|
@@ -59,6 +61,7 @@ from .model_endpoints import (
|
|
|
59
61
|
Features,
|
|
60
62
|
FeatureValues,
|
|
61
63
|
ModelEndpoint,
|
|
64
|
+
ModelEndpointDriftValues,
|
|
62
65
|
ModelEndpointList,
|
|
63
66
|
ModelEndpointMetadata,
|
|
64
67
|
ModelEndpointMonitoringMetric,
|
|
@@ -34,6 +34,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
|
|
|
34
34
|
UID = "uid"
|
|
35
35
|
PROJECT = "project"
|
|
36
36
|
ENDPOINT_TYPE = "endpoint_type"
|
|
37
|
+
MODE = "mode"
|
|
37
38
|
NAME = "name"
|
|
38
39
|
CREATED = "created"
|
|
39
40
|
UPDATED = "updated"
|
|
@@ -195,6 +196,10 @@ class WriterEventKind(MonitoringStrEnum):
|
|
|
195
196
|
RESULT = "result"
|
|
196
197
|
STATS = "stats"
|
|
197
198
|
|
|
199
|
+
@classmethod
|
|
200
|
+
def user_app_outputs(cls):
|
|
201
|
+
return [cls.METRIC, cls.RESULT]
|
|
202
|
+
|
|
198
203
|
|
|
199
204
|
class ControllerEvent(MonitoringStrEnum):
|
|
200
205
|
KIND = "kind"
|
|
@@ -205,6 +210,11 @@ class ControllerEvent(MonitoringStrEnum):
|
|
|
205
210
|
FIRST_REQUEST = "first_request"
|
|
206
211
|
FEATURE_SET_URI = "feature_set_uri"
|
|
207
212
|
ENDPOINT_TYPE = "endpoint_type"
|
|
213
|
+
|
|
214
|
+
# first_timestamp and last_timestamp are used to batch completed events
|
|
215
|
+
FIRST_TIMESTAMP = "first_timestamp"
|
|
216
|
+
LAST_TIMESTAMP = "last_timestamp"
|
|
217
|
+
|
|
208
218
|
ENDPOINT_POLICY = "endpoint_policy"
|
|
209
219
|
# Note: currently under endpoint policy we will have a dictionary including the keys: "application_names"
|
|
210
220
|
# "base_period", and "updated_endpoint" stand for when the MEP was updated
|
|
@@ -219,6 +229,7 @@ class ControllerEventEndpointPolicy(MonitoringStrEnum):
|
|
|
219
229
|
class ControllerEventKind(MonitoringStrEnum):
|
|
220
230
|
NOP_EVENT = "nop_event"
|
|
221
231
|
REGULAR_EVENT = "regular_event"
|
|
232
|
+
BATCH_COMPLETE = "batch_complete"
|
|
222
233
|
|
|
223
234
|
|
|
224
235
|
class MetricData(MonitoringStrEnum):
|
|
@@ -297,6 +308,7 @@ class FileTargetKind:
|
|
|
297
308
|
MONITORING_APPLICATION = "monitoring_application"
|
|
298
309
|
ERRORS = "errors"
|
|
299
310
|
STATS = "stats"
|
|
311
|
+
PARQUET_STATS = "parquet_stats"
|
|
300
312
|
LAST_REQUEST = "last_request"
|
|
301
313
|
|
|
302
314
|
|
|
@@ -321,6 +333,12 @@ class EndpointType(IntEnum):
|
|
|
321
333
|
return [cls.NODE_EP, cls.ROUTER, cls.BATCH_EP]
|
|
322
334
|
|
|
323
335
|
|
|
336
|
+
class EndpointMode(IntEnum):
|
|
337
|
+
REAL_TIME = 0
|
|
338
|
+
BATCH = 1
|
|
339
|
+
BATCH_LEGACY = 2 # legacy batch mode, used for endpoints created through the batch inference job
|
|
340
|
+
|
|
341
|
+
|
|
324
342
|
class MonitoringFunctionNames(MonitoringStrEnum):
|
|
325
343
|
STREAM = "model-monitoring-stream"
|
|
326
344
|
APPLICATION_CONTROLLER = "model-monitoring-controller"
|
|
@@ -474,19 +492,25 @@ class ModelEndpointMonitoringMetricType(StrEnum):
|
|
|
474
492
|
METRIC = "metric"
|
|
475
493
|
|
|
476
494
|
|
|
495
|
+
# refer to `mlrun.utils.regex.project_name`
|
|
496
|
+
_INNER_PROJECT_PATTERN = r"[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?"
|
|
497
|
+
PROJECT_PATTERN = rf"^{_INNER_PROJECT_PATTERN}$"
|
|
498
|
+
|
|
499
|
+
MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
|
|
500
|
+
|
|
477
501
|
_FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
|
|
502
|
+
_RESULT_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
503
|
+
|
|
478
504
|
FQN_PATTERN = (
|
|
479
|
-
rf"^(?P<project>{_FQN_PART_PATTERN})\."
|
|
505
|
+
rf"^(?P<project>{_INNER_PROJECT_PATTERN})\."
|
|
480
506
|
rf"(?P<app>{_FQN_PART_PATTERN})\."
|
|
481
507
|
rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
|
|
482
|
-
rf"(?P<name>{_FQN_PART_PATTERN})$"
|
|
508
|
+
rf"(?P<name>{_RESULT_NAME_PATTERN})$"
|
|
483
509
|
)
|
|
484
510
|
FQN_REGEX = re.compile(FQN_PATTERN)
|
|
511
|
+
APP_NAME_REGEX = re.compile(_FQN_PART_PATTERN)
|
|
512
|
+
RESULT_NAME_REGEX = re.compile(_RESULT_NAME_PATTERN)
|
|
485
513
|
|
|
486
|
-
# refer to `mlrun.utils.regex.project_name`
|
|
487
|
-
PROJECT_PATTERN = r"^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$"
|
|
488
|
-
MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
|
|
489
|
-
RESULT_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
490
514
|
|
|
491
515
|
INTERSECT_DICT_KEYS = {
|
|
492
516
|
ModelEndpointMonitoringMetricType.METRIC: "intersect_metrics",
|
|
@@ -54,15 +54,24 @@ class FunctionSummary(BaseModel):
|
|
|
54
54
|
|
|
55
55
|
return cls(
|
|
56
56
|
type=func_type,
|
|
57
|
-
name=func_dict["metadata"]["name"]
|
|
57
|
+
name=func_dict["metadata"]["name"]
|
|
58
|
+
if func_type != FunctionsType.APPLICATION
|
|
59
|
+
else func_dict["spec"]
|
|
60
|
+
.get("graph", {})
|
|
61
|
+
.get("steps", {})
|
|
62
|
+
.get("PrepareMonitoringEvent", {})
|
|
63
|
+
.get("class_args", {})
|
|
64
|
+
.get("application_name"),
|
|
58
65
|
application_class=""
|
|
59
66
|
if func_type != FunctionsType.APPLICATION
|
|
60
|
-
else func_dict["spec"]
|
|
61
|
-
|
|
62
|
-
|
|
67
|
+
else func_dict["spec"]
|
|
68
|
+
.get("graph", {})
|
|
69
|
+
.get("steps", {})
|
|
70
|
+
.get("PushToMonitoringWriter", {})
|
|
71
|
+
.get("after", [None])[0],
|
|
63
72
|
project_name=func_dict["metadata"]["project"],
|
|
64
73
|
updated_time=func_dict["metadata"].get("updated"),
|
|
65
74
|
status=func_dict["status"].get("state"),
|
|
66
75
|
base_period=base_period,
|
|
67
|
-
stats=stats,
|
|
76
|
+
stats=stats or {},
|
|
68
77
|
)
|
|
@@ -28,6 +28,7 @@ from .constants import (
|
|
|
28
28
|
FQN_REGEX,
|
|
29
29
|
MODEL_ENDPOINT_ID_PATTERN,
|
|
30
30
|
PROJECT_PATTERN,
|
|
31
|
+
EndpointMode,
|
|
31
32
|
EndpointType,
|
|
32
33
|
ModelEndpointMonitoringMetricType,
|
|
33
34
|
ModelMonitoringMode,
|
|
@@ -118,6 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
|
|
|
118
119
|
project: constr(regex=PROJECT_PATTERN)
|
|
119
120
|
endpoint_type: EndpointType = EndpointType.NODE_EP
|
|
120
121
|
uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
|
|
122
|
+
mode: Optional[EndpointMode] = None
|
|
121
123
|
|
|
122
124
|
@classmethod
|
|
123
125
|
def mutable_fields(cls):
|
|
@@ -129,6 +131,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
|
|
|
129
131
|
return str(v)
|
|
130
132
|
return v
|
|
131
133
|
|
|
134
|
+
@validator("mode", pre=True, always=True)
|
|
135
|
+
def _set_mode_based_on_endpoint_type(cls, v, values): # noqa: N805
|
|
136
|
+
if v is None:
|
|
137
|
+
if values.get("endpoint_type") == EndpointType.BATCH_EP:
|
|
138
|
+
return EndpointMode.BATCH_LEGACY
|
|
139
|
+
else:
|
|
140
|
+
return EndpointMode.REAL_TIME
|
|
141
|
+
return v
|
|
142
|
+
|
|
132
143
|
|
|
133
144
|
class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
|
|
134
145
|
model_class: Optional[str] = ""
|
|
@@ -352,6 +363,16 @@ class ApplicationMetricRecord(ApplicationBaseRecord):
|
|
|
352
363
|
type: Literal["metric"] = "metric"
|
|
353
364
|
|
|
354
365
|
|
|
366
|
+
class _DriftBin(NamedTuple):
|
|
367
|
+
timestamp: datetime
|
|
368
|
+
count_suspected: int
|
|
369
|
+
count_detected: int
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
class ModelEndpointDriftValues(BaseModel):
|
|
373
|
+
values: list[_DriftBin]
|
|
374
|
+
|
|
375
|
+
|
|
355
376
|
def _mapping_attributes(
|
|
356
377
|
model_class: type[Model],
|
|
357
378
|
flattened_dictionary: dict,
|
mlrun/common/schemas/pipeline.py
CHANGED
mlrun/common/schemas/serving.py
CHANGED
mlrun/common/schemas/workflow.py
CHANGED
|
@@ -49,9 +49,11 @@ class WorkflowRequest(pydantic.v1.BaseModel):
|
|
|
49
49
|
class RerunWorkflowRequest(pydantic.v1.BaseModel):
|
|
50
50
|
run_name: typing.Optional[str] = None
|
|
51
51
|
run_id: typing.Optional[str] = None
|
|
52
|
-
original_workflow_id: typing.Optional[str] = None
|
|
53
52
|
notifications: typing.Optional[list[Notification]] = None
|
|
54
53
|
workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
|
|
54
|
+
original_workflow_runner_uid: typing.Optional[str] = None
|
|
55
|
+
original_workflow_name: typing.Optional[str] = None
|
|
56
|
+
rerun_index: typing.Optional[int] = None
|
|
55
57
|
|
|
56
58
|
|
|
57
59
|
class WorkflowResponse(pydantic.v1.BaseModel):
|
mlrun/common/secrets.py
CHANGED
|
@@ -11,10 +11,31 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
14
|
+
import re
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
16
|
|
|
17
17
|
import mlrun.common.schemas
|
|
18
|
+
from mlrun.config import config as mlconf
|
|
19
|
+
|
|
20
|
+
_AUTH_SECRET_NAME_TEMPLATE = re.escape(
|
|
21
|
+
mlconf.secret_stores.kubernetes.auth_secret_name.format(
|
|
22
|
+
hashed_access_key="",
|
|
23
|
+
)
|
|
24
|
+
)
|
|
25
|
+
AUTH_SECRET_PATTERN = re.compile(f"^{_AUTH_SECRET_NAME_TEMPLATE}.*")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def validate_not_forbidden_secret(secret_name: str) -> None:
|
|
29
|
+
"""
|
|
30
|
+
Forbid client-supplied references to internal MLRun auth/project secrets.
|
|
31
|
+
No-op when running inside the API server (API enrichments are allowed).
|
|
32
|
+
"""
|
|
33
|
+
if not secret_name or mlrun.config.is_running_as_api():
|
|
34
|
+
return
|
|
35
|
+
if AUTH_SECRET_PATTERN.match(secret_name):
|
|
36
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
37
|
+
f"Forbidden secret '{secret_name}' matches MLRun auth-secret pattern."
|
|
38
|
+
)
|
|
18
39
|
|
|
19
40
|
|
|
20
41
|
class SecretProviderInterface(ABC):
|
mlrun/config.py
CHANGED
|
@@ -107,7 +107,11 @@ default_config = {
|
|
|
107
107
|
"submit_timeout": "280", # timeout when submitting a new k8s resource
|
|
108
108
|
# runtimes cleanup interval in seconds
|
|
109
109
|
"runtimes_cleanup_interval": "300",
|
|
110
|
-
|
|
110
|
+
# disabled by default due to an internal bug in serving functions
|
|
111
|
+
# relying on a background task to hold the status for its model endpoints
|
|
112
|
+
# TODO: need to refine what/when we can delete the background tasks
|
|
113
|
+
# e.g: use labels or naming convention.
|
|
114
|
+
"background_task_cleanup_interval": "0",
|
|
111
115
|
"background_task_max_age": "21600", # 6 hours in seconds
|
|
112
116
|
"monitoring": {
|
|
113
117
|
"runs": {
|
|
@@ -193,7 +197,8 @@ default_config = {
|
|
|
193
197
|
},
|
|
194
198
|
"v3io_framesd": "http://framesd:8080",
|
|
195
199
|
"model_providers": {
|
|
196
|
-
"openai_default_model": "gpt-
|
|
200
|
+
"openai_default_model": "gpt-4o",
|
|
201
|
+
"huggingface_default_model": "microsoft/Phi-3-mini-4k-instruct",
|
|
197
202
|
},
|
|
198
203
|
# default node selector to be applied to all functions - json string base64 encoded format
|
|
199
204
|
"default_function_node_selector": "e30=",
|
|
@@ -250,7 +255,8 @@ default_config = {
|
|
|
250
255
|
},
|
|
251
256
|
"runtimes": {
|
|
252
257
|
"dask": "600",
|
|
253
|
-
|
|
258
|
+
# cluster start might take some time in case k8s needs to spin up new nodes
|
|
259
|
+
"dask_cluster_start": "600",
|
|
254
260
|
},
|
|
255
261
|
"push_notifications": "60",
|
|
256
262
|
},
|
|
@@ -298,6 +304,7 @@ default_config = {
|
|
|
298
304
|
"application": {
|
|
299
305
|
"default_sidecar_internal_port": 8050,
|
|
300
306
|
"default_authentication_mode": mlrun.common.schemas.APIGatewayAuthenticationMode.none,
|
|
307
|
+
"default_worker_number": 10000,
|
|
301
308
|
},
|
|
302
309
|
},
|
|
303
310
|
# TODO: function defaults should be moved to the function spec config above
|
|
@@ -406,11 +413,7 @@ default_config = {
|
|
|
406
413
|
#
|
|
407
414
|
# if set to "nil" or "none", nothing would be set
|
|
408
415
|
"modes": (
|
|
409
|
-
"STRICT_TRANS_TABLES"
|
|
410
|
-
",NO_ZERO_IN_DATE"
|
|
411
|
-
",NO_ZERO_DATE"
|
|
412
|
-
",ERROR_FOR_DIVISION_BY_ZERO"
|
|
413
|
-
",NO_ENGINE_SUBSTITUTION",
|
|
416
|
+
"STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION"
|
|
414
417
|
)
|
|
415
418
|
},
|
|
416
419
|
},
|
|
@@ -647,6 +650,13 @@ default_config = {
|
|
|
647
650
|
"max_replicas": 1,
|
|
648
651
|
},
|
|
649
652
|
},
|
|
653
|
+
"writer_graph": {
|
|
654
|
+
"max_events": 1000,
|
|
655
|
+
"flush_after_seconds": 30,
|
|
656
|
+
"writer_version": "v1", # v1 is the sync version while v2 is async
|
|
657
|
+
"parquet_batching_max_events": 10,
|
|
658
|
+
"parquet_batching_timeout_secs": 30,
|
|
659
|
+
},
|
|
650
660
|
# Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
|
|
651
661
|
# stream, and endpoints.
|
|
652
662
|
"store_prefixes": {
|
|
@@ -717,7 +727,6 @@ default_config = {
|
|
|
717
727
|
"name": "default",
|
|
718
728
|
"description": "MLRun global function hub",
|
|
719
729
|
"url": "https://mlrun.github.io/marketplace",
|
|
720
|
-
"object_type": "functions",
|
|
721
730
|
"channel": "master",
|
|
722
731
|
},
|
|
723
732
|
},
|
|
@@ -999,9 +1008,9 @@ class Config:
|
|
|
999
1008
|
)
|
|
1000
1009
|
|
|
1001
1010
|
@staticmethod
|
|
1002
|
-
def
|
|
1011
|
+
def get_default_hub_source_url_prefix(object_type) -> str:
|
|
1003
1012
|
default_source = config.hub.default_source
|
|
1004
|
-
return f"{default_source.url}/{default_source.object_type}/{default_source.channel}/"
|
|
1013
|
+
return f"{default_source.url}/{object_type}/{default_source.channel}/"
|
|
1005
1014
|
|
|
1006
1015
|
@staticmethod
|
|
1007
1016
|
def decode_base64_config_and_load_to_object(
|
|
@@ -1242,6 +1251,19 @@ class Config:
|
|
|
1242
1251
|
"""
|
|
1243
1252
|
return self.is_running_on_iguazio()
|
|
1244
1253
|
|
|
1254
|
+
@staticmethod
|
|
1255
|
+
def get_run_retry_staleness_threshold_timedelta() -> timedelta:
|
|
1256
|
+
"""
|
|
1257
|
+
Get the staleness threshold in timedelta for run retries.
|
|
1258
|
+
This is used to determine if a run is stale and should be retried.
|
|
1259
|
+
|
|
1260
|
+
:return: The staleness threshold in timedelta.
|
|
1261
|
+
"""
|
|
1262
|
+
staleness_threshold = int(
|
|
1263
|
+
mlrun.mlconf.monitoring.runs.retry.staleness_threshold
|
|
1264
|
+
)
|
|
1265
|
+
return timedelta(minutes=staleness_threshold)
|
|
1266
|
+
|
|
1245
1267
|
def to_dict(self):
|
|
1246
1268
|
return copy.deepcopy(self._cfg)
|
|
1247
1269
|
|
mlrun/datastore/__init__.py
CHANGED
|
@@ -39,10 +39,11 @@ __all__ = [
|
|
|
39
39
|
from urllib.parse import urlparse
|
|
40
40
|
|
|
41
41
|
import fsspec
|
|
42
|
+
import storey
|
|
42
43
|
|
|
43
44
|
import mlrun.datastore.wasbfs
|
|
44
45
|
from mlrun.datastore.datastore_profile import (
|
|
45
|
-
|
|
46
|
+
DatastoreProfileKafkaStream,
|
|
46
47
|
DatastoreProfileKafkaTarget,
|
|
47
48
|
DatastoreProfileV3io,
|
|
48
49
|
)
|
|
@@ -122,7 +123,7 @@ def get_stream_pusher(stream_path: str, **kwargs):
|
|
|
122
123
|
)
|
|
123
124
|
if isinstance(
|
|
124
125
|
datastore_profile,
|
|
125
|
-
(
|
|
126
|
+
(DatastoreProfileKafkaStream, DatastoreProfileKafkaTarget),
|
|
126
127
|
):
|
|
127
128
|
attributes = datastore_profile.attributes()
|
|
128
129
|
brokers = attributes.pop("brokers", None)
|
|
@@ -168,11 +169,12 @@ def get_stream_pusher(stream_path: str, **kwargs):
|
|
|
168
169
|
raise ValueError(f"unsupported stream path {stream_path}")
|
|
169
170
|
|
|
170
171
|
|
|
171
|
-
class _DummyStream:
|
|
172
|
+
class _DummyStream(storey.MapClass):
|
|
172
173
|
"""stream emulator for tests and debug"""
|
|
173
174
|
|
|
174
175
|
def __init__(self, event_list=None, **kwargs):
|
|
175
176
|
self.event_list = event_list or []
|
|
177
|
+
super().__init__(**kwargs)
|
|
176
178
|
|
|
177
179
|
def push(self, data, **kwargs):
|
|
178
180
|
if not isinstance(data, list):
|
|
@@ -180,3 +182,9 @@ class _DummyStream:
|
|
|
180
182
|
for item in data:
|
|
181
183
|
logger.info(f"dummy stream got event: {item}, kwargs={kwargs}")
|
|
182
184
|
self.event_list.append(item)
|
|
185
|
+
|
|
186
|
+
def do(self, event):
|
|
187
|
+
if not isinstance(event, list):
|
|
188
|
+
event = [event]
|
|
189
|
+
for item in event:
|
|
190
|
+
self.event_list.append(item)
|