mlrun 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +37 -3
- mlrun/__main__.py +5 -0
- mlrun/alerts/alert.py +1 -0
- mlrun/artifacts/document.py +78 -36
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/runtimes/constants.py +17 -0
- mlrun/common/schemas/alert.py +3 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/model_monitoring/constants.py +32 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/config.py +39 -6
- mlrun/datastore/datastore_profile.py +58 -16
- mlrun/datastore/sources.py +7 -1
- mlrun/datastore/vectorstore.py +20 -1
- mlrun/db/base.py +20 -0
- mlrun/db/httpdb.py +97 -10
- mlrun/db/nopdb.py +19 -0
- mlrun/errors.py +4 -0
- mlrun/execution.py +15 -6
- mlrun/frameworks/_common/model_handler.py +0 -2
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +5 -1
- mlrun/model_monitoring/applications/_application_steps.py +3 -1
- mlrun/model_monitoring/controller.py +266 -103
- mlrun/model_monitoring/db/tsdb/__init__.py +11 -23
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +20 -21
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -34
- mlrun/model_monitoring/helpers.py +16 -10
- mlrun/model_monitoring/stream_processing.py +106 -35
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packagers_manager.py +4 -18
- mlrun/projects/pipelines.py +18 -5
- mlrun/projects/project.py +156 -39
- mlrun/runtimes/nuclio/serving.py +22 -13
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/secrets.py +1 -1
- mlrun/serving/server.py +11 -3
- mlrun/serving/states.py +65 -8
- mlrun/serving/v2_serving.py +67 -44
- mlrun/utils/helpers.py +111 -23
- mlrun/utils/notifications/notification/base.py +6 -1
- mlrun/utils/notifications/notification/slack.py +5 -1
- mlrun/utils/notifications/notification_pusher.py +67 -36
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/METADATA +33 -16
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/RECORD +52 -52
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/top_level.txt +0 -0
mlrun/__init__.py
CHANGED
@@ -213,7 +213,41 @@ def set_env_from_file(env_file: str, return_dict: bool = False) -> Optional[dict
     env_vars = dotenv.dotenv_values(env_file)
     if None in env_vars.values():
         raise MLRunInvalidArgumentError("env file lines must be in the form key=value")
-
-
+
+    ordered_env_vars = order_env_vars(env_vars)
+    for key, value in ordered_env_vars.items():
+        environ[key] = value
+
     mlconf.reload()  # reload mlrun configuration
-    return
+    return ordered_env_vars if return_dict else None
+
+
+def order_env_vars(env_vars: dict[str, str]) -> dict[str, str]:
+    """
+    Order and process environment variables by first handling specific ordered keys,
+    then processing the remaining keys in the given dictionary.
+
+    The function ensures that environment variables defined in the `ordered_keys` list
+    are added to the result dictionary first. Any other environment variables from
+    `env_vars` are then added in the order they appear in the input dictionary.
+
+    :param env_vars: A dictionary where each key is the name of an environment variable (str),
+                     and each value is the corresponding environment variable value (str).
+    :return: A dictionary with the processed environment variables, ordered with the specific
+             keys first, followed by the rest in their original order.
+    """
+    ordered_keys = mlconf.get_ordered_keys()
+
+    ordered_env_vars: dict[str, str] = {}
+
+    # First, add the ordered keys to the dictionary
+    for key in ordered_keys:
+        if key in env_vars:
+            ordered_env_vars[key] = env_vars[key]
+
+    # Then, add the remaining keys (those not in ordered_keys)
+    for key, value in env_vars.items():
+        if key not in ordered_keys:
+            ordered_env_vars[key] = value
+
+    return ordered_env_vars
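The effect of the new ordering helper can be seen with a plain dictionary: keys returned by `mlconf.get_ordered_keys()` (currently only `MLRUN_HTTPDB__HTTP__VERIFY`, per the `config.py` change further down) are applied to the environment before everything else. A minimal standalone sketch of that behavior, using hypothetical env-file contents:

```python
# Hypothetical env-file contents (not taken from the diff).
env_vars = {
    "MLRUN_DBPATH": "https://mlrun-api.example.com",
    "MLRUN_HTTPDB__HTTP__VERIFY": "false",
    "V3IO_ACCESS_KEY": "some-key",
}
ordered_keys = ["MLRUN_HTTPDB__HTTP__VERIFY"]  # what Config.get_ordered_keys() returns in this release

# Same two-pass ordering as order_env_vars(): ordered keys first, then the rest in input order.
ordered = {k: env_vars[k] for k in ordered_keys if k in env_vars}
ordered.update({k: v for k, v in env_vars.items() if k not in ordered_keys})

assert list(ordered) == ["MLRUN_HTTPDB__HTTP__VERIFY", "MLRUN_DBPATH", "V3IO_ACCESS_KEY"]
```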
mlrun/__main__.py
CHANGED
@@ -32,6 +32,7 @@ from tabulate import tabulate
 import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas
+import mlrun.utils.helpers
 from mlrun.common.helpers import parse_versioned_object_uri
 from mlrun.runtimes.mounts import auto_mount as auto_mount_modifier

@@ -304,6 +305,7 @@ def run(
         update_in(runtime, "spec.build.code_origin", url_file)
     elif runtime:
         runtime = py_eval(runtime)
+        runtime = mlrun.utils.helpers.as_dict(runtime)
     if not isinstance(runtime, dict):
         print(f"Runtime parameter must be a dict, not {type(runtime)}")
         exit(1)
@@ -515,6 +517,7 @@ def build(

     if runtime:
         runtime = py_eval(runtime)
+        runtime = mlrun.utils.helpers.as_dict(runtime)
     if not isinstance(runtime, dict):
         print(f"Runtime parameter must be a dict, not {type(runtime)}")
         exit(1)
@@ -662,6 +665,8 @@ def deploy(
         runtime = py_eval(spec)
     else:
         runtime = {}
+
+    runtime = mlrun.utils.helpers.as_dict(runtime)
     if not isinstance(runtime, dict):
         print(f"Runtime parameter must be a dict, not {type(runtime)}")
         exit(1)
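All three CLI paths (`run`, `build`, `deploy`) now pass the evaluated runtime/spec value through `mlrun.utils.helpers.as_dict` before the `isinstance(runtime, dict)` check. The helper's implementation is not part of this diff; a purely illustrative normalization of that shape (hypothetical, not mlrun's code) could look like:

```python
import json
from typing import Any


def as_dict_sketch(runtime: Any) -> Any:
    """Illustrative only: coerce a JSON string or an object exposing to_dict()
    into a dict, otherwise return the value unchanged so the caller's
    isinstance() check can still reject it."""
    if isinstance(runtime, str):
        try:
            return json.loads(runtime)
        except json.JSONDecodeError:
            return runtime
    if hasattr(runtime, "to_dict"):
        return runtime.to_dict()
    return runtime
```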
mlrun/alerts/alert.py
CHANGED
mlrun/artifacts/document.py
CHANGED
@@ -20,10 +20,12 @@ from importlib import import_module
 from typing import Optional, Union

 import mlrun
+import mlrun.artifacts
 from mlrun.artifacts import Artifact, ArtifactSpec
 from mlrun.model import ModelObj

 from ..utils import generate_artifact_uri
+from .base import ArtifactStatus


 class DocumentLoaderSpec(ModelObj):
@@ -41,13 +43,13 @@ class DocumentLoaderSpec(ModelObj):

     """

-    _dict_fields = ["loader_class_name", "src_name", "kwargs"]
+    _dict_fields = ["loader_class_name", "src_name", "download_object", "kwargs"]

     def __init__(
         self,
         loader_class_name: str = "langchain_community.document_loaders.TextLoader",
         src_name: str = "file_path",
-        download_object: bool =
+        download_object: bool = True,
         kwargs: Optional[dict] = None,
     ):
         """
@@ -191,6 +193,14 @@ class MLRunLoader:
             self.producer = mlrun.get_or_create_project(self.producer)

     def lazy_load(self) -> Iterator["Document"]:  # noqa: F821
+        collections = None
+        try:
+            artifact = self.producer.get_artifact(self.artifact_key, self.tag)
+            collections = (
+                artifact.status.collections if artifact else collections
+            )
+        except mlrun.MLRunNotFoundError:
+            pass
         artifact = self.producer.log_document(
             key=self.artifact_key,
             document_loader_spec=self.loader_spec,
@@ -198,6 +208,7 @@ class MLRunLoader:
             upload=self.upload,
             labels=self.labels,
             tag=self.tag,
+            collections=collections,
         )
         res = artifact.to_langchain_documents()
         return res
@@ -217,30 +228,8 @@ class MLRunLoader:
     @staticmethod
     def artifact_key_instance(artifact_key: str, src_path: str) -> str:
         if "%%" in artifact_key:
-
-            # Convert anchored pattern (^...$) to non-anchored version for finditer
-            search_pattern = pattern.strip("^$")
-            result = []
-            current_pos = 0
-
-            # Find all valid sequences
-            for match in re.finditer(search_pattern, src_path):
-                # Add hex values for characters between matches
-                for char in src_path[current_pos : match.start()]:
-                    result.append(hex(ord(char))[2:].zfill(2))
-
-                # Add the valid sequence
-                result.append(match.group())
-                current_pos = match.end()
-
-            # Handle any remaining characters after the last match
-            for char in src_path[current_pos:]:
-                result.append(hex(ord(char))[2:].zfill(2))
-
-            resolved_path = "".join(result)
-
+            resolved_path = DocumentArtifact.key_from_source(src_path)
             artifact_key = artifact_key.replace("%%", resolved_path)
-
         return artifact_key


@@ -249,29 +238,70 @@ class DocumentArtifact(Artifact):
     A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
     """

+    @staticmethod
+    def key_from_source(src_path: str) -> str:
+        """Convert a source path into a valid artifact key by replacing invalid characters with underscores.
+        Args:
+            src_path (str): The source path to be converted into a valid artifact key
+        Returns:
+            str: A modified version of the source path where all invalid characters are replaced
+                with underscores while preserving valid sequences in their original positions
+        Examples:
+            >>> DocumentArtifact.key_from_source("data/file-name(v1).txt")
+            "data_file-name_v1__txt"
+        """
+        pattern = mlrun.utils.regex.artifact_key[0]
+        # Convert anchored pattern (^...$) to non-anchored version for finditer
+        search_pattern = pattern.strip("^$")
+        result = []
+        current_pos = 0
+
+        # Find all valid sequences
+        for match in re.finditer(search_pattern, src_path):
+            # Add '_' values for characters between matches
+            for char in src_path[current_pos : match.start()]:
+                result.append("_")
+
+            # Add the valid sequence
+            result.append(match.group())
+            current_pos = match.end()
+
+        # Handle any remaining characters after the last match
+        for char in src_path[current_pos:]:
+            result.append("_")
+
+        resolved_path = "".join(result)
+        return resolved_path
+
     class DocumentArtifactSpec(ArtifactSpec):
         _dict_fields = ArtifactSpec._dict_fields + [
             "document_loader",
-            "collections",
             "original_source",
         ]
-        _exclude_fields_from_uid_hash = ArtifactSpec._exclude_fields_from_uid_hash + [
-            "collections",
-        ]

         def __init__(
             self,
             *args,
             document_loader: Optional[DocumentLoaderSpec] = None,
-            collections: Optional[dict] = None,
             original_source: Optional[str] = None,
             **kwargs,
         ):
             super().__init__(*args, **kwargs)
             self.document_loader = document_loader
-            self.collections = collections if collections is not None else {}
             self.original_source = original_source

+    class DocumentArtifactStatus(ArtifactStatus):
+        _dict_fields = ArtifactStatus._dict_fields + ["collections"]
+
+        def __init__(
+            self,
+            *args,
+            collections: Optional[dict] = None,
+            **kwargs,
+        ):
+            super().__init__(*args, **kwargs)
+            self.collections = collections if collections is not None else {}
+
     kind = "document"

     METADATA_SOURCE_KEY = "source"
@@ -286,6 +316,7 @@ class DocumentArtifact(Artifact):
         self,
         original_source: Optional[str] = None,
         document_loader_spec: Optional[DocumentLoaderSpec] = None,
+        collections: Optional[dict] = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -295,6 +326,17 @@ class DocumentArtifact(Artifact):
             else self.spec.document_loader
         )
         self.spec.original_source = original_source or self.spec.original_source
+        self.status = DocumentArtifact.DocumentArtifactStatus(collections=collections)
+
+    @property
+    def status(self) -> DocumentArtifactStatus:
+        return self._status
+
+    @status.setter
+    def status(self, status):
+        self._status = self._verify_dict(
+            status, "status", DocumentArtifact.DocumentArtifactStatus
+        )

     @property
     def spec(self) -> DocumentArtifactSpec:
@@ -355,7 +397,7 @@ class DocumentArtifact(Artifact):
         metadata[self.METADATA_ORIGINAL_SOURCE_KEY] = self.spec.original_source
         metadata[self.METADATA_SOURCE_KEY] = self.get_source()
         metadata[self.METADATA_ARTIFACT_TAG] = self.tag or "latest"
-        metadata[self.METADATA_ARTIFACT_KEY] = self.
+        metadata[self.METADATA_ARTIFACT_KEY] = self.db_key
         metadata[self.METADATA_ARTIFACT_PROJECT] = self.metadata.project

         if self.get_target_path():
@@ -386,8 +428,8 @@ class DocumentArtifact(Artifact):
         Args:
            collection_id (str): The ID of the collection to add
         """
-        if collection_id not in self.
-        self.
+        if collection_id not in self.status.collections:
+            self.status.collections[collection_id] = "1"
             return True
         return False

@@ -403,7 +445,7 @@ class DocumentArtifact(Artifact):
         Args:
            collection_id (str): The ID of the collection to remove
         """
-        if collection_id in self.
-        self.
+        if collection_id in self.status.collections:
+            self.status.collections.pop(collection_id)
             return True
         return False
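The `key_from_source` helper drives the `%%` substitution in `artifact_key_instance`. A standalone sketch of the same replacement idea, assuming an artifact-key pattern of `[a-zA-Z0-9_-]+` (the real pattern comes from `mlrun.utils.regex.artifact_key` and may differ):

```python
import re


def key_from_source_sketch(src_path: str, pattern: str = r"[a-zA-Z0-9_\-]+") -> str:
    # Keep runs of valid characters, replace every other character with "_".
    result, pos = [], 0
    for match in re.finditer(pattern, src_path):
        result.append("_" * (match.start() - pos))
        result.append(match.group())
        pos = match.end()
    result.append("_" * (len(src_path) - pos))
    return "".join(result)


print(key_from_source_sketch("data/file-name(v1).txt"))  # data_file-name_v1__txt
```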
mlrun/common/runtimes/constants.py
CHANGED
@@ -214,6 +214,23 @@ class RunStates:
             RunStates.skipped: mlrun_pipelines.common.models.RunStatuses.skipped,
         }[run_state]

+    @staticmethod
+    def pipeline_run_status_to_run_state(pipeline_run_status):
+        if pipeline_run_status not in mlrun_pipelines.common.models.RunStatuses.all():
+            raise ValueError(f"Invalid pipeline run status: {pipeline_run_status}")
+        return {
+            mlrun_pipelines.common.models.RunStatuses.succeeded: RunStates.completed,
+            mlrun_pipelines.common.models.RunStatuses.failed: RunStates.error,
+            mlrun_pipelines.common.models.RunStatuses.running: RunStates.running,
+            mlrun_pipelines.common.models.RunStatuses.pending: RunStates.pending,
+            mlrun_pipelines.common.models.RunStatuses.canceled: RunStates.aborted,
+            mlrun_pipelines.common.models.RunStatuses.canceling: RunStates.aborting,
+            mlrun_pipelines.common.models.RunStatuses.skipped: RunStates.skipped,
+            mlrun_pipelines.common.models.RunStatuses.runtime_state_unspecified: RunStates.unknown,
+            mlrun_pipelines.common.models.RunStatuses.error: RunStates.error,
+            mlrun_pipelines.common.models.RunStatuses.paused: RunStates.unknown,
+        }[pipeline_run_status]
+

 # TODO: remove this class in 1.9.0 - use only MlrunInternalLabels
 class RunLabels(enum.Enum):
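The new helper is the inverse companion of the existing run-state-to-pipeline-status mapping visible in the context lines above: a dictionary lookup guarded by a membership check. A minimal standalone sketch of the same pattern (the status strings below are placeholders, not necessarily the literal `RunStatuses` values):

```python
# Placeholder mapping illustrating the lookup-with-validation pattern.
_PIPELINE_TO_RUN_STATE = {
    "succeeded": "completed",
    "failed": "error",
    "canceled": "aborted",
    "running": "running",
}


def pipeline_status_to_run_state(status: str) -> str:
    if status not in _PIPELINE_TO_RUN_STATE:
        raise ValueError(f"Invalid pipeline run status: {status}")
    return _PIPELINE_TO_RUN_STATE[status]


print(pipeline_status_to_run_state("canceled"))  # aborted
```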
mlrun/common/schemas/alert.py
CHANGED
@@ -160,6 +160,9 @@ class AlertConfig(pydantic.v1.BaseModel):
     count: Optional[int] = 0
     updated: datetime = None

+    class Config:
+        extra = pydantic.v1.Extra.allow
+
     def get_raw_notifications(self) -> list[notification_objects.Notification]:
         return [
             alert_notification.notification for alert_notification in self.notifications
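Setting `extra = pydantic.v1.Extra.allow` makes `AlertConfig` tolerant of fields it does not declare, so deserialization no longer fails on unknown attributes. A toy pydantic v1 model showing the behavior (the model here is illustrative, not `AlertConfig` itself):

```python
import pydantic.v1


class TolerantModel(pydantic.v1.BaseModel):
    name: str

    class Config:
        extra = pydantic.v1.Extra.allow


# Unknown fields are kept instead of raising a validation error.
obj = TolerantModel(name="my-alert", severity="high")
print(obj.severity)  # high
```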
mlrun/common/schemas/client_spec.py
CHANGED
@@ -57,7 +57,6 @@ class ClientSpec(pydantic.v1.BaseModel):
     redis_url: typing.Optional[str]
     redis_type: typing.Optional[str]
     sql_url: typing.Optional[str]
-    model_monitoring_tsdb_connection: typing.Optional[str]
     ce: typing.Optional[dict]
     # not passing them as one object as it possible client user would like to override only one of the params
     calculate_artifact_hash: typing.Optional[str]
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -61,6 +61,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
     STATE = "state"
     MONITORING_MODE = "monitoring_mode"
     FIRST_REQUEST = "first_request"
+    SAMPLING_PERCENTAGE = "sampling_percentage"

     # status - operative
     LAST_REQUEST = "last_request"
@@ -137,6 +138,10 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
     TIME = "time"
     TABLE_COLUMN = "table_column"
+    SAMPLING_PERCENTAGE = "sampling_percentage"
+    SAMPLING_RATE = "sampling_rate"
+    ESTIMATED_PREDICTION_COUNT = "estimated_prediction_count"
+    EFFECTIVE_SAMPLE_COUNT = "effective_sample_count"


 class FeatureSetFeatures(MonitoringStrEnum):
@@ -178,6 +183,25 @@ class WriterEventKind(MonitoringStrEnum):
     STATS = "stats"


+class ControllerEvent(MonitoringStrEnum):
+    KIND = "kind"
+    ENDPOINT_ID = "endpoint_id"
+    ENDPOINT_NAME = "endpoint_name"
+    PROJECT = "project"
+    TIMESTAMP = "timestamp"
+    FIRST_REQUEST = "first_request"
+    FEATURE_SET_URI = "feature_set_uri"
+    ENDPOINT_TYPE = "endpoint_type"
+    ENDPOINT_POLICY = "endpoint_policy"
+    # Note: currently under endpoint policy we will have a dictionary including the keys: "application_names"
+    # and "base_period"
+
+
+class ControllerEventKind(MonitoringStrEnum):
+    NOP_EVENT = "nop_event"
+    REGULAR_EVENT = "regular_event"
+
+
 class MetricData(MonitoringStrEnum):
     METRIC_NAME = "metric_name"
     METRIC_VALUE = "metric_value"
@@ -223,28 +247,26 @@ class ModelEndpointTarget(MonitoringStrEnum):
     SQL = "sql"


-class StreamKind(MonitoringStrEnum):
-    V3IO_STREAM = "v3io_stream"
-    KAFKA = "kafka"
-
-
 class TSDBTarget(MonitoringStrEnum):
     V3IO_TSDB = "v3io-tsdb"
     TDEngine = "tdengine"


+class DefaultProfileName(StrEnum):
+    STREAM = "mm-infra-stream"
+    TSDB = "mm-infra-tsdb"
+
+
 class ProjectSecretKeys:
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
-    STREAM_PATH = "STREAM_PATH"
-    TSDB_CONNECTION = "TSDB_CONNECTION"
     TSDB_PROFILE_NAME = "TSDB_PROFILE_NAME"
     STREAM_PROFILE_NAME = "STREAM_PROFILE_NAME"

     @classmethod
     def mandatory_secrets(cls):
         return [
-            cls.
-            cls.
+            cls.STREAM_PROFILE_NAME,
+            cls.TSDB_PROFILE_NAME,
         ]


@@ -306,6 +328,7 @@ class V3IOTSDBTables(MonitoringStrEnum):
     METRICS = "metrics"
     EVENTS = "events"
     ERRORS = "errors"
+    PREDICTIONS = "predictions"


 class TDEngineSuperTables(MonitoringStrEnum):
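The `ControllerEvent` members are the field names the monitoring controller uses when it emits events, and `ControllerEventKind` distinguishes regular traffic events from no-op heartbeats. A sketch of an event payload built from these keys (all values below are invented examples; the `endpoint_policy` shape follows the note in the enum):

```python
from datetime import datetime, timezone

# Keys mirror the ControllerEvent enum values; values are illustrative only.
event = {
    "kind": "regular_event",  # ControllerEventKind.REGULAR_EVENT
    "endpoint_id": "abc123",
    "endpoint_name": "churn-model",
    "project": "my-project",
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "first_request": "2024-11-01T08:00:00+00:00",
    "feature_set_uri": "store://feature-sets/my-project/churn-features",
    "endpoint_type": "2",
    "endpoint_policy": {"application_names": ["my-monitoring-app"], "base_period": 10},
}
```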
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -160,6 +160,7 @@ class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
     state: Optional[str] = "unknown"  # will be updated according to the function state
     first_request: Optional[datetime] = None
     monitoring_mode: Optional[ModelMonitoringMode] = ModelMonitoringMode.disabled
+    sampling_percentage: Optional[float] = 100

     # operative
     last_request: Optional[datetime] = None
@@ -177,6 +178,7 @@ class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
         "monitoring_mode",
         "first_request",
         "last_request",
+        "sampling_percentage",
     ]

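The new `sampling_percentage` field (default 100, i.e. every request) presumably controls what share of serving traffic the monitoring stream samples for each endpoint; the sampling-related constants added to `EventFieldType` above point the same way. A minimal, generic illustration of percentage-based sampling (not mlrun's actual stream logic):

```python
import random


def should_sample(sampling_percentage: float = 100.0) -> bool:
    """Return True for roughly `sampling_percentage`% of calls."""
    return random.uniform(0, 100) < sampling_percentage


sampled = sum(should_sample(25.0) for _ in range(10_000))
print(f"kept ~{sampled / 100:.1f}% of events")  # around 25%
```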
mlrun/common/schemas/workflow.py
CHANGED
mlrun/config.py
CHANGED
@@ -537,6 +537,8 @@ default_config = {
     },
     "pagination": {
         "default_page_size": 200,
+        "page_limit": 1000000,
+        "page_size_limit": 1000000,
         "pagination_cache": {
             "interval": 60,
             "ttl": 3600,
@@ -594,6 +596,22 @@ default_config = {
             "max_replicas": 1,
         },
     },
+    "controller_stream_args": {
+        "v3io": {
+            "shard_count": 10,
+            "retention_period_hours": 24,
+            "num_workers": 10,
+            "min_replicas": 1,
+            "max_replicas": 1,
+        },
+        "kafka": {
+            "partition_count": 10,
+            "replication_factor": 1,
+            "num_workers": 10,
+            "min_replicas": 1,
+            "max_replicas": 1,
+        },
+    },
     # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
     # stream, and endpoints.
     "store_prefixes": {
@@ -606,10 +624,6 @@ default_config = {
     "offline_storage_path": "model-endpoints/{kind}",
     "parquet_batching_max_events": 10_000,
     "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-    # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
-    "tsdb_connection": "",
-    # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
-    "stream_connection": "",
     "tdengine": {
         "timeout": 10,
         "retries": 1,
@@ -727,6 +741,7 @@ default_config = {
     },
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
+        "concurrent_delete_worker_count": 20,
         # Default timeout seconds for retrieving workflow id after execution
         # Remote workflow timeout is the maximum between remote and the inner engine timeout
         "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
@@ -799,7 +814,7 @@ default_config = {
     # maximum allowed value for count in criteria field inside AlertConfig
     "max_criteria_count": 100,
     # interval for periodic events generation job
-    "events_generation_interval":
+    "events_generation_interval": 30,  # seconds
     },
     "auth_with_client_id": {
         "enabled": False,
@@ -1282,6 +1297,8 @@ class Config:
             function_name
             and function_name
             != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
+            and function_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
         ):
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                 project=project,
@@ -1289,12 +1306,21 @@ class Config:
                 if function_name is None
                 else f"{kind}-{function_name.lower()}",
             )
-        elif
+        elif (
+            kind == "stream"
+            and function_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
+        ):
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                 project=project,
                 kind=kind,
             )
         else:
+            if (
+                function_name
+                == mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
+            ):
+                kind = function_name
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                 project=project,
                 kind=kind,
@@ -1363,6 +1389,13 @@ class Config:
             >= semver.VersionInfo.parse("1.12.10")
         )

+    @staticmethod
+    def get_ordered_keys():
+        # Define the keys to process first
+        return [
+            "MLRUN_HTTPDB__HTTP__VERIFY"  # Ensure this key is processed first for proper connection setup
+        ]
+


 # Global configuration
 config = Config.from_dict(default_config)
mlrun/datastore/datastore_profile.py
CHANGED
@@ -17,7 +17,7 @@ import base64
 import json
 import typing
 import warnings
-from urllib.parse import ParseResult, urlparse
+from urllib.parse import ParseResult, urlparse

 import pydantic.v1
 from mergedeep import merge
@@ -211,9 +211,10 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
         attributes["partitions"] = self.partitions
         sasl = attributes.pop("sasl", {})
         if self.sasl_user and self.sasl_pass:
-            sasl["
+            sasl["enable"] = True
             sasl["user"] = self.sasl_user
             sasl["password"] = self.sasl_pass
+            sasl["mechanism"] = "PLAIN"
         if sasl:
             attributes["sasl"] = sasl
         return attributes
@@ -312,7 +313,7 @@ class DatastoreProfileRedis(DatastoreProfile):
             query=parsed_url.query,
             fragment=parsed_url.fragment,
         )
-        return
+        return new_parsed_url.geturl()

     def secrets(self) -> dict:
         res = {}
@@ -473,6 +474,59 @@ class DatastoreProfileHdfs(DatastoreProfile):
         return f"webhdfs://{self.host}:{self.http_port}{subpath}"


+class TDEngineDatastoreProfile(DatastoreProfile):
+    """
+    A profile that holds the required parameters for a TDEngine database, with the websocket scheme.
+    https://docs.tdengine.com/developer-guide/connecting-to-tdengine/#websocket-connection
+    """
+
+    type: str = pydantic.v1.Field("taosws")
+    _private_attributes = ["password"]
+    user: str
+    # The password cannot be empty in real world scenarios. It's here just because of the profiles completion design.
+    password: typing.Optional[str]
+    host: str
+    port: int
+
+    def dsn(self) -> str:
+        """Get the Data Source Name of the configured TDEngine profile."""
+        return f"{self.type}://{self.user}:{self.password}@{self.host}:{self.port}"
+
+    @classmethod
+    def from_dsn(cls, dsn: str, profile_name: str) -> "TDEngineDatastoreProfile":
+        """
+        Construct a TDEngine profile from DSN (connection string) and a name for the profile.
+
+        :param dsn: The DSN (Data Source Name) of the TDEngine database, e.g.: ``"taosws://root:taosdata@localhost:6041"``.
+        :param profile_name: The new profile's name.
+        :return: The TDEngine profile.
+        """
+        parsed_url = urlparse(dsn)
+        return cls(
+            name=profile_name,
+            user=parsed_url.username,
+            password=parsed_url.password,
+            host=parsed_url.hostname,
+            port=parsed_url.port,
+        )
+
+
+_DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
+    "v3io": DatastoreProfileV3io,
+    "s3": DatastoreProfileS3,
+    "redis": DatastoreProfileRedis,
+    "basic": DatastoreProfileBasic,
+    "kafka_target": DatastoreProfileKafkaTarget,
+    "kafka_source": DatastoreProfileKafkaSource,
+    "dbfs": DatastoreProfileDBFS,
+    "gcs": DatastoreProfileGCS,
+    "az": DatastoreProfileAzureBlob,
+    "hdfs": DatastoreProfileHdfs,
+    "taosws": TDEngineDatastoreProfile,
+    "config": ConfigProfile,
+}
+
+
 class DatastoreProfile2Json(pydantic.v1.BaseModel):
     @staticmethod
     def _to_json(attributes):
@@ -523,19 +577,7 @@ class DatastoreProfile2Json(pydantic.v1.BaseModel):

         decoded_dict = {k: safe_literal_eval(v) for k, v in decoded_dict.items()}
         datastore_type = decoded_dict.get("type")
-        ds_profile_factory =
-            "v3io": DatastoreProfileV3io,
-            "s3": DatastoreProfileS3,
-            "redis": DatastoreProfileRedis,
-            "basic": DatastoreProfileBasic,
-            "kafka_target": DatastoreProfileKafkaTarget,
-            "kafka_source": DatastoreProfileKafkaSource,
-            "dbfs": DatastoreProfileDBFS,
-            "gcs": DatastoreProfileGCS,
-            "az": DatastoreProfileAzureBlob,
-            "hdfs": DatastoreProfileHdfs,
-            "config": ConfigProfile,
-        }
+        ds_profile_factory = _DATASTORE_TYPE_TO_PROFILE_CLASS
         if datastore_type in ds_profile_factory:
            return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
         else:
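Together with the removal of the `tsdb_connection`/`stream_connection` config strings and the `STREAM_PATH`/`TSDB_CONNECTION` secrets, the new `TDEngineDatastoreProfile` means the TSDB connection is now described by a datastore profile rather than a raw connection string. A short usage sketch based on the `dsn()`/`from_dsn()` methods shown above (values are examples; registering the profile on the project is the usual follow-up step and is omitted here):

```python
from mlrun.datastore.datastore_profile import TDEngineDatastoreProfile

# Parse an existing websocket DSN into a named profile, or build one field by field.
profile = TDEngineDatastoreProfile.from_dsn(
    dsn="taosws://root:taosdata@localhost:6041", profile_name="mm-infra-tsdb"
)
assert profile.dsn() == "taosws://root:taosdata@localhost:6041"

# "mm-infra-tsdb" matches DefaultProfileName.TSDB, the name model monitoring
# looks up via the TSDB_PROFILE_NAME project secret.
```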