mlrun 1.7.0rc13__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic; see the advisory details below for more information.

Files changed (85)
  1. mlrun/__main__.py +0 -105
  2. mlrun/artifacts/__init__.py +1 -2
  3. mlrun/artifacts/base.py +8 -250
  4. mlrun/artifacts/dataset.py +1 -190
  5. mlrun/artifacts/manager.py +2 -41
  6. mlrun/artifacts/model.py +1 -140
  7. mlrun/artifacts/plots.py +1 -375
  8. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  9. mlrun/common/schemas/model_monitoring/constants.py +24 -3
  10. mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
  11. mlrun/common/schemas/project.py +1 -0
  12. mlrun/config.py +14 -4
  13. mlrun/data_types/to_pandas.py +4 -4
  14. mlrun/datastore/base.py +41 -9
  15. mlrun/datastore/datastore_profile.py +50 -3
  16. mlrun/datastore/hdfs.py +5 -0
  17. mlrun/datastore/inmem.py +2 -2
  18. mlrun/datastore/sources.py +43 -2
  19. mlrun/datastore/store_resources.py +2 -6
  20. mlrun/datastore/targets.py +125 -6
  21. mlrun/datastore/v3io.py +1 -1
  22. mlrun/db/auth_utils.py +152 -0
  23. mlrun/db/base.py +1 -1
  24. mlrun/db/httpdb.py +69 -33
  25. mlrun/feature_store/__init__.py +0 -2
  26. mlrun/feature_store/api.py +12 -47
  27. mlrun/feature_store/feature_set.py +9 -0
  28. mlrun/feature_store/retrieval/base.py +9 -4
  29. mlrun/feature_store/retrieval/conversion.py +4 -4
  30. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  31. mlrun/feature_store/retrieval/job.py +2 -0
  32. mlrun/feature_store/retrieval/local_merger.py +2 -0
  33. mlrun/feature_store/retrieval/spark_merger.py +5 -0
  34. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
  35. mlrun/kfpops.py +5 -10
  36. mlrun/launcher/base.py +1 -1
  37. mlrun/launcher/client.py +1 -1
  38. mlrun/lists.py +2 -2
  39. mlrun/model.py +36 -9
  40. mlrun/model_monitoring/api.py +41 -18
  41. mlrun/model_monitoring/application.py +5 -305
  42. mlrun/model_monitoring/applications/__init__.py +11 -0
  43. mlrun/model_monitoring/applications/_application_steps.py +158 -0
  44. mlrun/model_monitoring/applications/base.py +282 -0
  45. mlrun/model_monitoring/applications/context.py +214 -0
  46. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  47. mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
  48. mlrun/model_monitoring/applications/results.py +99 -0
  49. mlrun/model_monitoring/controller.py +3 -1
  50. mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
  51. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
  52. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
  53. mlrun/model_monitoring/evidently_application.py +6 -118
  54. mlrun/model_monitoring/helpers.py +1 -1
  55. mlrun/model_monitoring/model_endpoint.py +3 -2
  56. mlrun/model_monitoring/stream_processing.py +2 -3
  57. mlrun/model_monitoring/writer.py +69 -39
  58. mlrun/platforms/iguazio.py +2 -2
  59. mlrun/projects/pipelines.py +24 -7
  60. mlrun/projects/project.py +130 -65
  61. mlrun/render.py +2 -10
  62. mlrun/run.py +1 -4
  63. mlrun/runtimes/__init__.py +3 -3
  64. mlrun/runtimes/base.py +3 -3
  65. mlrun/runtimes/funcdoc.py +0 -28
  66. mlrun/runtimes/local.py +1 -1
  67. mlrun/runtimes/mpijob/__init__.py +0 -20
  68. mlrun/runtimes/mpijob/v1.py +1 -1
  69. mlrun/runtimes/nuclio/api_gateway.py +275 -153
  70. mlrun/runtimes/nuclio/function.py +1 -1
  71. mlrun/runtimes/pod.py +5 -5
  72. mlrun/runtimes/utils.py +1 -1
  73. mlrun/serving/states.py +53 -2
  74. mlrun/utils/helpers.py +27 -40
  75. mlrun/utils/notifications/notification/slack.py +31 -8
  76. mlrun/utils/notifications/notification_pusher.py +133 -14
  77. mlrun/utils/version/version.json +2 -2
  78. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
  79. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +84 -79
  80. mlrun/runtimes/mpijob/v1alpha1.py +0 -29
  81. /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
  82. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
  83. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
  84. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
  85. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
mlrun/utils/helpers.py CHANGED
@@ -39,7 +39,6 @@ import pandas
39
39
  import semver
40
40
  import yaml
41
41
  from dateutil import parser
42
- from deprecated import deprecated
43
42
  from pandas._libs.tslibs.timestamps import Timedelta, Timestamp
44
43
  from yaml.representer import RepresenterError
45
44
 
@@ -76,19 +75,6 @@ class OverwriteBuildParamsWarning(FutureWarning):
76
75
  pass
77
76
 
78
77
 
79
- # TODO: remove in 1.7.0
80
- @deprecated(
81
- version="1.5.0",
82
- reason="'parse_versioned_object_uri' will be removed from this file in 1.7.0, use "
83
- "'mlrun.common.helpers.parse_versioned_object_uri' instead",
84
- category=FutureWarning,
85
- )
86
- def parse_versioned_object_uri(uri: str, default_project: str = ""):
87
- return mlrun.common.helpers.parse_versioned_object_uri(
88
- uri=uri, default_project=default_project
89
- )
90
-
91
-
92
78
  class StorePrefix:
93
79
  """map mlrun store objects to prefixes"""
94
80
 
@@ -119,14 +105,9 @@ class StorePrefix:
119
105
 
120
106
 
121
107
  def get_artifact_target(item: dict, project=None):
122
- if is_legacy_artifact(item):
123
- db_key = item.get("db_key")
124
- project_str = project or item.get("project")
125
- tree = item.get("tree")
126
- else:
127
- db_key = item["spec"].get("db_key")
128
- project_str = project or item["metadata"].get("project")
129
- tree = item["metadata"].get("tree")
108
+ db_key = item["spec"].get("db_key")
109
+ project_str = project or item["metadata"].get("project")
110
+ tree = item["metadata"].get("tree")
130
111
 
131
112
  kind = item.get("kind")
132
113
  if kind in ["dataset", "model", "artifact"] and db_key:
@@ -135,11 +116,15 @@ def get_artifact_target(item: dict, project=None):
135
116
  target = f"{target}@{tree}"
136
117
  return target
137
118
 
138
- return (
139
- item.get("target_path")
140
- if is_legacy_artifact(item)
141
- else item["spec"].get("target_path")
142
- )
119
+ return item["spec"].get("target_path")
120
+
121
+
122
+ # TODO: left for migrations testing purposes. Remove in 1.8.0.
123
+ def is_legacy_artifact(artifact):
124
+ if isinstance(artifact, dict):
125
+ return "metadata" not in artifact
126
+ else:
127
+ return not hasattr(artifact, "metadata")
143
128
 
144
129
 
145
130
  logger = create_logger(config.log_level, config.log_formatter, "mlrun", sys.stdout)
@@ -1018,8 +1003,9 @@ def get_ui_url(project, uid=None):
1018
1003
  def get_workflow_url(project, id=None):
1019
1004
  url = ""
1020
1005
  if mlrun.mlconf.resolve_ui_url():
1021
- url = "{}/{}/{}/jobs/monitor-workflows/workflow/{}".format(
1022
- mlrun.mlconf.resolve_ui_url(), mlrun.mlconf.ui.projects_prefix, project, id
1006
+ url = (
1007
+ f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}"
1008
+ f"/{project}/jobs/monitor-workflows/workflow/{id}"
1023
1009
  )
1024
1010
  return url
1025
1011
 
@@ -1291,13 +1277,6 @@ def str_to_timestamp(time_str: str, now_time: Timestamp = None):
1291
1277
  return Timestamp(time_str)
1292
1278
 
1293
1279
 
1294
- def is_legacy_artifact(artifact):
1295
- if isinstance(artifact, dict):
1296
- return "metadata" not in artifact
1297
- else:
1298
- return not hasattr(artifact, "metadata")
1299
-
1300
-
1301
1280
  def is_link_artifact(artifact):
1302
1281
  if isinstance(artifact, dict):
1303
1282
  return (
@@ -1345,16 +1324,16 @@ def format_run(run: dict, with_project=False) -> dict:
1345
1324
  # as observed https://jira.iguazeng.com/browse/ML-5195
1346
1325
  # set to unknown to ensure a status is returned
1347
1326
  if run["status"] is None:
1348
- run["status"] = inflection.titleize(mlrun.runtimes.constants.RunStates.unknown)
1327
+ run["status"] = inflection.titleize(
1328
+ mlrun.common.runtimes.constants.RunStates.unknown
1329
+ )
1349
1330
 
1350
1331
  return run
1351
1332
 
1352
1333
 
1353
1334
  def get_in_artifact(artifact: dict, key, default=None, raise_on_missing=False):
1354
1335
  """artifact can be dict or Artifact object"""
1355
- if is_legacy_artifact(artifact):
1356
- return artifact.get(key, default)
1357
- elif key == "kind":
1336
+ if key == "kind":
1358
1337
  return artifact.get(key, default)
1359
1338
  else:
1360
1339
  for block in ["metadata", "spec", "status"]:
@@ -1596,3 +1575,11 @@ def get_serving_spec():
1596
1575
  )
1597
1576
  spec = json.loads(data)
1598
1577
  return spec
1578
+
1579
+
1580
+ def additional_filters_warning(additional_filters, class_name):
1581
+ if additional_filters and any(additional_filters):
1582
+ mlrun.utils.logger.warn(
1583
+ f"additional_filters parameter is not supported in {class_name},"
1584
+ f" parameter has been ignored."
1585
+ )
@@ -72,12 +72,7 @@ class SlackNotification(NotificationBase):
72
72
  event_data: mlrun.common.schemas.Event = None,
73
73
  ) -> dict:
74
74
  data = {
75
- "blocks": [
76
- {
77
- "type": "header",
78
- "text": {"type": "plain_text", "text": f"[{severity}] {message}"},
79
- },
80
- ]
75
+ "blocks": self._generate_slack_header_blocks(severity, message),
81
76
  }
82
77
  if self.name:
83
78
  data["blocks"].append(
@@ -106,6 +101,32 @@ class SlackNotification(NotificationBase):
106
101
 
107
102
  return data
108
103
 
104
+ def _generate_slack_header_blocks(self, severity: str, message: str):
105
+ header_text = block_text = f"[{severity}] {message}"
106
+ section_text = None
107
+
108
+ # Slack doesn't allow headers to be longer than 150 characters
109
+ # If there's a comma in the message, split the message at the comma
110
+ # Otherwise, split the message at 150 characters
111
+ if len(block_text) > 150:
112
+ if ", " in block_text and block_text.index(", ") < 149:
113
+ header_text = block_text.split(",")[0]
114
+ section_text = block_text[len(header_text) + 2 :]
115
+ else:
116
+ header_text = block_text[:150]
117
+ section_text = block_text[150:]
118
+ blocks = [
119
+ {"type": "header", "text": {"type": "plain_text", "text": header_text}}
120
+ ]
121
+ if section_text:
122
+ blocks.append(
123
+ {
124
+ "type": "section",
125
+ "text": self._get_slack_row(section_text),
126
+ }
127
+ )
128
+ return blocks
129
+
109
130
  def _get_alert_fields(
110
131
  self,
111
132
  alert: mlrun.common.schemas.AlertConfig,
@@ -131,7 +152,9 @@ class SlackNotification(NotificationBase):
131
152
  def _get_run_line(self, run: dict) -> dict:
132
153
  meta = run["metadata"]
133
154
  url = mlrun.utils.helpers.get_ui_url(meta.get("project"), meta.get("uid"))
134
- if url:
155
+
156
+ # Only show the URL if the run is not a function (serving or mlrun function)
157
+ if run.get("kind") not in ["serving", None] and url:
135
158
  line = f'<{url}|*{meta.get("name")}*>'
136
159
  else:
137
160
  line = meta.get("name")
@@ -148,7 +171,7 @@ class SlackNotification(NotificationBase):
148
171
  result = mlrun.utils.helpers.dict_to_str(
149
172
  run["status"].get("results", {}), ", "
150
173
  )
151
- return self._get_slack_row(result or "None")
174
+ return self._get_slack_row(result or state)
152
175
 
153
176
  @staticmethod
154
177
  def _get_slack_row(text: str) -> dict:
@@ -14,15 +14,21 @@
14
14
 
15
15
  import asyncio
16
16
  import datetime
17
+ import json
17
18
  import os
19
+ import re
18
20
  import traceback
19
21
  import typing
20
22
  from concurrent.futures import ThreadPoolExecutor
21
23
 
24
+ import kfp
25
+
26
+ import mlrun.common.runtimes.constants
22
27
  import mlrun.common.schemas
23
28
  import mlrun.config
24
29
  import mlrun.db.base
25
30
  import mlrun.errors
31
+ import mlrun.kfpops
26
32
  import mlrun.lists
27
33
  import mlrun.model
28
34
  import mlrun.utils.helpers
@@ -238,20 +244,7 @@ class NotificationPusher(_NotificationPusherBase):
238
244
  custom_message = (
239
245
  f" (workflow: {run.metadata.labels['workflow']}){custom_message}"
240
246
  )
241
- db = mlrun.get_run_db()
242
-
243
- workflow_id = run.status.results.get("workflow_id", None)
244
- if workflow_id:
245
- workflow_runs = db.list_runs(
246
- project=run.metadata.project,
247
- labels=f"workflow={workflow_id}",
248
- )
249
- logger.debug(
250
- "Found workflow runs, extending notification runs",
251
- workflow_id=workflow_id,
252
- workflow_runs_amount=len(workflow_runs),
253
- )
254
- runs.extend(workflow_runs)
247
+ runs.extend(self.get_workflow_steps(run))
255
248
 
256
249
  message = (
257
250
  self.messages.get(run.state(), "").format(resource=resource)
@@ -395,6 +388,132 @@ class NotificationPusher(_NotificationPusherBase):
395
388
  mask_params=False,
396
389
  )
397
390
 
391
+ def get_workflow_steps(self, run: mlrun.model.RunObject) -> list:
392
+ steps = []
393
+ db = mlrun.get_run_db()
394
+
395
+ def _add_run_step(_node_name, _):
396
+ steps.append(
397
+ db.list_runs(
398
+ project=run.metadata.project,
399
+ labels=f"mlrun/runner-pod={_node_name}",
400
+ )[0]
401
+ )
402
+
403
+ def _add_deploy_function_step(_, _node_template):
404
+ project, name, hash_key = self._extract_function_uri(
405
+ _node_template["metadata"]["annotations"]["mlrun/function-uri"]
406
+ )
407
+ if name:
408
+ try:
409
+ function = db.get_function(
410
+ project=project, name=name, hash_key=hash_key
411
+ )
412
+ except mlrun.errors.MLRunNotFoundError:
413
+ # If the function is not found (if build failed for example), we will create a dummy
414
+ # function object for the notification to display the function name
415
+ function = {
416
+ "metadata": {
417
+ "name": name,
418
+ "project": project,
419
+ "hash_key": hash_key,
420
+ },
421
+ }
422
+ function["status"] = {
423
+ "state": mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
424
+ node["phase"]
425
+ ),
426
+ }
427
+ if isinstance(function["metadata"].get("updated"), datetime.datetime):
428
+ function["metadata"]["updated"] = function["metadata"][
429
+ "updated"
430
+ ].isoformat()
431
+ steps.append(function)
432
+
433
+ step_methods = {
434
+ mlrun.kfpops.PipelineRunType.run: _add_run_step,
435
+ mlrun.kfpops.PipelineRunType.build: _add_deploy_function_step,
436
+ mlrun.kfpops.PipelineRunType.deploy: _add_deploy_function_step,
437
+ }
438
+
439
+ workflow_id = run.status.results.get("workflow_id", None)
440
+ if not workflow_id:
441
+ return steps
442
+
443
+ workflow_manifest = self._get_workflow_manifest(workflow_id)
444
+ if not workflow_manifest:
445
+ return steps
446
+
447
+ try:
448
+ workflow_nodes = sorted(
449
+ workflow_manifest["status"]["nodes"].items(),
450
+ key=lambda _node: _node[1]["finishedAt"],
451
+ )
452
+ for node_name, node in workflow_nodes:
453
+ if node["type"] != "Pod":
454
+ # Skip the parent DAG node
455
+ continue
456
+
457
+ node_template = next(
458
+ template
459
+ for template in workflow_manifest["spec"]["templates"]
460
+ if template["name"] == node["templateName"]
461
+ )
462
+ step_type = node_template["metadata"]["annotations"].get(
463
+ "mlrun/pipeline-step-type"
464
+ )
465
+ step_method = step_methods.get(step_type)
466
+ if step_method:
467
+ step_method(node_name, node_template)
468
+ return steps
469
+ except Exception:
470
+ # If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
471
+ logger.warning(
472
+ "Failed to extract workflow steps from workflow manifest, "
473
+ "returning all runs with the workflow id label",
474
+ workflow_id=workflow_id,
475
+ traceback=traceback.format_exc(),
476
+ )
477
+ return db.list_runs(
478
+ project=run.metadata.project,
479
+ labels=f"workflow={workflow_id}",
480
+ )
481
+
482
+ @staticmethod
483
+ def _get_workflow_manifest(workflow_id: str) -> typing.Optional[dict]:
484
+ kfp_client = kfp.Client(namespace=mlrun.config.config.namespace)
485
+
486
+ # arbitrary timeout of 5 seconds, the workflow should be done by now
487
+ kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
488
+ if not kfp_run:
489
+ return None
490
+
491
+ kfp_run = kfp_run.to_dict()
492
+ try:
493
+ return json.loads(kfp_run["pipeline_runtime"]["workflow_manifest"])
494
+ except Exception:
495
+ return None
496
+
497
+ def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
498
+ """
499
+ Extract the project, name, and hash key from a function uri.
500
+ Examples:
501
+ - "project/name@hash_key" returns project, name, hash_key
502
+ - "project/name returns" project, name, ""
503
+ """
504
+ project, name, hash_key = None, None, None
505
+ hashed_pattern = r"^(.+)/(.+)@(.+)$"
506
+ pattern = r"^(.+)/(.+)$"
507
+ match = re.match(hashed_pattern, function_uri)
508
+ if match:
509
+ project, name, hash_key = match.groups()
510
+ else:
511
+ match = re.match(pattern, function_uri)
512
+ if match:
513
+ project, name = match.groups()
514
+ hash_key = ""
515
+ return project, name, hash_key
516
+
398
517
 
399
518
  class CustomNotificationPusher(_NotificationPusherBase):
400
519
  def __init__(self, notification_types: list[str] = None):
@@ -1,4 +1,4 @@
1
1
  {
2
- "git_commit": "3bba8f5664d7a3ff4639a3437811fa5972883b2f",
3
- "version": "1.7.0-rc13"
2
+ "git_commit": "dd3ddb2c7544ea578f0445adb74711f0e8b5cbc9",
3
+ "version": "1.7.0-rc15"
4
4
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mlrun
3
- Version: 1.7.0rc13
3
+ Version: 1.7.0rc15
4
4
  Summary: Tracking and config of machine learning runs
5
5
  Home-page: https://github.com/mlrun/mlrun
6
6
  Author: Yaron Haviv
@@ -44,7 +44,7 @@ Requires-Dist: semver ~=3.0
44
44
  Requires-Dist: dependency-injector ~=4.41
45
45
  Requires-Dist: fsspec <2024.4,>=2023.9.2
46
46
  Requires-Dist: v3iofs ~=0.1.17
47
- Requires-Dist: storey ~=1.7.7
47
+ Requires-Dist: storey ~=1.7.11
48
48
  Requires-Dist: inflection ~=0.5.0
49
49
  Requires-Dist: python-dotenv ~=0.17.0
50
50
  Requires-Dist: setuptools ~=69.1