mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (107)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/base.py +0 -31
  3. mlrun/artifacts/document.py +6 -1
  4. mlrun/artifacts/llm_prompt.py +123 -25
  5. mlrun/artifacts/manager.py +0 -5
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/common/constants.py +10 -1
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/common/model_monitoring/helpers.py +86 -0
  10. mlrun/common/schemas/__init__.py +3 -0
  11. mlrun/common/schemas/auth.py +2 -0
  12. mlrun/common/schemas/function.py +10 -0
  13. mlrun/common/schemas/hub.py +30 -18
  14. mlrun/common/schemas/model_monitoring/__init__.py +3 -0
  15. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  16. mlrun/common/schemas/model_monitoring/functions.py +14 -5
  17. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
  18. mlrun/common/schemas/pipeline.py +1 -1
  19. mlrun/common/schemas/serving.py +3 -0
  20. mlrun/common/schemas/workflow.py +3 -1
  21. mlrun/common/secrets.py +22 -1
  22. mlrun/config.py +33 -11
  23. mlrun/datastore/__init__.py +11 -3
  24. mlrun/datastore/azure_blob.py +162 -47
  25. mlrun/datastore/datastore.py +9 -4
  26. mlrun/datastore/datastore_profile.py +61 -5
  27. mlrun/datastore/model_provider/huggingface_provider.py +363 -0
  28. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  29. mlrun/datastore/model_provider/model_provider.py +230 -65
  30. mlrun/datastore/model_provider/openai_provider.py +295 -42
  31. mlrun/datastore/s3.py +24 -2
  32. mlrun/datastore/storeytargets.py +2 -3
  33. mlrun/datastore/utils.py +15 -3
  34. mlrun/db/base.py +47 -19
  35. mlrun/db/httpdb.py +120 -56
  36. mlrun/db/nopdb.py +38 -10
  37. mlrun/execution.py +70 -19
  38. mlrun/hub/__init__.py +15 -0
  39. mlrun/hub/module.py +181 -0
  40. mlrun/k8s_utils.py +105 -16
  41. mlrun/launcher/base.py +13 -6
  42. mlrun/launcher/local.py +15 -0
  43. mlrun/model.py +24 -3
  44. mlrun/model_monitoring/__init__.py +1 -0
  45. mlrun/model_monitoring/api.py +66 -27
  46. mlrun/model_monitoring/applications/__init__.py +1 -1
  47. mlrun/model_monitoring/applications/base.py +509 -117
  48. mlrun/model_monitoring/applications/context.py +2 -4
  49. mlrun/model_monitoring/applications/results.py +4 -7
  50. mlrun/model_monitoring/controller.py +239 -101
  51. mlrun/model_monitoring/db/_schedules.py +116 -33
  52. mlrun/model_monitoring/db/_stats.py +4 -3
  53. mlrun/model_monitoring/db/tsdb/base.py +100 -9
  54. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
  55. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
  56. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  57. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  58. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
  59. mlrun/model_monitoring/helpers.py +54 -9
  60. mlrun/model_monitoring/stream_processing.py +45 -14
  61. mlrun/model_monitoring/writer.py +220 -1
  62. mlrun/platforms/__init__.py +3 -2
  63. mlrun/platforms/iguazio.py +7 -3
  64. mlrun/projects/operations.py +6 -1
  65. mlrun/projects/pipelines.py +46 -26
  66. mlrun/projects/project.py +166 -58
  67. mlrun/run.py +94 -17
  68. mlrun/runtimes/__init__.py +18 -0
  69. mlrun/runtimes/base.py +14 -6
  70. mlrun/runtimes/daskjob.py +7 -0
  71. mlrun/runtimes/local.py +5 -2
  72. mlrun/runtimes/mounts.py +20 -2
  73. mlrun/runtimes/mpijob/abstract.py +6 -0
  74. mlrun/runtimes/mpijob/v1.py +6 -0
  75. mlrun/runtimes/nuclio/__init__.py +1 -0
  76. mlrun/runtimes/nuclio/application/application.py +149 -17
  77. mlrun/runtimes/nuclio/function.py +76 -27
  78. mlrun/runtimes/nuclio/serving.py +97 -15
  79. mlrun/runtimes/pod.py +234 -21
  80. mlrun/runtimes/remotesparkjob.py +6 -0
  81. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  82. mlrun/runtimes/utils.py +49 -11
  83. mlrun/secrets.py +54 -13
  84. mlrun/serving/__init__.py +2 -0
  85. mlrun/serving/remote.py +79 -6
  86. mlrun/serving/routers.py +23 -41
  87. mlrun/serving/server.py +320 -80
  88. mlrun/serving/states.py +725 -157
  89. mlrun/serving/steps.py +62 -0
  90. mlrun/serving/system_steps.py +200 -119
  91. mlrun/serving/v2_serving.py +9 -10
  92. mlrun/utils/helpers.py +288 -88
  93. mlrun/utils/logger.py +3 -1
  94. mlrun/utils/notifications/notification/base.py +18 -0
  95. mlrun/utils/notifications/notification/git.py +2 -4
  96. mlrun/utils/notifications/notification/slack.py +2 -4
  97. mlrun/utils/notifications/notification/webhook.py +2 -5
  98. mlrun/utils/notifications/notification_pusher.py +1 -1
  99. mlrun/utils/retryer.py +15 -2
  100. mlrun/utils/version/version.json +2 -2
  101. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
  102. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
  103. mlrun/api/schemas/__init__.py +0 -259
  104. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
  105. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
  106. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
  107. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/__init__.py CHANGED
@@ -31,11 +31,13 @@ from typing import Optional
31
31
 
32
32
  import dotenv
33
33
 
34
+ from .common.constants import MLRUN_ACTIVE_PROJECT
34
35
  from .config import config as mlconf
35
36
  from .datastore import DataItem, ModelProvider, store_manager
36
37
  from .db import get_run_db
37
38
  from .errors import MLRunInvalidArgumentError, MLRunNotFoundError
38
39
  from .execution import MLClientCtx
40
+ from .hub import get_hub_module, import_module
39
41
  from .model import RunObject, RunTemplate, new_task
40
42
  from .package import ArtifactType, DefaultPackager, Packager, handler
41
43
  from .projects import (
@@ -167,11 +169,29 @@ def set_environment(
167
169
 
168
170
 
169
171
  def get_current_project(silent: bool = False) -> Optional[MlrunProject]:
170
- if not pipeline_context.project and not silent:
172
+ if pipeline_context.project:
173
+ return pipeline_context.project
174
+
175
+ project_name = environ.get(MLRUN_ACTIVE_PROJECT, None)
176
+ if not project_name:
177
+ if not silent:
178
+ raise MLRunInvalidArgumentError(
179
+ "No current project is initialized. Use new, get or load project functions first."
180
+ )
181
+ return None
182
+
183
+ project = load_project(
184
+ name=project_name,
185
+ url=project_name,
186
+ save=False,
187
+ sync_functions=False,
188
+ )
189
+
190
+ if not project and not silent:
171
191
  raise MLRunInvalidArgumentError(
172
192
  "No current project is initialized. Use new, get or load project functions first."
173
193
  )
174
- return pipeline_context.project
194
+ return project
175
195
 
176
196
 
177
197
  def get_sample_path(subpath=""):
mlrun/artifacts/base.py CHANGED
@@ -16,7 +16,6 @@ import os
16
16
  import pathlib
17
17
  import tempfile
18
18
  import typing
19
- import warnings
20
19
  import zipfile
21
20
 
22
21
  import yaml
@@ -876,36 +875,6 @@ def generate_target_path(item: Artifact, artifact_path, producer):
876
875
  return f"{artifact_path}{item.key}{suffix}"
877
876
 
878
877
 
879
- # TODO: Remove once data migration v5 is obsolete
880
- def convert_legacy_artifact_to_new_format(
881
- legacy_artifact: dict,
882
- ) -> Artifact:
883
- """Converts a legacy artifact to a new format.
884
- :param legacy_artifact: The legacy artifact to convert.
885
- :return: The converted artifact.
886
- """
887
- artifact_key = legacy_artifact.get("key", "")
888
- artifact_tag = legacy_artifact.get("tag", "")
889
- if artifact_tag:
890
- artifact_key = f"{artifact_key}:{artifact_tag}"
891
- # TODO: Remove once data migration v5 is obsolete
892
- warnings.warn(
893
- f"Converting legacy artifact '{artifact_key}' to new format. This will not be supported in MLRun 1.10.0. "
894
- f"Make sure to save the artifact/project in the new format.",
895
- FutureWarning,
896
- )
897
-
898
- artifact = mlrun.artifacts.artifact_types.get(
899
- legacy_artifact.get("kind", "artifact"), mlrun.artifacts.Artifact
900
- )()
901
-
902
- artifact.metadata = artifact.metadata.from_dict(legacy_artifact)
903
- artifact.spec = artifact.spec.from_dict(legacy_artifact)
904
- artifact.status = artifact.status.from_dict(legacy_artifact)
905
-
906
- return artifact
907
-
908
-
909
878
  def fill_artifact_object_hash(object_dict, iteration=None, producer_id=None):
910
879
  # remove artifact related fields before calculating hash
911
880
  object_dict.setdefault("metadata", {})
@@ -359,7 +359,12 @@ class DocumentArtifact(Artifact):
359
359
  self,
360
360
  splitter: Optional["TextSplitter"] = None, # noqa: F821
361
361
  ) -> list["Document"]: # noqa: F821
362
- from langchain.schema import Document
362
+ # Try new langchain 1.0+ import path first
363
+ try:
364
+ from langchain_core.documents import Document
365
+ except ImportError:
366
+ # Fall back to old langchain <1.0 import path
367
+ from langchain.schema import Document
363
368
 
364
369
  """
365
370
  Create LC documents from the artifact
@@ -11,12 +11,14 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+ import json
14
15
  import tempfile
16
+ from collections import defaultdict
15
17
  from typing import Optional, Union
16
18
 
17
19
  import mlrun
18
20
  import mlrun.artifacts.model as model_art
19
- import mlrun.common
21
+ import mlrun.common.schemas
20
22
  from mlrun.artifacts import Artifact, ArtifactMetadata, ArtifactSpec
21
23
  from mlrun.utils import StorePrefix, logger
22
24
 
@@ -25,41 +27,52 @@ MAX_PROMPT_LENGTH = 1024
25
27
 
26
28
  class LLMPromptArtifactSpec(ArtifactSpec):
27
29
  _dict_fields = ArtifactSpec._dict_fields + [
28
- "prompt_string",
30
+ "prompt_template",
29
31
  "prompt_legend",
30
- "model_configuration",
32
+ "invocation_config",
31
33
  "description",
32
34
  ]
35
+ PROMPT_TEMPLATE_KEYS = ("content", "role")
36
+ PROMPT_LEGENDS_KEYS = ("field", "description")
33
37
 
34
38
  def __init__(
35
39
  self,
36
40
  model_artifact: Union[model_art.ModelArtifact, str] = None,
37
- prompt_string: Optional[str] = None,
41
+ prompt_template: Optional[list[dict]] = None,
38
42
  prompt_path: Optional[str] = None,
39
43
  prompt_legend: Optional[dict] = None,
40
- model_configuration: Optional[dict] = None,
44
+ invocation_config: Optional[dict] = None,
41
45
  description: Optional[str] = None,
42
46
  target_path: Optional[str] = None,
43
47
  **kwargs,
44
48
  ):
45
- if prompt_string and prompt_path:
49
+ if prompt_template and prompt_path:
46
50
  raise mlrun.errors.MLRunInvalidArgumentError(
47
- "Cannot specify both 'prompt_string' and 'prompt_path'"
51
+ "Cannot specify both 'prompt_template' and 'prompt_path'"
48
52
  )
49
-
53
+ if prompt_legend:
54
+ self._verify_prompt_legend(prompt_legend)
55
+ if prompt_path:
56
+ self._verify_prompt_path(prompt_path)
57
+ if prompt_template:
58
+ self._verify_prompt_template(prompt_template)
50
59
  super().__init__(
51
60
  src_path=prompt_path,
52
61
  target_path=target_path,
53
62
  parent_uri=model_artifact.uri
54
63
  if isinstance(model_artifact, model_art.ModelArtifact)
55
64
  else model_artifact,
56
- body=prompt_string,
65
+ format=kwargs.pop("format", "") or "json",
57
66
  **kwargs,
58
67
  )
59
68
 
60
- self.prompt_string = prompt_string
69
+ self.prompt_template = prompt_template
61
70
  self.prompt_legend = prompt_legend
62
- self.model_configuration = model_configuration
71
+ if invocation_config is not None and not isinstance(invocation_config, dict):
72
+ raise mlrun.errors.MLRunInvalidArgumentError(
73
+ "LLMPromptArtifact invocation_config must be a dictionary or None"
74
+ )
75
+ self.invocation_config = invocation_config or {}
63
76
  self.description = description
64
77
  self._model_artifact = (
65
78
  model_artifact
@@ -67,10 +80,79 @@ class LLMPromptArtifactSpec(ArtifactSpec):
67
80
  else None
68
81
  )
69
82
 
83
+ def _verify_prompt_template(self, prompt_template):
84
+ if not (
85
+ isinstance(prompt_template, list)
86
+ and all(isinstance(item, dict) for item in prompt_template)
87
+ ):
88
+ raise mlrun.errors.MLRunInvalidArgumentError(
89
+ "Expected prompt_template to be a list of dicts"
90
+ )
91
+ for message in prompt_template:
92
+ if set(key.lower() for key in message.keys()) != set(
93
+ self.PROMPT_TEMPLATE_KEYS
94
+ ):
95
+ raise mlrun.errors.MLRunInvalidArgumentError(
96
+ f"Expected prompt_template to contain dicts with keys "
97
+ f"{self.PROMPT_TEMPLATE_KEYS}, got {message.keys()}"
98
+ )
99
+ keys_to_pop = []
100
+ for key in message.keys():
101
+ if isinstance(key, str):
102
+ if not key.islower():
103
+ message[key.lower()] = message[key]
104
+ keys_to_pop.append(key)
105
+ else:
106
+ raise mlrun.errors.MLRunInvalidArgumentError(
107
+ f"Expected prompt_template to contain dict that only"
108
+ f" has str keys got {key} of type {type(key)}"
109
+ )
110
+ for key_to_pop in keys_to_pop:
111
+ message.pop(key_to_pop)
112
+
70
113
  @property
71
114
  def model_uri(self):
72
115
  return self.parent_uri
73
116
 
117
+ @staticmethod
118
+ def _verify_prompt_legend(prompt_legend: dict):
119
+ if prompt_legend is None:
120
+ return True
121
+ for place_holder, body_map in prompt_legend.items():
122
+ if isinstance(body_map, dict):
123
+ if body_map.get("field") is None:
124
+ body_map["field"] = place_holder
125
+ body_map["description"] = body_map.get("description")
126
+ if diff := set(body_map.keys()) - set(
127
+ LLMPromptArtifactSpec.PROMPT_LEGENDS_KEYS
128
+ ):
129
+ raise mlrun.errors.MLRunInvalidArgumentError(
130
+ "prompt_legend values must contain only 'field' and "
131
+ f"'description' keys, got extra fields: {diff}"
132
+ )
133
+ else:
134
+ raise mlrun.errors.MLRunInvalidArgumentError(
135
+ f"Wrong prompt_legend format, {place_holder} is not mapped to dict"
136
+ )
137
+
138
+ @staticmethod
139
+ def _verify_prompt_path(prompt_path: str):
140
+ with mlrun.datastore.store_manager.object(prompt_path).open(mode="r") as p_file:
141
+ try:
142
+ json.load(p_file)
143
+ except json.JSONDecodeError:
144
+ raise mlrun.errors.MLRunInvalidArgumentError(
145
+ f"Failed on decoding str in path "
146
+ f"{prompt_path} expected file to contain a "
147
+ f"json format."
148
+ )
149
+
150
+ def get_body(self):
151
+ if self.prompt_template:
152
+ return json.dumps(self.prompt_template)
153
+ else:
154
+ return None
155
+
74
156
 
75
157
  class LLMPromptArtifact(Artifact):
76
158
  """
@@ -90,20 +172,20 @@ class LLMPromptArtifact(Artifact):
90
172
  model_artifact: Union[
91
173
  model_art.ModelArtifact, str
92
174
  ] = None, # TODO support partial model uri
93
- prompt_string: Optional[str] = None,
175
+ prompt_template: Optional[list[dict]] = None,
94
176
  prompt_path: Optional[str] = None,
95
177
  prompt_legend: Optional[dict] = None,
96
- model_configuration: Optional[dict] = None,
178
+ invocation_config: Optional[dict] = None,
97
179
  description: Optional[str] = None,
98
180
  target_path=None,
99
181
  **kwargs,
100
182
  ):
101
183
  llm_prompt_spec = LLMPromptArtifactSpec(
102
- prompt_string=prompt_string,
184
+ prompt_template=prompt_template,
103
185
  prompt_path=prompt_path,
104
186
  prompt_legend=prompt_legend,
105
187
  model_artifact=model_artifact,
106
- model_configuration=model_configuration,
188
+ invocation_config=invocation_config,
107
189
  target_path=target_path,
108
190
  description=description,
109
191
  )
@@ -137,33 +219,49 @@ class LLMPromptArtifact(Artifact):
137
219
  return self.spec._model_artifact
138
220
  return None
139
221
 
140
- def read_prompt(self) -> Optional[str]:
222
+ def read_prompt(self) -> Optional[Union[str, list[dict]]]:
141
223
  """
142
- Read the prompt string from the artifact.
224
+ Read the prompt json from the artifact or if provided prompt template.
225
+ @:param as_str: True to return the prompt string or a list of dicts.
226
+ @:return prompt string or list of dicts
143
227
  """
144
- if self.spec.prompt_string:
145
- return self.spec.prompt_string
228
+ if self.spec.prompt_template:
229
+ return self.spec.prompt_template
146
230
  if self.spec.target_path:
147
231
  with mlrun.datastore.store_manager.object(url=self.spec.target_path).open(
148
232
  mode="r"
149
233
  ) as p_file:
150
- return p_file.read()
234
+ try:
235
+ return json.load(p_file)
236
+ except json.JSONDecodeError:
237
+ raise mlrun.errors.MLRunInvalidArgumentError(
238
+ f"Failed on decoding str in path "
239
+ f"{self.spec.target_path} expected file to contain a "
240
+ f"json format."
241
+ )
151
242
 
152
243
  def before_log(self):
153
244
  """
154
245
  Prepare the artifact before logging.
155
246
  This method is called before the artifact is logged.
156
247
  """
157
- if self.spec.prompt_string and len(self.spec.prompt_string) > MAX_PROMPT_LENGTH:
248
+ if (
249
+ self.spec.prompt_template
250
+ and len(str(self.spec.prompt_template)) > MAX_PROMPT_LENGTH
251
+ ):
158
252
  logger.debug(
159
253
  "Prompt string exceeds maximum length, saving to a temporary file."
160
254
  )
161
255
  with tempfile.NamedTemporaryFile(
162
- delete=False, mode="w", suffix=".txt"
256
+ delete=False, mode="w", suffix=".json"
163
257
  ) as temp_file:
164
- temp_file.write(self.spec.prompt_string)
258
+ temp_file.write(json.dumps(self.spec.prompt_template))
165
259
  self.spec.src_path = temp_file.name
166
- self.spec.prompt_string = None
260
+ self.spec.prompt_template = None
167
261
  self._src_is_temp = True
168
-
169
262
  super().before_log()
263
+
264
+
265
+ class PlaceholderDefaultDict(defaultdict):
266
+ def __missing__(self, key):
267
+ return f"{{{key}}}"
@@ -110,11 +110,6 @@ class ArtifactProducer:
110
110
 
111
111
  def dict_to_artifact(struct: dict) -> Artifact:
112
112
  kind = struct.get("kind", "")
113
-
114
- # TODO: Remove once data migration v5 is obsolete
115
- if mlrun.utils.is_legacy_artifact(struct):
116
- return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
117
-
118
113
  artifact_class = artifact_types[kind]
119
114
  return artifact_class.from_dict(struct)
120
115
 
mlrun/artifacts/model.py CHANGED
@@ -190,10 +190,10 @@ class ModelArtifact(Artifact):
190
190
  """
191
191
  super().__init__(key, body, format=format, target_path=target_path, **kwargs)
192
192
  model_file = str(model_file or "")
193
- if model_file and model_url:
193
+ if (model_file or model_dir or body) and model_url:
194
194
  raise mlrun.errors.MLRunInvalidArgumentError(
195
- "Arguments 'model_file' and 'model_dir' cannot be"
196
- " used together with 'model_url'."
195
+ "Arguments 'model_file' and 'model_url' cannot be"
196
+ " used together with 'model_file', 'model_dir' or 'body'."
197
197
  )
198
198
  if model_file and "/" in model_file:
199
199
  if model_dir:
mlrun/common/constants.py CHANGED
@@ -27,9 +27,16 @@ DASK_LABEL_PREFIX = "dask.org/"
27
27
  NUCLIO_LABEL_PREFIX = "nuclio.io/"
28
28
  RESERVED_TAG_NAME_LATEST = "latest"
29
29
 
30
+ # Kubernetes DNS-1123 label name length limit
31
+ K8S_DNS_1123_LABEL_MAX_LENGTH = 63
32
+
33
+
34
+ RESERVED_BATCH_JOB_SUFFIX = "-batch"
35
+
30
36
  JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
31
37
  JOB_TYPE_PROJECT_LOADER = "project-loader"
32
38
  JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
39
+ MLRUN_ACTIVE_PROJECT = "MLRUN_ACTIVE_PROJECT"
33
40
 
34
41
 
35
42
  class MLRunInternalLabels:
@@ -81,9 +88,11 @@ class MLRunInternalLabels:
81
88
  kind = "kind"
82
89
  component = "component"
83
90
  mlrun_type = "mlrun__type"
84
- rerun_of = "rerun-of"
85
91
  original_workflow_id = "original-workflow-id"
86
92
  workflow_id = "workflow-id"
93
+ retrying = "retrying"
94
+ rerun_counter = "rerun-counter"
95
+ rerun_index = "rerun-index"
87
96
 
88
97
  owner = "owner"
89
98
  v3io_user = "v3io_user"
@@ -41,6 +41,7 @@ class ArtifactFormat(ObjectFormat, mlrun.common.types.StrEnum):
41
41
  "spec.metrics",
42
42
  "spec.target_path",
43
43
  "spec.parent_uri",
44
+ "spec.has_children",
44
45
  ]
45
46
  ),
46
47
  }[_format]
@@ -14,6 +14,7 @@
14
14
 
15
15
  import sys
16
16
  import typing
17
+ from datetime import datetime
17
18
 
18
19
  import mlrun.common
19
20
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
@@ -24,6 +25,7 @@ BinCounts = typing.NewType("BinCounts", list[int])
24
25
  BinEdges = typing.NewType("BinEdges", list[float])
25
26
 
26
27
  _MAX_FLOAT = sys.float_info.max
28
+ logger = mlrun.utils.create_logger(level="info", name="mm_helpers")
27
29
 
28
30
 
29
31
  def parse_model_endpoint_project_prefix(path: str, project_name: str):
@@ -87,3 +89,87 @@ def pad_features_hist(feature_stats: FeatureStats) -> None:
87
89
  for feature in feature_stats.values():
88
90
  if hist_key in feature:
89
91
  pad_hist(Histogram(feature[hist_key]))
92
+
93
+
94
+ def get_model_endpoints_creation_task_status(
95
+ server,
96
+ ) -> tuple[
97
+ mlrun.common.schemas.BackgroundTaskState,
98
+ typing.Optional[datetime],
99
+ typing.Optional[set[str]],
100
+ ]:
101
+ background_task = None
102
+ background_task_state = mlrun.common.schemas.BackgroundTaskState.running
103
+ background_task_check_timestamp = None
104
+ model_endpoint_uids = None
105
+ try:
106
+ background_task = mlrun.get_run_db().get_project_background_task(
107
+ server.project, server.model_endpoint_creation_task_name
108
+ )
109
+ background_task_check_timestamp = mlrun.utils.now_date()
110
+ log_background_task_state(
111
+ server, background_task.status.state, background_task_check_timestamp
112
+ )
113
+ background_task_state = background_task.status.state
114
+ except mlrun.errors.MLRunNotFoundError:
115
+ logger.warning(
116
+ "Model endpoint creation task not found listing model endpoints",
117
+ project=server.project,
118
+ task_name=server.model_endpoint_creation_task_name,
119
+ )
120
+ if background_task is None:
121
+ model_endpoints = mlrun.get_run_db().list_model_endpoints(
122
+ project=server.project,
123
+ function_name=server.function_name,
124
+ function_tag=server.function_tag,
125
+ tsdb_metrics=False,
126
+ )
127
+ if model_endpoints:
128
+ model_endpoint_uids = {
129
+ endpoint.metadata.uid for endpoint in model_endpoints.endpoints
130
+ }
131
+ logger.info(
132
+ "Model endpoints found after background task not found, model monitoring will monitor "
133
+ "events",
134
+ project=server.project,
135
+ function_name=server.function_name,
136
+ function_tag=server.function_tag,
137
+ uids=model_endpoint_uids,
138
+ )
139
+ background_task_state = mlrun.common.schemas.BackgroundTaskState.succeeded
140
+ else:
141
+ logger.warning(
142
+ "Model endpoints not found after background task not found, model monitoring will not "
143
+ "monitor events",
144
+ project=server.project,
145
+ function_name=server.function_name,
146
+ function_tag=server.function_tag,
147
+ )
148
+ background_task_state = mlrun.common.schemas.BackgroundTaskState.failed
149
+ return background_task_state, background_task_check_timestamp, model_endpoint_uids
150
+
151
+
152
+ def log_background_task_state(
153
+ server,
154
+ background_task_state: mlrun.common.schemas.BackgroundTaskState,
155
+ background_task_check_timestamp: typing.Optional[datetime],
156
+ ):
157
+ logger.info(
158
+ "Checking model endpoint creation task status",
159
+ task_name=server.model_endpoint_creation_task_name,
160
+ )
161
+ if (
162
+ background_task_state
163
+ in mlrun.common.schemas.BackgroundTaskState.terminal_states()
164
+ ):
165
+ logger.info(
166
+ f"Model endpoint creation task completed with state {background_task_state}"
167
+ )
168
+ else: # in progress
169
+ logger.info(
170
+ f"Model endpoint creation task is still in progress with the current state: "
171
+ f"{background_task_state}. Events will not be monitored for the next "
172
+ f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
173
+ function_name=server.function_name,
174
+ background_task_check_timestamp=background_task_check_timestamp.isoformat(),
175
+ )
@@ -133,6 +133,7 @@ from .k8s import NodeSelectorOperator, Resources, ResourceSpec
133
133
  from .memory_reports import MostCommonObjectTypesReport, ObjectTypeReport
134
134
  from .model_monitoring import (
135
135
  DriftStatus,
136
+ EndpointMode,
136
137
  EndpointType,
137
138
  EndpointUID,
138
139
  EventFieldType,
@@ -147,11 +148,13 @@ from .model_monitoring import (
147
148
  GrafanaTable,
148
149
  ModelEndpoint,
149
150
  ModelEndpointCreationStrategy,
151
+ ModelEndpointDriftValues,
150
152
  ModelEndpointList,
151
153
  ModelEndpointMetadata,
152
154
  ModelEndpointSchema,
153
155
  ModelEndpointSpec,
154
156
  ModelEndpointStatus,
157
+ ModelMonitoringInfraLabel,
155
158
  ModelMonitoringMode,
156
159
  MonitoringFunctionNames,
157
160
  TSDBTarget,
@@ -55,6 +55,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
55
55
  secret = "secret"
56
56
  run = "run"
57
57
  model_endpoint = "model-endpoint"
58
+ model_monitoring = "model-monitoring"
58
59
  pipeline = "pipeline"
59
60
  hub_source = "hub-source"
60
61
  workflow = "workflow"
@@ -96,6 +97,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
96
97
  # runtime resource doesn't have an identifier, we don't need any auth granularity behind project level
97
98
  AuthorizationResourceTypes.runtime_resource: "/projects/{project_name}/runtime-resources",
98
99
  AuthorizationResourceTypes.model_endpoint: "/projects/{project_name}/model-endpoints/{resource_name}",
100
+ AuthorizationResourceTypes.model_monitoring: "/projects/{project_name}/model-monitoring/{resource_name}",
99
101
  AuthorizationResourceTypes.pipeline: "/projects/{project_name}/pipelines/{resource_name}",
100
102
  AuthorizationResourceTypes.datastore_profile: "/projects/{project_name}/datastore_profiles",
101
103
  # Hub sources are not project-scoped, and auth is globally on the sources endpoint.
@@ -114,11 +114,21 @@ class StateThresholds(pydantic.v1.BaseModel):
114
114
  default: typing.Optional[dict[str, str]]
115
115
 
116
116
 
117
+ class Backoff(pydantic.v1.BaseModel):
118
+ default_base_delay: typing.Optional[str]
119
+ min_base_delay: typing.Optional[str]
120
+
121
+
122
+ class RetrySpec(pydantic.v1.BaseModel):
123
+ backoff: Backoff
124
+
125
+
117
126
  class FunctionSpec(pydantic.v1.BaseModel):
118
127
  image_pull_secret: typing.Optional[ImagePullSecret]
119
128
  security_context: typing.Optional[SecurityContext]
120
129
  service_account: typing.Optional[ServiceAccount]
121
130
  state_thresholds: typing.Optional[StateThresholds]
131
+ retry: typing.Optional[RetrySpec]
122
132
 
123
133
  class Config:
124
134
  extra = pydantic.v1.Extra.allow