mlrun 1.10.0rc16__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between the two package versions as published in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (98) hide show
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/document.py +6 -1
  3. mlrun/artifacts/llm_prompt.py +21 -15
  4. mlrun/artifacts/model.py +3 -3
  5. mlrun/common/constants.py +9 -0
  6. mlrun/common/formatters/artifact.py +1 -0
  7. mlrun/common/model_monitoring/helpers.py +86 -0
  8. mlrun/common/schemas/__init__.py +2 -0
  9. mlrun/common/schemas/auth.py +2 -0
  10. mlrun/common/schemas/function.py +10 -0
  11. mlrun/common/schemas/hub.py +30 -18
  12. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  13. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  14. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  15. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  16. mlrun/common/schemas/pipeline.py +1 -1
  17. mlrun/common/schemas/serving.py +3 -0
  18. mlrun/common/schemas/workflow.py +1 -0
  19. mlrun/common/secrets.py +22 -1
  20. mlrun/config.py +32 -10
  21. mlrun/datastore/__init__.py +11 -3
  22. mlrun/datastore/azure_blob.py +162 -47
  23. mlrun/datastore/datastore.py +9 -4
  24. mlrun/datastore/datastore_profile.py +61 -5
  25. mlrun/datastore/model_provider/huggingface_provider.py +363 -0
  26. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  27. mlrun/datastore/model_provider/model_provider.py +211 -74
  28. mlrun/datastore/model_provider/openai_provider.py +243 -71
  29. mlrun/datastore/s3.py +24 -2
  30. mlrun/datastore/storeytargets.py +2 -3
  31. mlrun/datastore/utils.py +15 -3
  32. mlrun/db/base.py +27 -19
  33. mlrun/db/httpdb.py +57 -48
  34. mlrun/db/nopdb.py +25 -10
  35. mlrun/execution.py +55 -13
  36. mlrun/hub/__init__.py +15 -0
  37. mlrun/hub/module.py +181 -0
  38. mlrun/k8s_utils.py +105 -16
  39. mlrun/launcher/base.py +13 -6
  40. mlrun/launcher/local.py +2 -0
  41. mlrun/model.py +9 -3
  42. mlrun/model_monitoring/api.py +66 -27
  43. mlrun/model_monitoring/applications/__init__.py +1 -1
  44. mlrun/model_monitoring/applications/base.py +372 -136
  45. mlrun/model_monitoring/applications/context.py +2 -4
  46. mlrun/model_monitoring/applications/results.py +4 -7
  47. mlrun/model_monitoring/controller.py +239 -101
  48. mlrun/model_monitoring/db/_schedules.py +36 -13
  49. mlrun/model_monitoring/db/_stats.py +4 -3
  50. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  51. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
  52. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
  53. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  54. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  55. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
  56. mlrun/model_monitoring/helpers.py +28 -5
  57. mlrun/model_monitoring/stream_processing.py +45 -14
  58. mlrun/model_monitoring/writer.py +220 -1
  59. mlrun/platforms/__init__.py +3 -2
  60. mlrun/platforms/iguazio.py +7 -3
  61. mlrun/projects/operations.py +6 -1
  62. mlrun/projects/pipelines.py +2 -2
  63. mlrun/projects/project.py +128 -45
  64. mlrun/run.py +94 -17
  65. mlrun/runtimes/__init__.py +18 -0
  66. mlrun/runtimes/base.py +14 -6
  67. mlrun/runtimes/daskjob.py +1 -0
  68. mlrun/runtimes/local.py +5 -2
  69. mlrun/runtimes/mounts.py +20 -2
  70. mlrun/runtimes/nuclio/__init__.py +1 -0
  71. mlrun/runtimes/nuclio/application/application.py +147 -17
  72. mlrun/runtimes/nuclio/function.py +70 -27
  73. mlrun/runtimes/nuclio/serving.py +85 -4
  74. mlrun/runtimes/pod.py +213 -21
  75. mlrun/runtimes/utils.py +49 -9
  76. mlrun/secrets.py +54 -13
  77. mlrun/serving/remote.py +79 -6
  78. mlrun/serving/routers.py +23 -41
  79. mlrun/serving/server.py +211 -40
  80. mlrun/serving/states.py +536 -156
  81. mlrun/serving/steps.py +62 -0
  82. mlrun/serving/system_steps.py +136 -81
  83. mlrun/serving/v2_serving.py +9 -10
  84. mlrun/utils/helpers.py +212 -82
  85. mlrun/utils/logger.py +3 -1
  86. mlrun/utils/notifications/notification/base.py +18 -0
  87. mlrun/utils/notifications/notification/git.py +2 -4
  88. mlrun/utils/notifications/notification/slack.py +2 -4
  89. mlrun/utils/notifications/notification/webhook.py +2 -5
  90. mlrun/utils/notifications/notification_pusher.py +1 -1
  91. mlrun/utils/version/version.json +2 -2
  92. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +44 -45
  93. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +97 -92
  94. mlrun/api/schemas/__init__.py +0 -259
  95. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
  96. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
  97. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
  98. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/__init__.py CHANGED
@@ -31,11 +31,13 @@ from typing import Optional
31
31
 
32
32
  import dotenv
33
33
 
34
+ from .common.constants import MLRUN_ACTIVE_PROJECT
34
35
  from .config import config as mlconf
35
36
  from .datastore import DataItem, ModelProvider, store_manager
36
37
  from .db import get_run_db
37
38
  from .errors import MLRunInvalidArgumentError, MLRunNotFoundError
38
39
  from .execution import MLClientCtx
40
+ from .hub import get_hub_module, import_module
39
41
  from .model import RunObject, RunTemplate, new_task
40
42
  from .package import ArtifactType, DefaultPackager, Packager, handler
41
43
  from .projects import (
@@ -167,11 +169,29 @@ def set_environment(
167
169
 
168
170
 
169
171
  def get_current_project(silent: bool = False) -> Optional[MlrunProject]:
170
- if not pipeline_context.project and not silent:
172
+ if pipeline_context.project:
173
+ return pipeline_context.project
174
+
175
+ project_name = environ.get(MLRUN_ACTIVE_PROJECT, None)
176
+ if not project_name:
177
+ if not silent:
178
+ raise MLRunInvalidArgumentError(
179
+ "No current project is initialized. Use new, get or load project functions first."
180
+ )
181
+ return None
182
+
183
+ project = load_project(
184
+ name=project_name,
185
+ url=project_name,
186
+ save=False,
187
+ sync_functions=False,
188
+ )
189
+
190
+ if not project and not silent:
171
191
  raise MLRunInvalidArgumentError(
172
192
  "No current project is initialized. Use new, get or load project functions first."
173
193
  )
174
- return pipeline_context.project
194
+ return project
175
195
 
176
196
 
177
197
  def get_sample_path(subpath=""):
@@ -359,7 +359,12 @@ class DocumentArtifact(Artifact):
359
359
  self,
360
360
  splitter: Optional["TextSplitter"] = None, # noqa: F821
361
361
  ) -> list["Document"]: # noqa: F821
362
- from langchain.schema import Document
362
+ # Try new langchain 1.0+ import path first
363
+ try:
364
+ from langchain_core.documents import Document
365
+ except ImportError:
366
+ # Fall back to old langchain <1.0 import path
367
+ from langchain.schema import Document
363
368
 
364
369
  """
365
370
  Create LC documents from the artifact
@@ -29,7 +29,7 @@ class LLMPromptArtifactSpec(ArtifactSpec):
29
29
  _dict_fields = ArtifactSpec._dict_fields + [
30
30
  "prompt_template",
31
31
  "prompt_legend",
32
- "model_configuration",
32
+ "invocation_config",
33
33
  "description",
34
34
  ]
35
35
  PROMPT_TEMPLATE_KEYS = ("content", "role")
@@ -41,7 +41,7 @@ class LLMPromptArtifactSpec(ArtifactSpec):
41
41
  prompt_template: Optional[list[dict]] = None,
42
42
  prompt_path: Optional[str] = None,
43
43
  prompt_legend: Optional[dict] = None,
44
- model_configuration: Optional[dict] = None,
44
+ invocation_config: Optional[dict] = None,
45
45
  description: Optional[str] = None,
46
46
  target_path: Optional[str] = None,
47
47
  **kwargs,
@@ -62,12 +62,17 @@ class LLMPromptArtifactSpec(ArtifactSpec):
62
62
  parent_uri=model_artifact.uri
63
63
  if isinstance(model_artifact, model_art.ModelArtifact)
64
64
  else model_artifact,
65
+ format=kwargs.pop("format", "") or "json",
65
66
  **kwargs,
66
67
  )
67
68
 
68
69
  self.prompt_template = prompt_template
69
70
  self.prompt_legend = prompt_legend
70
- self.model_configuration = model_configuration
71
+ if invocation_config is not None and not isinstance(invocation_config, dict):
72
+ raise mlrun.errors.MLRunInvalidArgumentError(
73
+ "LLMPromptArtifact invocation_config must be a dictionary or None"
74
+ )
75
+ self.invocation_config = invocation_config or {}
71
76
  self.description = description
72
77
  self._model_artifact = (
73
78
  model_artifact
@@ -83,19 +88,20 @@ class LLMPromptArtifactSpec(ArtifactSpec):
83
88
  raise mlrun.errors.MLRunInvalidArgumentError(
84
89
  "Expected prompt_template to be a list of dicts"
85
90
  )
86
- keys_to_pop = []
87
91
  for message in prompt_template:
92
+ if set(key.lower() for key in message.keys()) != set(
93
+ self.PROMPT_TEMPLATE_KEYS
94
+ ):
95
+ raise mlrun.errors.MLRunInvalidArgumentError(
96
+ f"Expected prompt_template to contain dicts with keys "
97
+ f"{self.PROMPT_TEMPLATE_KEYS}, got {message.keys()}"
98
+ )
99
+ keys_to_pop = []
88
100
  for key in message.keys():
89
101
  if isinstance(key, str):
90
- if key.lower() not in self.PROMPT_TEMPLATE_KEYS:
91
- raise mlrun.errors.MLRunInvalidArgumentError(
92
- f"Expected prompt_template to contain dict that "
93
- f"only has keys from {self.PROMPT_TEMPLATE_KEYS}"
94
- )
95
- else:
96
- if not key.islower():
97
- message[key.lower()] = message[key]
98
- keys_to_pop.append(key)
102
+ if not key.islower():
103
+ message[key.lower()] = message[key]
104
+ keys_to_pop.append(key)
99
105
  else:
100
106
  raise mlrun.errors.MLRunInvalidArgumentError(
101
107
  f"Expected prompt_template to contain dict that only"
@@ -169,7 +175,7 @@ class LLMPromptArtifact(Artifact):
169
175
  prompt_template: Optional[list[dict]] = None,
170
176
  prompt_path: Optional[str] = None,
171
177
  prompt_legend: Optional[dict] = None,
172
- model_configuration: Optional[dict] = None,
178
+ invocation_config: Optional[dict] = None,
173
179
  description: Optional[str] = None,
174
180
  target_path=None,
175
181
  **kwargs,
@@ -179,7 +185,7 @@ class LLMPromptArtifact(Artifact):
179
185
  prompt_path=prompt_path,
180
186
  prompt_legend=prompt_legend,
181
187
  model_artifact=model_artifact,
182
- model_configuration=model_configuration,
188
+ invocation_config=invocation_config,
183
189
  target_path=target_path,
184
190
  description=description,
185
191
  )
mlrun/artifacts/model.py CHANGED
@@ -190,10 +190,10 @@ class ModelArtifact(Artifact):
190
190
  """
191
191
  super().__init__(key, body, format=format, target_path=target_path, **kwargs)
192
192
  model_file = str(model_file or "")
193
- if model_file and model_url:
193
+ if (model_file or model_dir or body) and model_url:
194
194
  raise mlrun.errors.MLRunInvalidArgumentError(
195
- "Arguments 'model_file' and 'model_dir' cannot be"
196
- " used together with 'model_url'."
195
+ "Arguments 'model_file' and 'model_url' cannot be"
196
+ " used together with 'model_file', 'model_dir' or 'body'."
197
197
  )
198
198
  if model_file and "/" in model_file:
199
199
  if model_dir:
mlrun/common/constants.py CHANGED
@@ -27,9 +27,16 @@ DASK_LABEL_PREFIX = "dask.org/"
27
27
  NUCLIO_LABEL_PREFIX = "nuclio.io/"
28
28
  RESERVED_TAG_NAME_LATEST = "latest"
29
29
 
30
+ # Kubernetes DNS-1123 label name length limit
31
+ K8S_DNS_1123_LABEL_MAX_LENGTH = 63
32
+
33
+
34
+ RESERVED_BATCH_JOB_SUFFIX = "-batch"
35
+
30
36
  JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
31
37
  JOB_TYPE_PROJECT_LOADER = "project-loader"
32
38
  JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
39
+ MLRUN_ACTIVE_PROJECT = "MLRUN_ACTIVE_PROJECT"
33
40
 
34
41
 
35
42
  class MLRunInternalLabels:
@@ -84,6 +91,8 @@ class MLRunInternalLabels:
84
91
  original_workflow_id = "original-workflow-id"
85
92
  workflow_id = "workflow-id"
86
93
  retrying = "retrying"
94
+ rerun_counter = "rerun-counter"
95
+ rerun_index = "rerun-index"
87
96
 
88
97
  owner = "owner"
89
98
  v3io_user = "v3io_user"
@@ -41,6 +41,7 @@ class ArtifactFormat(ObjectFormat, mlrun.common.types.StrEnum):
41
41
  "spec.metrics",
42
42
  "spec.target_path",
43
43
  "spec.parent_uri",
44
+ "spec.has_children",
44
45
  ]
45
46
  ),
46
47
  }[_format]
@@ -14,6 +14,7 @@
14
14
 
15
15
  import sys
16
16
  import typing
17
+ from datetime import datetime
17
18
 
18
19
  import mlrun.common
19
20
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
@@ -24,6 +25,7 @@ BinCounts = typing.NewType("BinCounts", list[int])
24
25
  BinEdges = typing.NewType("BinEdges", list[float])
25
26
 
26
27
  _MAX_FLOAT = sys.float_info.max
28
+ logger = mlrun.utils.create_logger(level="info", name="mm_helpers")
27
29
 
28
30
 
29
31
  def parse_model_endpoint_project_prefix(path: str, project_name: str):
@@ -87,3 +89,87 @@ def pad_features_hist(feature_stats: FeatureStats) -> None:
87
89
  for feature in feature_stats.values():
88
90
  if hist_key in feature:
89
91
  pad_hist(Histogram(feature[hist_key]))
92
+
93
+
94
+ def get_model_endpoints_creation_task_status(
95
+ server,
96
+ ) -> tuple[
97
+ mlrun.common.schemas.BackgroundTaskState,
98
+ typing.Optional[datetime],
99
+ typing.Optional[set[str]],
100
+ ]:
101
+ background_task = None
102
+ background_task_state = mlrun.common.schemas.BackgroundTaskState.running
103
+ background_task_check_timestamp = None
104
+ model_endpoint_uids = None
105
+ try:
106
+ background_task = mlrun.get_run_db().get_project_background_task(
107
+ server.project, server.model_endpoint_creation_task_name
108
+ )
109
+ background_task_check_timestamp = mlrun.utils.now_date()
110
+ log_background_task_state(
111
+ server, background_task.status.state, background_task_check_timestamp
112
+ )
113
+ background_task_state = background_task.status.state
114
+ except mlrun.errors.MLRunNotFoundError:
115
+ logger.warning(
116
+ "Model endpoint creation task not found listing model endpoints",
117
+ project=server.project,
118
+ task_name=server.model_endpoint_creation_task_name,
119
+ )
120
+ if background_task is None:
121
+ model_endpoints = mlrun.get_run_db().list_model_endpoints(
122
+ project=server.project,
123
+ function_name=server.function_name,
124
+ function_tag=server.function_tag,
125
+ tsdb_metrics=False,
126
+ )
127
+ if model_endpoints:
128
+ model_endpoint_uids = {
129
+ endpoint.metadata.uid for endpoint in model_endpoints.endpoints
130
+ }
131
+ logger.info(
132
+ "Model endpoints found after background task not found, model monitoring will monitor "
133
+ "events",
134
+ project=server.project,
135
+ function_name=server.function_name,
136
+ function_tag=server.function_tag,
137
+ uids=model_endpoint_uids,
138
+ )
139
+ background_task_state = mlrun.common.schemas.BackgroundTaskState.succeeded
140
+ else:
141
+ logger.warning(
142
+ "Model endpoints not found after background task not found, model monitoring will not "
143
+ "monitor events",
144
+ project=server.project,
145
+ function_name=server.function_name,
146
+ function_tag=server.function_tag,
147
+ )
148
+ background_task_state = mlrun.common.schemas.BackgroundTaskState.failed
149
+ return background_task_state, background_task_check_timestamp, model_endpoint_uids
150
+
151
+
152
+ def log_background_task_state(
153
+ server,
154
+ background_task_state: mlrun.common.schemas.BackgroundTaskState,
155
+ background_task_check_timestamp: typing.Optional[datetime],
156
+ ):
157
+ logger.info(
158
+ "Checking model endpoint creation task status",
159
+ task_name=server.model_endpoint_creation_task_name,
160
+ )
161
+ if (
162
+ background_task_state
163
+ in mlrun.common.schemas.BackgroundTaskState.terminal_states()
164
+ ):
165
+ logger.info(
166
+ f"Model endpoint creation task completed with state {background_task_state}"
167
+ )
168
+ else: # in progress
169
+ logger.info(
170
+ f"Model endpoint creation task is still in progress with the current state: "
171
+ f"{background_task_state}. Events will not be monitored for the next "
172
+ f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
173
+ function_name=server.function_name,
174
+ background_task_check_timestamp=background_task_check_timestamp.isoformat(),
175
+ )
@@ -133,6 +133,7 @@ from .k8s import NodeSelectorOperator, Resources, ResourceSpec
133
133
  from .memory_reports import MostCommonObjectTypesReport, ObjectTypeReport
134
134
  from .model_monitoring import (
135
135
  DriftStatus,
136
+ EndpointMode,
136
137
  EndpointType,
137
138
  EndpointUID,
138
139
  EventFieldType,
@@ -153,6 +154,7 @@ from .model_monitoring import (
153
154
  ModelEndpointSchema,
154
155
  ModelEndpointSpec,
155
156
  ModelEndpointStatus,
157
+ ModelMonitoringInfraLabel,
156
158
  ModelMonitoringMode,
157
159
  MonitoringFunctionNames,
158
160
  TSDBTarget,
@@ -55,6 +55,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
55
55
  secret = "secret"
56
56
  run = "run"
57
57
  model_endpoint = "model-endpoint"
58
+ model_monitoring = "model-monitoring"
58
59
  pipeline = "pipeline"
59
60
  hub_source = "hub-source"
60
61
  workflow = "workflow"
@@ -96,6 +97,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
96
97
  # runtime resource doesn't have an identifier, we don't need any auth granularity behind project level
97
98
  AuthorizationResourceTypes.runtime_resource: "/projects/{project_name}/runtime-resources",
98
99
  AuthorizationResourceTypes.model_endpoint: "/projects/{project_name}/model-endpoints/{resource_name}",
100
+ AuthorizationResourceTypes.model_monitoring: "/projects/{project_name}/model-monitoring/{resource_name}",
99
101
  AuthorizationResourceTypes.pipeline: "/projects/{project_name}/pipelines/{resource_name}",
100
102
  AuthorizationResourceTypes.datastore_profile: "/projects/{project_name}/datastore_profiles",
101
103
  # Hub sources are not project-scoped, and auth is globally on the sources endpoint.
@@ -114,11 +114,21 @@ class StateThresholds(pydantic.v1.BaseModel):
114
114
  default: typing.Optional[dict[str, str]]
115
115
 
116
116
 
117
+ class Backoff(pydantic.v1.BaseModel):
118
+ default_base_delay: typing.Optional[str]
119
+ min_base_delay: typing.Optional[str]
120
+
121
+
122
+ class RetrySpec(pydantic.v1.BaseModel):
123
+ backoff: Backoff
124
+
125
+
117
126
  class FunctionSpec(pydantic.v1.BaseModel):
118
127
  image_pull_secret: typing.Optional[ImagePullSecret]
119
128
  security_context: typing.Optional[SecurityContext]
120
129
  service_account: typing.Optional[ServiceAccount]
121
130
  state_thresholds: typing.Optional[StateThresholds]
131
+ retry: typing.Optional[RetrySpec]
122
132
 
123
133
  class Config:
124
134
  extra = pydantic.v1.Extra.allow
@@ -15,6 +15,7 @@
15
15
  from datetime import datetime, timezone
16
16
  from typing import Optional
17
17
 
18
+ import deepdiff
18
19
  from pydantic.v1 import BaseModel, Extra, Field
19
20
 
20
21
  import mlrun.common.types
@@ -36,9 +37,9 @@ class HubObjectMetadata(BaseModel):
36
37
  extra = Extra.allow
37
38
 
38
39
 
39
- # Currently only functions are supported. Will add more in the future.
40
40
  class HubSourceType(mlrun.common.types.StrEnum):
41
41
  functions = "functions"
42
+ modules = "modules"
42
43
 
43
44
 
44
45
  # Sources-related objects
@@ -46,7 +47,6 @@ class HubSourceSpec(ObjectSpec):
46
47
  path: str # URL to base directory, should include schema (s3://, etc...)
47
48
  channel: str
48
49
  credentials: Optional[dict] = {}
49
- object_type: HubSourceType = Field(HubSourceType.functions, const=True)
50
50
 
51
51
 
52
52
  class HubSource(BaseModel):
@@ -55,11 +55,11 @@ class HubSource(BaseModel):
55
55
  spec: HubSourceSpec
56
56
  status: Optional[ObjectStatus] = ObjectStatus(state="created")
57
57
 
58
- def get_full_uri(self, relative_path):
59
- return f"{self.spec.path}/{self.spec.object_type}/{self.spec.channel}/{relative_path}"
58
+ def get_full_uri(self, relative_path, object_type):
59
+ return f"{self.spec.path}/{object_type}/{self.spec.channel}/{relative_path}"
60
60
 
61
- def get_catalog_uri(self):
62
- return self.get_full_uri(mlrun.mlconf.hub.catalog_filename)
61
+ def get_catalog_uri(self, object_type):
62
+ return self.get_full_uri(mlrun.mlconf.hub.catalog_filename, object_type)
63
63
 
64
64
  @classmethod
65
65
  def generate_default_source(cls):
@@ -78,11 +78,23 @@ class HubSource(BaseModel):
78
78
  spec=HubSourceSpec(
79
79
  path=mlrun.mlconf.hub.default_source.url,
80
80
  channel=mlrun.mlconf.hub.default_source.channel,
81
- object_type=HubSourceType(mlrun.mlconf.hub.default_source.object_type),
82
81
  ),
83
82
  status=ObjectStatus(state="created"),
84
83
  )
85
84
 
85
+ def diff(self, another_source: "HubSource") -> dict:
86
+ """
87
+ Compare this HubSource with another one.
88
+ Returns a dict of differences (metadata, spec, status).
89
+ """
90
+ exclude_paths = [
91
+ "root['metadata']['updated']",
92
+ "root['metadata']['created']",
93
+ ]
94
+ return deepdiff.DeepDiff(
95
+ self.dict(), another_source.dict(), exclude_paths=exclude_paths
96
+ )
97
+
86
98
 
87
99
  last_source_index = -1
88
100
 
@@ -94,21 +106,16 @@ class IndexedHubSource(BaseModel):
94
106
 
95
107
  # Item-related objects
96
108
  class HubItemMetadata(HubObjectMetadata):
97
- source: HubSourceType = Field(HubSourceType.functions, const=True)
109
+ source: HubSourceType = HubSourceType.functions
98
110
  version: str
99
111
  tag: Optional[str]
100
112
 
101
113
  def get_relative_path(self) -> str:
102
- if self.source == HubSourceType.functions:
103
- # This is needed since the hub deployment script modifies the paths to use _ instead of -.
104
- modified_name = self.name.replace("-", "_")
105
- # Prefer using the tag if exists. Otherwise, use version.
106
- version = self.tag or self.version
107
- return f"{modified_name}/{version}/"
108
- else:
109
- raise mlrun.errors.MLRunInvalidArgumentError(
110
- f"Bad source for hub item - {self.source}"
111
- )
114
+ # This is needed since the hub deployment script modifies the paths to use _ instead of -.
115
+ modified_name = self.name.replace("-", "_")
116
+ # Prefer using the tag if exists. Otherwise, use version.
117
+ version = self.tag or self.version
118
+ return f"{modified_name}/{version}/"
112
119
 
113
120
 
114
121
  class HubItemSpec(ObjectSpec):
@@ -127,3 +134,8 @@ class HubCatalog(BaseModel):
127
134
  kind: ObjectKind = Field(ObjectKind.hub_catalog, const=True)
128
135
  channel: str
129
136
  catalog: list[HubItem]
137
+
138
+
139
+ class HubModuleType(mlrun.common.types.StrEnum):
140
+ generic = "generic"
141
+ monitoring_app = "monitoring_application"
@@ -16,6 +16,7 @@ from .constants import (
16
16
  INTERSECT_DICT_KEYS,
17
17
  ApplicationEvent,
18
18
  DriftStatus,
19
+ EndpointMode,
19
20
  EndpointType,
20
21
  EndpointUID,
21
22
  EventFieldType,
@@ -29,6 +30,7 @@ from .constants import (
29
30
  ModelEndpointMonitoringMetricType,
30
31
  ModelEndpointSchema,
31
32
  ModelMonitoringAppLabel,
33
+ ModelMonitoringInfraLabel,
32
34
  ModelMonitoringMode,
33
35
  MonitoringFunctionNames,
34
36
  PredictionsQueryConstants,
@@ -34,6 +34,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
34
34
  UID = "uid"
35
35
  PROJECT = "project"
36
36
  ENDPOINT_TYPE = "endpoint_type"
37
+ MODE = "mode"
37
38
  NAME = "name"
38
39
  CREATED = "created"
39
40
  UPDATED = "updated"
@@ -195,6 +196,10 @@ class WriterEventKind(MonitoringStrEnum):
195
196
  RESULT = "result"
196
197
  STATS = "stats"
197
198
 
199
+ @classmethod
200
+ def user_app_outputs(cls):
201
+ return [cls.METRIC, cls.RESULT]
202
+
198
203
 
199
204
  class ControllerEvent(MonitoringStrEnum):
200
205
  KIND = "kind"
@@ -205,6 +210,11 @@ class ControllerEvent(MonitoringStrEnum):
205
210
  FIRST_REQUEST = "first_request"
206
211
  FEATURE_SET_URI = "feature_set_uri"
207
212
  ENDPOINT_TYPE = "endpoint_type"
213
+
214
+ # first_timestamp and last_timestamp are used to batch completed events
215
+ FIRST_TIMESTAMP = "first_timestamp"
216
+ LAST_TIMESTAMP = "last_timestamp"
217
+
208
218
  ENDPOINT_POLICY = "endpoint_policy"
209
219
  # Note: currently under endpoint policy we will have a dictionary including the keys: "application_names"
210
220
  # "base_period", and "updated_endpoint" stand for when the MEP was updated
@@ -219,6 +229,7 @@ class ControllerEventEndpointPolicy(MonitoringStrEnum):
219
229
  class ControllerEventKind(MonitoringStrEnum):
220
230
  NOP_EVENT = "nop_event"
221
231
  REGULAR_EVENT = "regular_event"
232
+ BATCH_COMPLETE = "batch_complete"
222
233
 
223
234
 
224
235
  class MetricData(MonitoringStrEnum):
@@ -297,6 +308,7 @@ class FileTargetKind:
297
308
  MONITORING_APPLICATION = "monitoring_application"
298
309
  ERRORS = "errors"
299
310
  STATS = "stats"
311
+ PARQUET_STATS = "parquet_stats"
300
312
  LAST_REQUEST = "last_request"
301
313
 
302
314
 
@@ -321,6 +333,12 @@ class EndpointType(IntEnum):
321
333
  return [cls.NODE_EP, cls.ROUTER, cls.BATCH_EP]
322
334
 
323
335
 
336
+ class EndpointMode(IntEnum):
337
+ REAL_TIME = 0
338
+ BATCH = 1
339
+ BATCH_LEGACY = 2 # legacy batch mode, used for endpoints created through the batch inference job
340
+
341
+
324
342
  class MonitoringFunctionNames(MonitoringStrEnum):
325
343
  STREAM = "model-monitoring-stream"
326
344
  APPLICATION_CONTROLLER = "model-monitoring-controller"
@@ -474,19 +492,25 @@ class ModelEndpointMonitoringMetricType(StrEnum):
474
492
  METRIC = "metric"
475
493
 
476
494
 
495
+ # refer to `mlrun.utils.regex.project_name`
496
+ _INNER_PROJECT_PATTERN = r"[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?"
497
+ PROJECT_PATTERN = rf"^{_INNER_PROJECT_PATTERN}$"
498
+
499
+ MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
500
+
477
501
  _FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
502
+ _RESULT_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_]*"
503
+
478
504
  FQN_PATTERN = (
479
- rf"^(?P<project>{_FQN_PART_PATTERN})\."
505
+ rf"^(?P<project>{_INNER_PROJECT_PATTERN})\."
480
506
  rf"(?P<app>{_FQN_PART_PATTERN})\."
481
507
  rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
482
- rf"(?P<name>{_FQN_PART_PATTERN})$"
508
+ rf"(?P<name>{_RESULT_NAME_PATTERN})$"
483
509
  )
484
510
  FQN_REGEX = re.compile(FQN_PATTERN)
511
+ APP_NAME_REGEX = re.compile(_FQN_PART_PATTERN)
512
+ RESULT_NAME_REGEX = re.compile(_RESULT_NAME_PATTERN)
485
513
 
486
- # refer to `mlrun.utils.regex.project_name`
487
- PROJECT_PATTERN = r"^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$"
488
- MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
489
- RESULT_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_]*"
490
514
 
491
515
  INTERSECT_DICT_KEYS = {
492
516
  ModelEndpointMonitoringMetricType.METRIC: "intersect_metrics",
@@ -54,12 +54,21 @@ class FunctionSummary(BaseModel):
54
54
 
55
55
  return cls(
56
56
  type=func_type,
57
- name=func_dict["metadata"]["name"],
57
+ name=func_dict["metadata"]["name"]
58
+ if func_type != FunctionsType.APPLICATION
59
+ else func_dict["spec"]
60
+ .get("graph", {})
61
+ .get("steps", {})
62
+ .get("PrepareMonitoringEvent", {})
63
+ .get("class_args", {})
64
+ .get("application_name"),
58
65
  application_class=""
59
66
  if func_type != FunctionsType.APPLICATION
60
- else func_dict["spec"]["graph"]["steps"]["PushToMonitoringWriter"]["after"][
61
- 0
62
- ],
67
+ else func_dict["spec"]
68
+ .get("graph", {})
69
+ .get("steps", {})
70
+ .get("PushToMonitoringWriter", {})
71
+ .get("after", [None])[0],
63
72
  project_name=func_dict["metadata"]["project"],
64
73
  updated_time=func_dict["metadata"].get("updated"),
65
74
  status=func_dict["status"].get("state"),
@@ -28,6 +28,7 @@ from .constants import (
28
28
  FQN_REGEX,
29
29
  MODEL_ENDPOINT_ID_PATTERN,
30
30
  PROJECT_PATTERN,
31
+ EndpointMode,
31
32
  EndpointType,
32
33
  ModelEndpointMonitoringMetricType,
33
34
  ModelMonitoringMode,
@@ -118,6 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
118
119
  project: constr(regex=PROJECT_PATTERN)
119
120
  endpoint_type: EndpointType = EndpointType.NODE_EP
120
121
  uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
122
+ mode: Optional[EndpointMode] = None
121
123
 
122
124
  @classmethod
123
125
  def mutable_fields(cls):
@@ -129,6 +131,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
129
131
  return str(v)
130
132
  return v
131
133
 
134
+ @validator("mode", pre=True, always=True)
135
+ def _set_mode_based_on_endpoint_type(cls, v, values): # noqa: N805
136
+ if v is None:
137
+ if values.get("endpoint_type") == EndpointType.BATCH_EP:
138
+ return EndpointMode.BATCH_LEGACY
139
+ else:
140
+ return EndpointMode.REAL_TIME
141
+ return v
142
+
132
143
 
133
144
  class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
134
145
  model_class: Optional[str] = ""
@@ -18,7 +18,7 @@ import pydantic.v1
18
18
 
19
19
 
20
20
  class PipelinesPagination(str):
21
- default_page_size = 20
21
+ default_page_size = 200
22
22
  # https://github.com/kubeflow/pipelines/blob/master/backend/src/apiserver/list/list.go#L363
23
23
  max_page_size = 200
24
24
 
@@ -47,3 +47,6 @@ class MonitoringData(StrEnum):
47
47
  class ModelsData(enum.Enum):
48
48
  MODEL_CLASS = 0
49
49
  MODEL_PARAMETERS = 1
50
+
51
+
52
+ MAX_BATCH_JOB_DURATION = "1w"
@@ -53,6 +53,7 @@ class RerunWorkflowRequest(pydantic.v1.BaseModel):
53
53
  workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
54
54
  original_workflow_runner_uid: typing.Optional[str] = None
55
55
  original_workflow_name: typing.Optional[str] = None
56
+ rerun_index: typing.Optional[int] = None
56
57
 
57
58
 
58
59
  class WorkflowResponse(pydantic.v1.BaseModel):