mlrun 1.8.0rc9__py3-none-any.whl → 1.8.0rc12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (37)
  1. mlrun/artifacts/__init__.py +1 -1
  2. mlrun/artifacts/document.py +53 -11
  3. mlrun/common/constants.py +1 -0
  4. mlrun/common/schemas/__init__.py +2 -0
  5. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  6. mlrun/common/schemas/model_monitoring/constants.py +7 -0
  7. mlrun/common/schemas/model_monitoring/model_endpoints.py +36 -0
  8. mlrun/config.py +1 -0
  9. mlrun/data_types/data_types.py +1 -0
  10. mlrun/data_types/spark.py +3 -2
  11. mlrun/data_types/to_pandas.py +11 -2
  12. mlrun/datastore/__init__.py +2 -0
  13. mlrun/datastore/targets.py +2 -1
  14. mlrun/datastore/vectorstore.py +21 -15
  15. mlrun/db/base.py +2 -0
  16. mlrun/db/httpdb.py +12 -0
  17. mlrun/db/nopdb.py +2 -0
  18. mlrun/feature_store/steps.py +1 -1
  19. mlrun/model_monitoring/api.py +30 -21
  20. mlrun/model_monitoring/applications/base.py +42 -4
  21. mlrun/projects/project.py +18 -16
  22. mlrun/runtimes/nuclio/serving.py +28 -5
  23. mlrun/serving/__init__.py +8 -0
  24. mlrun/serving/merger.py +1 -1
  25. mlrun/serving/remote.py +17 -5
  26. mlrun/serving/routers.py +27 -87
  27. mlrun/serving/server.py +6 -2
  28. mlrun/serving/states.py +154 -13
  29. mlrun/serving/v2_serving.py +38 -79
  30. mlrun/utils/helpers.py +6 -0
  31. mlrun/utils/version/version.json +2 -2
  32. {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/METADATA +10 -10
  33. {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/RECORD +37 -37
  34. {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/LICENSE +0 -0
  35. {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/WHEEL +0 -0
  36. {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/entry_points.txt +0 -0
  37. {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/top_level.txt +0 -0
mlrun/artifacts/__init__.py CHANGED
@@ -13,7 +13,7 @@
 # limitations under the License.

 # Don't remove this, used by sphinx documentation
-__all__ = ["get_model", "update_model"]
+__all__ = ["get_model", "update_model", "DocumentLoaderSpec", "MLRunLoader"]

 from .base import (
     Artifact,
mlrun/artifacts/document.py CHANGED
@@ -39,8 +39,6 @@ class DocumentLoaderSpec(ModelObj):
         src_name (str): The name of the source attribute to pass to the loader class.
         kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.

-    Methods:
-        make_loader(src_path): Creates an instance of the loader class with the specified source path.
     """

     _dict_fields = ["loader_class_name", "src_name", "kwargs"]
@@ -58,6 +56,19 @@
             loader_class_name (str): The name of the loader class to use.
             src_name (str): The source name for the document.
             kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.
+
+        Example:
+            >>> # Create a loader specification for PDF documents
+            >>> loader_spec = DocumentLoaderSpec(
+            ...     loader_class_name="langchain_community.document_loaders.PDFLoader",
+            ...     src_name="file_path",
+            ...     kwargs={"extract_images": True},
+            ... )
+            >>> # Create a loader instance for a specific PDF file
+            >>> pdf_loader = loader_spec.make_loader("/path/to/document.pdf")
+            >>> # Load the documents
+            >>> documents = pdf_loader.load()
+
         """
         self.loader_class_name = loader_class_name
         self.src_name = src_name
@@ -87,6 +98,45 @@ class MLRunLoader:

     Returns:
         DynamicDocumentLoader: An instance of a dynamically defined subclass of BaseLoader.
+
+    Example:
+        >>> # Create a document loader specification
+        >>> loader_spec = DocumentLoaderSpec(
+        ...     loader_class_name="langchain_community.document_loaders.TextLoader",
+        ...     src_name="file_path",
+        ... )
+        >>> # Create a basic loader for a single file
+        >>> loader = MLRunLoader(
+        ...     source_path="/path/to/document.txt",
+        ...     loader_spec=loader_spec,
+        ...     artifact_key="my_doc",
+        ...     producer=project,
+        ...     upload=True,
+        ... )
+        >>> documents = loader.load()
+        >>> # Create a loader with auto-generated keys
+        >>> loader = MLRunLoader(
+        ...     source_path="/path/to/document.txt",
+        ...     loader_spec=loader_spec,
+        ...     artifact_key="doc%%",  # %% will be replaced with encoded path
+        ...     producer=project,
+        ... )
+        >>> documents = loader.load()
+        >>> # Use with DirectoryLoader
+        >>> from langchain_community.document_loaders import DirectoryLoader
+        >>> dir_loader = DirectoryLoader(
+        ...     "/path/to/directory",
+        ...     glob="**/*.txt",
+        ...     loader_cls=MLRunLoader,
+        ...     loader_kwargs={
+        ...         "loader_spec": loader_spec,
+        ...         "artifact_key": "doc%%",
+        ...         "producer": project,
+        ...         "upload": True,
+        ...     },
+        ... )
+        >>> documents = dir_loader.load()
+
     """

     def __new__(
@@ -178,11 +228,6 @@ class MLRunLoader:
 class DocumentArtifact(Artifact):
     """
     A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
-
-    Methods:
-        to_langchain_documents(splitter): Create LC documents from the artifact.
-        collection_add(collection_id): Add a collection ID to the artifact.
-        collection_remove(collection_id): Remove a collection ID from the artifact.
     """

     class DocumentArtifactSpec(ArtifactSpec):
@@ -205,10 +250,6 @@ class DocumentArtifact(Artifact):
         self.collections = collections if collections is not None else {}
         self.original_source = original_source

-    """
-    A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
-    """
-
     kind = "document"

     METADATA_SOURCE_KEY = "source"
@@ -242,6 +283,7 @@ class DocumentArtifact(Artifact):
         )

     def get_source(self):
+        """Get the source URI for this artifact."""
         return generate_artifact_uri(self.metadata.project, self.spec.db_key)

     def to_langchain_documents(
mlrun/common/constants.py CHANGED
@@ -25,6 +25,7 @@ MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
 MLRUN_LABEL_PREFIX = "mlrun/"
 DASK_LABEL_PREFIX = "dask.org/"
 NUCLIO_LABEL_PREFIX = "nuclio.io/"
+RESERVED_TAG_NAME_LATEST = "latest"


 class MLRunInternalLabels:
mlrun/common/schemas/__init__.py CHANGED
@@ -146,8 +146,10 @@ from .model_monitoring import (
     GrafanaTable,
     GrafanaTimeSeriesTarget,
     ModelEndpoint,
+    ModelEndpointCreationStrategy,
     ModelEndpointList,
     ModelEndpointMetadata,
+    ModelEndpointSchema,
     ModelEndpointSpec,
     ModelEndpointStatus,
     ModelMonitoringMode,
mlrun/common/schemas/model_monitoring/__init__.py CHANGED
@@ -26,6 +26,7 @@ from .constants import (
     FileTargetKind,
     FunctionURI,
     MetricData,
+    ModelEndpointCreationStrategy,
    ModelEndpointMonitoringMetricType,
    ModelEndpointSchema,
    ModelEndpointTarget,
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -44,6 +44,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
     FUNCTION_TAG = "function_tag"
     FUNCTION_UID = "function_uid"
     MODEL_NAME = "model_name"
+    MODEL_DB_KEY = "model_db_key"
     MODEL_TAG = "model_tag"
     MODEL_CLASS = "model_class"
     MODEL_UID = "model_uid"
@@ -70,6 +71,12 @@
     DRIFT_MEASURES = "drift_measures"


+class ModelEndpointCreationStrategy(MonitoringStrEnum):
+    INPLACE = "inplace"
+    ARCHIVE = "archive"
+    OVERWRITE = "overwrite"
+
+
 class EventFieldType:
     FUNCTION_URI = "function_uri"
     FUNCTION = "function"
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -117,10 +117,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]

+    @classmethod
+    def mutable_fields(cls):
+        return ["labels"]
+

 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     model_uid: Optional[str] = ""
     model_name: Optional[str] = ""
+    model_db_key: Optional[str] = ""
     model_tag: Optional[str] = ""
     model_class: Optional[str] = ""
     function_name: Optional[str] = ""
@@ -135,6 +140,21 @@ class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     children_uids: Optional[list[str]] = []
     monitoring_feature_set_uri: Optional[str] = ""

+    @classmethod
+    def mutable_fields(cls):
+        return [
+            "model_uid",
+            "model_name",
+            "model_db_key",
+            "model_tag",
+            "model_class",
+            "function_uid",
+            "feature_names",
+            "label_names",
+            "children",
+            "children_uids",
+        ]
+

 class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
     state: Optional[str] = "unknown"  # will be updated according to the function state
@@ -151,6 +171,14 @@ class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
     drift_measures: Optional[dict] = {}
     drift_measures_timestamp: Optional[datetime] = None

+    @classmethod
+    def mutable_fields(cls):
+        return [
+            "monitoring_mode",
+            "first_request",
+            "last_request",
+        ]
+

 class ModelEndpoint(BaseModel):
     kind: ObjectKind = Field(ObjectKind.model_endpoint, const=True)
@@ -158,6 +186,14 @@ class ModelEndpoint(BaseModel):
     spec: ModelEndpointSpec
     status: ModelEndpointStatus

+    @classmethod
+    def mutable_fields(cls):
+        return (
+            ModelEndpointMetadata.mutable_fields()
+            + ModelEndpointSpec.mutable_fields()
+            + ModelEndpointStatus.mutable_fields()
+        )
+
     def flat_dict(self) -> dict[str, Any]:
         """Generate a flattened `ModelEndpoint` dictionary. The flattened dictionary result is important for storing
         the model endpoint object in the database.
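
Taken together, ModelEndpoint.mutable_fields() is simply the concatenation of the metadata, spec, and status lists. A hedged sketch of how a caller might use it to restrict an update payload — the filter_mutable helper is hypothetical, not part of mlrun:

    from mlrun.common.schemas import ModelEndpoint

    def filter_mutable(attributes: dict) -> dict:
        # keep only the attributes the schema declares as mutable
        allowed = set(ModelEndpoint.mutable_fields())
        return {k: v for k, v in attributes.items() if k in allowed}

    filter_mutable({"model_tag": "v2", "uid": "abc123"})  # -> {"model_tag": "v2"}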
mlrun/config.py CHANGED
@@ -160,6 +160,7 @@ default_config = {
     # migration from artifacts to artifacts_v2 is done in batches, and requires a state file to keep track of the
     # migration progress.
     "artifact_migration_batch_size": 200,
+    "artifact_migration_v9_batch_size": 30000,
     "artifact_migration_state_file_path": "./db/_artifact_migration_state.json",
     "datasets": {
         "max_preview_columns": 100,
mlrun/data_types/data_types.py CHANGED
@@ -124,6 +124,7 @@ def spark_to_value_type(data_type)
     "double": ValueType.DOUBLE,
     "boolean": ValueType.BOOL,
     "timestamp": ValueType.DATETIME,
+    "timestamp_ntz": ValueType.DATETIME,
     "string": ValueType.STRING,
     "array": "list",
     "map": "dict",
mlrun/data_types/spark.py CHANGED
@@ -19,7 +19,7 @@ from typing import Optional
 import numpy as np
 import pytz
 from pyspark.sql.functions import to_utc_timestamp
-from pyspark.sql.types import BooleanType, DoubleType, TimestampType
+from pyspark.sql.types import BooleanType, DoubleType

 from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
 from mlrun.utils import logger
@@ -144,7 +144,8 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
     timestamp_columns = set()
     boolean_columns = set()
     for field in df_after_type_casts.schema.fields:
-        is_timestamp = isinstance(field.dataType, TimestampType)
+        # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
+        is_timestamp = field.dataType.typeName().startswith("timestamp")
         is_boolean = isinstance(field.dataType, BooleanType)
         if is_timestamp:
             df_after_type_casts = df_after_type_casts.withColumn(
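
The string check works because typeName() returns "timestamp" for TimestampType and "timestamp_ntz" for TimestampNTZType, so one predicate covers both without importing a type that only exists in newer PySpark. A quick sketch (assumes PySpark >= 3.4 so that TimestampNTZType is importable):

    from pyspark.sql.types import TimestampType, TimestampNTZType

    assert TimestampType().typeName() == "timestamp"
    assert TimestampNTZType().typeName() == "timestamp_ntz"
    # both satisfy the new check
    assert all(
        t.typeName().startswith("timestamp")
        for t in (TimestampType(), TimestampNTZType())
    )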
mlrun/data_types/to_pandas.py CHANGED
@@ -244,6 +244,15 @@ def _to_corrected_pandas_type(dt)


 def spark_df_to_pandas(spark_df):
+    import pyspark
+
+    if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
+
+        def to_pandas(spark_df_inner):
+            return spark_df_inner.toPandas()
+    else:
+        to_pandas = _to_pandas
+
     # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
     # when we upgrade pyspark, we should check whether this workaround is still necessary
     # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
@@ -262,9 +271,9 @@
             )
             type_conversion_dict[field.name] = "datetime64[ns]"

-        df = _to_pandas(spark_df)
+        df = to_pandas(spark_df)
         if type_conversion_dict:
             df = df.astype(type_conversion_dict)
         return df
     else:
-        return _to_pandas(spark_df)
+        return to_pandas(spark_df)
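
Net effect: on PySpark >= 3.5.0 the stock DataFrame.toPandas() is trusted, while older versions keep routing through the patched _to_pandas workaround; the timestamp type-conversion logic is unchanged either way. A standalone sketch of the gate, mirroring the same semver calls as the hunk above (whichever semver module to_pandas.py already imports is assumed; fallback stands in for the vendored _to_pandas):

    import pyspark
    import semver

    def pick_converter(fallback):
        # stock toPandas on new PySpark, patched fallback on older versions
        if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
            return lambda df: df.toPandas()
        return fallback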
mlrun/datastore/__init__.py CHANGED
@@ -30,6 +30,8 @@ __all__ = [
     "DatabricksFileSystemDisableCache",
     "DatabricksFileBugFixed",
     "get_stream_pusher",
+    "ConfigProfile",
+    "VectorStoreCollection",
 ]

 import fsspec
mlrun/datastore/targets.py CHANGED
@@ -1136,7 +1136,8 @@ class CSVTarget(BaseStoreTarget):
         import pyspark.sql.functions as funcs

         for col_name, col_type in df.dtypes:
-            if col_type == "timestamp":
+            # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
+            if col_type.startswith("timestamp"):
                 # df.write.csv saves timestamps with millisecond precision, but we want microsecond precision
                 # for compatibility with storey.
                 df = df.withColumn(
mlrun/datastore/vectorstore.py CHANGED
@@ -123,23 +123,29 @@ class VectorStoreCollection:
         Converts artifacts to LangChain documents, adds them to the vector store, and
         updates the MLRun context. If documents are split, the IDs are handled appropriately.

-        Args:
-            artifacts (list[DocumentArtifact]): List of DocumentArtifact objects to add
-            splitter (optional): Document splitter to break artifacts into smaller chunks.
-                If None, each artifact becomes a single document.
-            **kwargs: Additional arguments passed to the underlying add_documents method.
-                Special handling for 'ids' kwarg:
-                - If provided and document is split, IDs are generated as "{original_id}_{i}"
-                  where i starts from 1 (e.g., "doc1_1", "doc1_2", etc.)
-                - If provided and document isn't split, original IDs are used as-is
+        :param artifacts: List of DocumentArtifact objects to add
+        :type artifacts: list[DocumentArtifact]
+        :param splitter: Document splitter to break artifacts into smaller chunks.
+            If None, each artifact becomes a single document.
+        :type splitter: TextSplitter, optional
+        :param kwargs: Additional arguments passed to the underlying add_documents method.
+            Special handling for 'ids' kwarg:
+
+            * If provided and document is split, IDs are generated as "{original_id}_{i}"
+              where i starts from 1 (e.g., "doc1_1", "doc1_2", etc.)
+            * If provided and document isn't split, original IDs are used as-is
+
+        :return: List of IDs for all added documents. When no custom IDs are provided:
+
+            * Without splitting: Vector store generates IDs automatically
+            * With splitting: Vector store generates separate IDs for each chunk

-        Returns:
-            list: List of IDs for all added documents. When no custom IDs are provided:
-                - Without splitting: Vector store generates IDs automatically
-                - With splitting: Vector store generates separate IDs for each chunk
             When custom IDs are provided:
-                - Without splitting: Uses provided IDs directly
-                - With splitting: Generates sequential IDs as "{original_id}_{i}" for each chunk
+
+            * Without splitting: Uses provided IDs directly
+            * With splitting: Generates sequential IDs as "{original_id}_{i}" for each chunk
+        :rtype: list
+
         """
         all_ids = []
         user_ids = kwargs.pop("ids", None)
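
The chunk-ID rule described in that docstring is easy to state on its own. A small illustrative helper (not part of mlrun) that reproduces the documented scheme:

    def chunk_ids(original_id: str, n_chunks: int) -> list[str]:
        # unsplit documents keep the user-provided ID; split documents get
        # "{original_id}_{i}" with i starting from 1, per the docstring above
        if n_chunks <= 1:
            return [original_id]
        return [f"{original_id}_{i}" for i in range(1, n_chunks + 1)]

    assert chunk_ids("doc1", 3) == ["doc1_1", "doc1_2", "doc1_3"]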
mlrun/db/base.py CHANGED
@@ -666,6 +666,7 @@ class RunDBInterface(ABC):
     def create_model_endpoint(
         self,
         model_endpoint: mlrun.common.schemas.ModelEndpoint,
+        creation_strategy: mlrun.common.schemas.ModelEndpointCreationStrategy = "inplace",
     ) -> mlrun.common.schemas.ModelEndpoint:
         pass

@@ -688,6 +689,7 @@ class RunDBInterface(ABC):
         function_name: Optional[str] = None,
         function_tag: Optional[str] = None,
         model_name: Optional[str] = None,
+        model_tag: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
         start: Optional[datetime.datetime] = None,
         end: Optional[datetime.datetime] = None,
mlrun/db/httpdb.py CHANGED
@@ -3582,11 +3582,17 @@ class HTTPRunDB(RunDBInterface):
     def create_model_endpoint(
         self,
         model_endpoint: mlrun.common.schemas.ModelEndpoint,
+        creation_strategy: mlrun.common.schemas.ModelEndpointCreationStrategy = "inplace",
     ) -> mlrun.common.schemas.ModelEndpoint:
         """
         Creates a DB record with the given model_endpoint record.

         :param model_endpoint: An object representing the model endpoint.
+        :param creation_strategy: Model endpoint creation strategy:
+            * overwrite - Create a new model endpoint and delete the last old one if it exists.
+            * inplace - Use the existing model endpoint if it already exists (default).
+            * archive - Preserve the old model endpoint and create a new one,
+              tagging it as the latest.

         :return: The created model endpoint object.
         """
@@ -3596,6 +3602,9 @@
             method=mlrun.common.types.HTTPMethod.POST,
             path=path,
             body=model_endpoint.json(),
+            params={
+                "creation_strategy": creation_strategy,
+            },
         )
         return mlrun.common.schemas.ModelEndpoint(**response.json())

@@ -3637,6 +3646,7 @@
         function_name: Optional[str] = None,
         function_tag: Optional[str] = None,
         model_name: Optional[str] = None,
+        model_tag: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
@@ -3653,6 +3663,7 @@
         :param function_name: The name of the function
         :param function_tag: The tag of the function
         :param model_name: The name of the model
+        :param model_tag: The tag of the model
         :param labels: A list of labels to filter by. (see mlrun.common.schemas.LabelsModel)
         :param start: The start time to filter by. Corresponding to the `created` field.
         :param end: The end time to filter by. Corresponding to the `created` field.
@@ -3671,6 +3682,7 @@
             params={
                 "name": name,
                 "model_name": model_name,
+                "model_tag": model_tag,
                 "function_name": function_name,
                 "function_tag": function_tag,
                 "label": labels,
mlrun/db/nopdb.py CHANGED
@@ -575,6 +575,7 @@ class NopDB(RunDBInterface):
     def create_model_endpoint(
         self,
         model_endpoint: mlrun.common.schemas.ModelEndpoint,
+        creation_strategy: mlrun.common.schemas.ModelEndpointCreationStrategy = "inplace",
     ) -> mlrun.common.schemas.ModelEndpoint:
         pass

@@ -595,6 +596,7 @@ class NopDB(RunDBInterface):
         function_name: Optional[str] = None,
         function_tag: Optional[str] = None,
         model_name: Optional[str] = None,
+        model_tag: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
         start: Optional[datetime.datetime] = None,
         end: Optional[datetime.datetime] = None,
mlrun/feature_store/steps.py CHANGED
@@ -671,7 +671,7 @@ class SetEventMetadata(MapClass):

         self._tagging_funcs = []

-    def post_init(self, mode="sync"):
+    def post_init(self, mode="sync", **kwargs):
         def add_metadata(name, path, operator=str):
             def _add_meta(event):
                 value = get_in(event.body, path)
mlrun/model_monitoring/api.py CHANGED
@@ -54,9 +54,10 @@ def get_or_create_model_endpoint(
     model_endpoint_name: str = "",
     endpoint_id: str = "",
     function_name: str = "",
+    function_tag: str = "latest",
     context: typing.Optional["mlrun.MLClientCtx"] = None,
     sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
-    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
     db_session=None,
 ) -> ModelEndpoint:
     """
@@ -70,8 +71,8 @@ def get_or_create_model_endpoint(
                                      under this endpoint (applicable only to new endpoint_id).
     :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record based
                                      on the provided `endpoint_id`.
-    :param function_name:            If a new model endpoint is created, use this function name for generating the
-                                     function URI (applicable only to new endpoint_id).
+    :param function_name:            If a new model endpoint is created, use this function name.
+    :param function_tag:             If a new model endpoint is created, use this function tag.
     :param context:                  MLRun context. If `function_name` not provided, use the context to generate the
                                      full function hash.
     :param sample_set_statistics:    Dictionary of sample set statistics that will be used as a reference data for
@@ -86,28 +87,32 @@ def get_or_create_model_endpoint(
     if not db_session:
         # Generate a runtime database
         db_session = mlrun.get_run_db()
+    model_endpoint = None
     try:
-        model_endpoint = db_session.get_model_endpoint(
-            project=project,
-            name=model_endpoint_name,
-            endpoint_id=endpoint_id,
-            function_name=function_name,
-        )
-        # If other fields provided, validate that they correspond to the existing model endpoint data
-        _model_endpoint_validations(
-            model_endpoint=model_endpoint,
-            model_path=model_path,
-            sample_set_statistics=sample_set_statistics,
-        )
+        if endpoint_id:
+            model_endpoint = db_session.get_model_endpoint(
+                project=project,
+                name=model_endpoint_name,
+                endpoint_id=endpoint_id,
+            )
+            # If other fields provided, validate that they correspond to the existing model endpoint data
+            _model_endpoint_validations(
+                model_endpoint=model_endpoint,
+                model_path=model_path,
+                sample_set_statistics=sample_set_statistics,
+            )

     except mlrun.errors.MLRunNotFoundError:
         # Create a new model endpoint with the provided details
+        pass
+    if not model_endpoint:
         model_endpoint = _generate_model_endpoint(
             project=project,
             db_session=db_session,
             model_path=model_path,
             model_endpoint_name=model_endpoint_name,
             function_name=function_name,
+            function_tag=function_tag,
             context=context,
             sample_set_statistics=sample_set_statistics,
             monitoring_mode=monitoring_mode,
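
Net effect: the DB lookup now happens only when an endpoint_id is supplied, and a missing or not-found endpoint falls through to creation. A hedged usage sketch (the project and function names are placeholders):

    from mlrun.model_monitoring.api import get_or_create_model_endpoint

    # no endpoint_id -> skips the lookup and creates a new endpoint record
    endpoint = get_or_create_model_endpoint(
        project="my-project",
        model_endpoint_name="batch-infer-ep",
        function_name="batch-infer",
        function_tag="latest",
    )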
@@ -333,9 +338,10 @@ def _generate_model_endpoint(
     model_path: str,
     model_endpoint_name: str,
     function_name: str,
+    function_tag: str,
     context: "mlrun.MLClientCtx",
     sample_set_statistics: dict[str, typing.Any],
-    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
 ) -> ModelEndpoint:
     """
     Write a new model endpoint record.
@@ -345,8 +351,8 @@ def _generate_model_endpoint(
     :param db_session:            A session that manages the current dialog with the database.
     :param model_path:            The model Store path.
     :param model_endpoint_name:   Model endpoint name will be presented under the new model endpoint.
-    :param function_name:         If a new model endpoint is created, use this function name for generating the
-                                  function URI.
+    :param function_name:         If a new model endpoint is created, use this function name.
+    :param function_tag:          If a new model endpoint is created, use this function tag.
     :param context:               MLRun context. If function_name not provided, use the context to generate the
                                   full function hash.
     :param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
@@ -374,9 +380,12 @@ def _generate_model_endpoint(
             endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
         ),
         spec=mlrun.common.schemas.ModelEndpointSpec(
-            function_name=function_name,
-            model_name=model_obj.metadata.key if model_path else None,
-            model_uid=model_obj.metadata.uid if model_path else None,
+            function_name=function_name or "function",
+            function_tag=function_tag or "latest",
+            model_name=model_obj.metadata.key if model_obj else None,
+            model_uid=model_obj.metadata.uid if model_obj else None,
+            model_tag=model_obj.metadata.tag if model_obj else None,
+            model_db_key=model_obj.spec.db_key if model_obj else None,
             model_class="drift-analysis",
         ),
         status=mlrun.common.schemas.ModelEndpointStatus(
mlrun/model_monitoring/applications/base.py CHANGED
@@ -148,6 +148,44 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         )
         return start, end

+    @classmethod
+    def deploy(
+        cls,
+        func_name: str,
+        func_path: Optional[str] = None,
+        image: Optional[str] = None,
+        handler: Optional[str] = None,
+        with_repo: Optional[bool] = False,
+        tag: Optional[str] = None,
+        requirements: Optional[Union[str, list[str]]] = None,
+        requirements_file: str = "",
+        **application_kwargs,
+    ) -> None:
+        """
+        Set the application to the current project and deploy it as a Nuclio serving function.
+        Required for your model monitoring application to work as a part of the model monitoring framework.
+
+        :param func_name: The name of the function.
+        :param func_path: The path of the function, :code:`None` refers to the current Jupyter notebook.
+
+        For the other arguments, refer to
+        :py:meth:`~mlrun.projects.MlrunProject.set_model_monitoring_function`.
+        """
+        project = cast("mlrun.MlrunProject", mlrun.get_current_project())
+        function = project.set_model_monitoring_function(
+            name=func_name,
+            func=func_path,
+            application_class=cls.__name__,
+            handler=handler,
+            image=image,
+            with_repo=with_repo,
+            requirements=requirements,
+            requirements_file=requirements_file,
+            tag=tag,
+            **application_kwargs,
+        )
+        function.deploy()
+
     @classmethod
     def evaluate(
         cls,
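
A minimal usage sketch of the new classmethod, assuming a user-defined application subclass (the class, handler body, and names below are illustrative):

    import mlrun
    from mlrun.model_monitoring.applications import ModelMonitoringApplicationBase

    class MyDriftApp(ModelMonitoringApplicationBase):
        def do_tracking(self, monitoring_context):
            ...  # compute and return the application's monitoring results

    # run inside an active project context; registers the class on the current
    # project and deploys it as a Nuclio model monitoring function
    MyDriftApp.deploy(func_name="my-drift-app", image="mlrun/mlrun")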
@@ -175,10 +213,10 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param func_name: The name of the function. If not passed, the class name is used.
         :param tag: An optional tag for the function.
         :param run_local: Whether to run the function locally or remotely.
-        :param sample_df: Optional - pandas data-frame as the current dataset.
-            When set, it replaces the data read from the model endpoint's offline source.
-        :param feature_stats: Optional - statistics dictionary of the reference data.
-            When set, it overrides the model endpoint's feature stats.
+        :param sample_data: Optional - pandas data-frame as the current dataset.
+            When set, it replaces the data read from the model endpoint's offline source.
+        :param reference_data: Optional - pandas data-frame of the reference dataset.
+            When set, its statistics override the model endpoint's feature statistics.
         :param image: Docker image to run the job on.
         :param with_repo: Whether to clone the current repo to the build source.
         :param requirements: List of Python requirements to be installed in the image.