mlrun 1.8.0rc9__py3-none-any.whl → 1.8.0rc12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/__init__.py +1 -1
- mlrun/artifacts/document.py +53 -11
- mlrun/common/constants.py +1 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +7 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +36 -0
- mlrun/config.py +1 -0
- mlrun/data_types/data_types.py +1 -0
- mlrun/data_types/spark.py +3 -2
- mlrun/data_types/to_pandas.py +11 -2
- mlrun/datastore/__init__.py +2 -0
- mlrun/datastore/targets.py +2 -1
- mlrun/datastore/vectorstore.py +21 -15
- mlrun/db/base.py +2 -0
- mlrun/db/httpdb.py +12 -0
- mlrun/db/nopdb.py +2 -0
- mlrun/feature_store/steps.py +1 -1
- mlrun/model_monitoring/api.py +30 -21
- mlrun/model_monitoring/applications/base.py +42 -4
- mlrun/projects/project.py +18 -16
- mlrun/runtimes/nuclio/serving.py +28 -5
- mlrun/serving/__init__.py +8 -0
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +17 -5
- mlrun/serving/routers.py +27 -87
- mlrun/serving/server.py +6 -2
- mlrun/serving/states.py +154 -13
- mlrun/serving/v2_serving.py +38 -79
- mlrun/utils/helpers.py +6 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/METADATA +10 -10
- {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/RECORD +37 -37
- {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc9.dist-info → mlrun-1.8.0rc12.dist-info}/top_level.txt +0 -0
mlrun/artifacts/__init__.py
CHANGED
mlrun/artifacts/document.py
CHANGED
|
@@ -39,8 +39,6 @@ class DocumentLoaderSpec(ModelObj):
|
|
|
39
39
|
src_name (str): The name of the source attribute to pass to the loader class.
|
|
40
40
|
kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.
|
|
41
41
|
|
|
42
|
-
Methods:
|
|
43
|
-
make_loader(src_path): Creates an instance of the loader class with the specified source path.
|
|
44
42
|
"""
|
|
45
43
|
|
|
46
44
|
_dict_fields = ["loader_class_name", "src_name", "kwargs"]
|
|
@@ -58,6 +56,19 @@ class DocumentLoaderSpec(ModelObj):
|
|
|
58
56
|
loader_class_name (str): The name of the loader class to use.
|
|
59
57
|
src_name (str): The source name for the document.
|
|
60
58
|
kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.
|
|
59
|
+
|
|
60
|
+
Example:
|
|
61
|
+
>>> # Create a loader specification for PDF documents
|
|
62
|
+
>>> loader_spec = DocumentLoaderSpec(
|
|
63
|
+
... loader_class_name="langchain_community.document_loaders.PDFLoader",
|
|
64
|
+
... src_name="file_path",
|
|
65
|
+
... kwargs={"extract_images": True},
|
|
66
|
+
... )
|
|
67
|
+
>>> # Create a loader instance for a specific PDF file
|
|
68
|
+
>>> pdf_loader = loader_spec.make_loader("/path/to/document.pdf")
|
|
69
|
+
>>> # Load the documents
|
|
70
|
+
>>> documents = pdf_loader.load()
|
|
71
|
+
|
|
61
72
|
"""
|
|
62
73
|
self.loader_class_name = loader_class_name
|
|
63
74
|
self.src_name = src_name
|
|
@@ -87,6 +98,45 @@ class MLRunLoader:
|
|
|
87
98
|
|
|
88
99
|
Returns:
|
|
89
100
|
DynamicDocumentLoader: An instance of a dynamically defined subclass of BaseLoader.
|
|
101
|
+
|
|
102
|
+
Example:
|
|
103
|
+
>>> # Create a document loader specification
|
|
104
|
+
>>> loader_spec = DocumentLoaderSpec(
|
|
105
|
+
... loader_class_name="langchain_community.document_loaders.TextLoader",
|
|
106
|
+
... src_name="file_path",
|
|
107
|
+
... )
|
|
108
|
+
>>> # Create a basic loader for a single file
|
|
109
|
+
>>> loader = MLRunLoader(
|
|
110
|
+
... source_path="/path/to/document.txt",
|
|
111
|
+
... loader_spec=loader_spec,
|
|
112
|
+
... artifact_key="my_doc",
|
|
113
|
+
... producer=project,
|
|
114
|
+
... upload=True,
|
|
115
|
+
... )
|
|
116
|
+
>>> documents = loader.load()
|
|
117
|
+
>>> # Create a loader with auto-generated keys
|
|
118
|
+
>>> loader = MLRunLoader(
|
|
119
|
+
... source_path="/path/to/document.txt",
|
|
120
|
+
... loader_spec=loader_spec,
|
|
121
|
+
... artifact_key="doc%%", # %% will be replaced with encoded path
|
|
122
|
+
... producer=project,
|
|
123
|
+
... )
|
|
124
|
+
>>> documents = loader.load()
|
|
125
|
+
>>> # Use with DirectoryLoader
|
|
126
|
+
>>> from langchain_community.document_loaders import DirectoryLoader
|
|
127
|
+
>>> dir_loader = DirectoryLoader(
|
|
128
|
+
... "/path/to/directory",
|
|
129
|
+
... glob="**/*.txt",
|
|
130
|
+
... loader_cls=MLRunLoader,
|
|
131
|
+
... loader_kwargs={
|
|
132
|
+
... "loader_spec": loader_spec,
|
|
133
|
+
... "artifact_key": "doc%%",
|
|
134
|
+
... "producer": project,
|
|
135
|
+
... "upload": True,
|
|
136
|
+
... },
|
|
137
|
+
... )
|
|
138
|
+
>>> documents = dir_loader.load()
|
|
139
|
+
|
|
90
140
|
"""
|
|
91
141
|
|
|
92
142
|
def __new__(
|
|
@@ -178,11 +228,6 @@ class MLRunLoader:
|
|
|
178
228
|
class DocumentArtifact(Artifact):
|
|
179
229
|
"""
|
|
180
230
|
A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
|
|
181
|
-
|
|
182
|
-
Methods:
|
|
183
|
-
to_langchain_documents(splitter): Create LC documents from the artifact.
|
|
184
|
-
collection_add(collection_id): Add a collection ID to the artifact.
|
|
185
|
-
collection_remove(collection_id): Remove a collection ID from the artifact.
|
|
186
231
|
"""
|
|
187
232
|
|
|
188
233
|
class DocumentArtifactSpec(ArtifactSpec):
|
|
@@ -205,10 +250,6 @@ class DocumentArtifact(Artifact):
|
|
|
205
250
|
self.collections = collections if collections is not None else {}
|
|
206
251
|
self.original_source = original_source
|
|
207
252
|
|
|
208
|
-
"""
|
|
209
|
-
A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
|
|
210
|
-
"""
|
|
211
|
-
|
|
212
253
|
kind = "document"
|
|
213
254
|
|
|
214
255
|
METADATA_SOURCE_KEY = "source"
|
|
@@ -242,6 +283,7 @@ class DocumentArtifact(Artifact):
|
|
|
242
283
|
)
|
|
243
284
|
|
|
244
285
|
def get_source(self):
|
|
286
|
+
"""Get the source URI for this artifact."""
|
|
245
287
|
return generate_artifact_uri(self.metadata.project, self.spec.db_key)
|
|
246
288
|
|
|
247
289
|
def to_langchain_documents(
|
mlrun/common/constants.py
CHANGED
mlrun/common/schemas/__init__.py
CHANGED
|
@@ -146,8 +146,10 @@ from .model_monitoring import (
|
|
|
146
146
|
GrafanaTable,
|
|
147
147
|
GrafanaTimeSeriesTarget,
|
|
148
148
|
ModelEndpoint,
|
|
149
|
+
ModelEndpointCreationStrategy,
|
|
149
150
|
ModelEndpointList,
|
|
150
151
|
ModelEndpointMetadata,
|
|
152
|
+
ModelEndpointSchema,
|
|
151
153
|
ModelEndpointSpec,
|
|
152
154
|
ModelEndpointStatus,
|
|
153
155
|
ModelMonitoringMode,
|
|
@@ -44,6 +44,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
|
|
|
44
44
|
FUNCTION_TAG = "function_tag"
|
|
45
45
|
FUNCTION_UID = "function_uid"
|
|
46
46
|
MODEL_NAME = "model_name"
|
|
47
|
+
MODEL_DB_KEY = "model_db_key"
|
|
47
48
|
MODEL_TAG = "model_tag"
|
|
48
49
|
MODEL_CLASS = "model_class"
|
|
49
50
|
MODEL_UID = "model_uid"
|
|
@@ -70,6 +71,12 @@ class ModelEndpointSchema(MonitoringStrEnum):
|
|
|
70
71
|
DRIFT_MEASURES = "drift_measures"
|
|
71
72
|
|
|
72
73
|
|
|
74
|
+
class ModelEndpointCreationStrategy(MonitoringStrEnum):
|
|
75
|
+
INPLACE = "inplace"
|
|
76
|
+
ARCHIVE = "archive"
|
|
77
|
+
OVERWRITE = "overwrite"
|
|
78
|
+
|
|
79
|
+
|
|
73
80
|
class EventFieldType:
|
|
74
81
|
FUNCTION_URI = "function_uri"
|
|
75
82
|
FUNCTION = "function"
|
|
@@ -117,10 +117,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
|
|
|
117
117
|
endpoint_type: EndpointType = EndpointType.NODE_EP
|
|
118
118
|
uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
|
|
119
119
|
|
|
120
|
+
@classmethod
|
|
121
|
+
def mutable_fields(cls):
|
|
122
|
+
return ["labels"]
|
|
123
|
+
|
|
120
124
|
|
|
121
125
|
class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
|
|
122
126
|
model_uid: Optional[str] = ""
|
|
123
127
|
model_name: Optional[str] = ""
|
|
128
|
+
model_db_key: Optional[str] = ""
|
|
124
129
|
model_tag: Optional[str] = ""
|
|
125
130
|
model_class: Optional[str] = ""
|
|
126
131
|
function_name: Optional[str] = ""
|
|
@@ -135,6 +140,21 @@ class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
|
|
|
135
140
|
children_uids: Optional[list[str]] = []
|
|
136
141
|
monitoring_feature_set_uri: Optional[str] = ""
|
|
137
142
|
|
|
143
|
+
@classmethod
|
|
144
|
+
def mutable_fields(cls):
|
|
145
|
+
return [
|
|
146
|
+
"model_uid",
|
|
147
|
+
"model_name",
|
|
148
|
+
"model_db_key",
|
|
149
|
+
"model_tag",
|
|
150
|
+
"model_class",
|
|
151
|
+
"function_uid",
|
|
152
|
+
"feature_names",
|
|
153
|
+
"label_names",
|
|
154
|
+
"children",
|
|
155
|
+
"children_uids",
|
|
156
|
+
]
|
|
157
|
+
|
|
138
158
|
|
|
139
159
|
class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
|
|
140
160
|
state: Optional[str] = "unknown" # will be updated according to the function state
|
|
@@ -151,6 +171,14 @@ class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
|
|
|
151
171
|
drift_measures: Optional[dict] = {}
|
|
152
172
|
drift_measures_timestamp: Optional[datetime] = None
|
|
153
173
|
|
|
174
|
+
@classmethod
|
|
175
|
+
def mutable_fields(cls):
|
|
176
|
+
return [
|
|
177
|
+
"monitoring_mode",
|
|
178
|
+
"first_request",
|
|
179
|
+
"last_request",
|
|
180
|
+
]
|
|
181
|
+
|
|
154
182
|
|
|
155
183
|
class ModelEndpoint(BaseModel):
|
|
156
184
|
kind: ObjectKind = Field(ObjectKind.model_endpoint, const=True)
|
|
@@ -158,6 +186,14 @@ class ModelEndpoint(BaseModel):
|
|
|
158
186
|
spec: ModelEndpointSpec
|
|
159
187
|
status: ModelEndpointStatus
|
|
160
188
|
|
|
189
|
+
@classmethod
|
|
190
|
+
def mutable_fields(cls):
|
|
191
|
+
return (
|
|
192
|
+
ModelEndpointMetadata.mutable_fields()
|
|
193
|
+
+ ModelEndpointSpec.mutable_fields()
|
|
194
|
+
+ ModelEndpointStatus.mutable_fields()
|
|
195
|
+
)
|
|
196
|
+
|
|
161
197
|
def flat_dict(self) -> dict[str, Any]:
|
|
162
198
|
"""Generate a flattened `ModelEndpoint` dictionary. The flattened dictionary result is important for storing
|
|
163
199
|
the model endpoint object in the database.
|
mlrun/config.py
CHANGED
|
@@ -160,6 +160,7 @@ default_config = {
|
|
|
160
160
|
# migration from artifacts to artifacts_v2 is done in batches, and requires a state file to keep track of the
|
|
161
161
|
# migration progress.
|
|
162
162
|
"artifact_migration_batch_size": 200,
|
|
163
|
+
"artifact_migration_v9_batch_size": 30000,
|
|
163
164
|
"artifact_migration_state_file_path": "./db/_artifact_migration_state.json",
|
|
164
165
|
"datasets": {
|
|
165
166
|
"max_preview_columns": 100,
|
mlrun/data_types/data_types.py
CHANGED
mlrun/data_types/spark.py
CHANGED
|
@@ -19,7 +19,7 @@ from typing import Optional
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import pytz
|
|
21
21
|
from pyspark.sql.functions import to_utc_timestamp
|
|
22
|
-
from pyspark.sql.types import BooleanType, DoubleType
|
|
22
|
+
from pyspark.sql.types import BooleanType, DoubleType
|
|
23
23
|
|
|
24
24
|
from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
|
|
25
25
|
from mlrun.utils import logger
|
|
@@ -144,7 +144,8 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
|
|
|
144
144
|
timestamp_columns = set()
|
|
145
145
|
boolean_columns = set()
|
|
146
146
|
for field in df_after_type_casts.schema.fields:
|
|
147
|
-
|
|
147
|
+
# covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
|
|
148
|
+
is_timestamp = field.dataType.typeName().startswith("timestamp")
|
|
148
149
|
is_boolean = isinstance(field.dataType, BooleanType)
|
|
149
150
|
if is_timestamp:
|
|
150
151
|
df_after_type_casts = df_after_type_casts.withColumn(
|
mlrun/data_types/to_pandas.py
CHANGED
|
@@ -244,6 +244,15 @@ def _to_corrected_pandas_type(dt):
|
|
|
244
244
|
|
|
245
245
|
|
|
246
246
|
def spark_df_to_pandas(spark_df):
|
|
247
|
+
import pyspark
|
|
248
|
+
|
|
249
|
+
if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
|
|
250
|
+
|
|
251
|
+
def to_pandas(spark_df_inner):
|
|
252
|
+
return spark_df_inner.toPandas()
|
|
253
|
+
else:
|
|
254
|
+
to_pandas = _to_pandas
|
|
255
|
+
|
|
247
256
|
# as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
|
|
248
257
|
# when we upgrade pyspark, we should check whether this workaround is still necessary
|
|
249
258
|
# see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
|
|
@@ -262,9 +271,9 @@ def spark_df_to_pandas(spark_df):
|
|
|
262
271
|
)
|
|
263
272
|
type_conversion_dict[field.name] = "datetime64[ns]"
|
|
264
273
|
|
|
265
|
-
df =
|
|
274
|
+
df = to_pandas(spark_df)
|
|
266
275
|
if type_conversion_dict:
|
|
267
276
|
df = df.astype(type_conversion_dict)
|
|
268
277
|
return df
|
|
269
278
|
else:
|
|
270
|
-
return
|
|
279
|
+
return to_pandas(spark_df)
|
mlrun/datastore/__init__.py
CHANGED
mlrun/datastore/targets.py
CHANGED
|
@@ -1136,7 +1136,8 @@ class CSVTarget(BaseStoreTarget):
|
|
|
1136
1136
|
import pyspark.sql.functions as funcs
|
|
1137
1137
|
|
|
1138
1138
|
for col_name, col_type in df.dtypes:
|
|
1139
|
-
|
|
1139
|
+
# covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
|
|
1140
|
+
if col_type.startswith("timestamp"):
|
|
1140
1141
|
# df.write.csv saves timestamps with millisecond precision, but we want microsecond precision
|
|
1141
1142
|
# for compatibility with storey.
|
|
1142
1143
|
df = df.withColumn(
|
mlrun/datastore/vectorstore.py
CHANGED
|
@@ -123,23 +123,29 @@ class VectorStoreCollection:
|
|
|
123
123
|
Converts artifacts to LangChain documents, adds them to the vector store, and
|
|
124
124
|
updates the MLRun context. If documents are split, the IDs are handled appropriately.
|
|
125
125
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
126
|
+
:param artifacts: List of DocumentArtifact objects to add
|
|
127
|
+
:type artifacts: list[DocumentArtifact]
|
|
128
|
+
:param splitter: Document splitter to break artifacts into smaller chunks.
|
|
129
|
+
If None, each artifact becomes a single document.
|
|
130
|
+
:type splitter: TextSplitter, optional
|
|
131
|
+
:param kwargs: Additional arguments passed to the underlying add_documents method.
|
|
132
|
+
Special handling for 'ids' kwarg:
|
|
133
|
+
|
|
134
|
+
* If provided and document is split, IDs are generated as "{original_id}_{i}"
|
|
135
|
+
where i starts from 1 (e.g., "doc1_1", "doc1_2", etc.)
|
|
136
|
+
* If provided and document isn't split, original IDs are used as-is
|
|
137
|
+
|
|
138
|
+
:return: List of IDs for all added documents. When no custom IDs are provided:
|
|
139
|
+
|
|
140
|
+
* Without splitting: Vector store generates IDs automatically
|
|
141
|
+
* With splitting: Vector store generates separate IDs for each chunk
|
|
135
142
|
|
|
136
|
-
Returns:
|
|
137
|
-
list: List of IDs for all added documents. When no custom IDs are provided:
|
|
138
|
-
- Without splitting: Vector store generates IDs automatically
|
|
139
|
-
- With splitting: Vector store generates separate IDs for each chunk
|
|
140
143
|
When custom IDs are provided:
|
|
141
|
-
|
|
142
|
-
|
|
144
|
+
|
|
145
|
+
* Without splitting: Uses provided IDs directly
|
|
146
|
+
* With splitting: Generates sequential IDs as "{original_id}_{i}" for each chunk
|
|
147
|
+
:rtype: list
|
|
148
|
+
|
|
143
149
|
"""
|
|
144
150
|
all_ids = []
|
|
145
151
|
user_ids = kwargs.pop("ids", None)
|
mlrun/db/base.py
CHANGED
|
@@ -666,6 +666,7 @@ class RunDBInterface(ABC):
|
|
|
666
666
|
def create_model_endpoint(
|
|
667
667
|
self,
|
|
668
668
|
model_endpoint: mlrun.common.schemas.ModelEndpoint,
|
|
669
|
+
creation_strategy: mlrun.common.schemas.ModelEndpointCreationStrategy = "inplace",
|
|
669
670
|
) -> mlrun.common.schemas.ModelEndpoint:
|
|
670
671
|
pass
|
|
671
672
|
|
|
@@ -688,6 +689,7 @@ class RunDBInterface(ABC):
|
|
|
688
689
|
function_name: Optional[str] = None,
|
|
689
690
|
function_tag: Optional[str] = None,
|
|
690
691
|
model_name: Optional[str] = None,
|
|
692
|
+
model_tag: Optional[str] = None,
|
|
691
693
|
labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
|
|
692
694
|
start: Optional[datetime.datetime] = None,
|
|
693
695
|
end: Optional[datetime.datetime] = None,
|
mlrun/db/httpdb.py
CHANGED
|
@@ -3582,11 +3582,17 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3582
3582
|
def create_model_endpoint(
|
|
3583
3583
|
self,
|
|
3584
3584
|
model_endpoint: mlrun.common.schemas.ModelEndpoint,
|
|
3585
|
+
creation_strategy: mlrun.common.schemas.ModelEndpointCreationStrategy = "inplace",
|
|
3585
3586
|
) -> mlrun.common.schemas.ModelEndpoint:
|
|
3586
3587
|
"""
|
|
3587
3588
|
Creates a DB record with the given model_endpoint record.
|
|
3588
3589
|
|
|
3589
3590
|
:param model_endpoint: An object representing the model endpoint.
|
|
3591
|
+
:param creation_strategy: Model endpoint creation strategy:
|
|
3592
|
+
* overwrite - Create a new model endpoint and delete the last old one if it exists.
|
|
3593
|
+
* inplace - Use the existing model endpoint if it already exists (default).
|
|
3594
|
+
* archive - Preserve the old model endpoint and create a new one,
|
|
3595
|
+
tagging it as the latest.
|
|
3590
3596
|
|
|
3591
3597
|
:return: The created model endpoint object.
|
|
3592
3598
|
"""
|
|
@@ -3596,6 +3602,9 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3596
3602
|
method=mlrun.common.types.HTTPMethod.POST,
|
|
3597
3603
|
path=path,
|
|
3598
3604
|
body=model_endpoint.json(),
|
|
3605
|
+
params={
|
|
3606
|
+
"creation_strategy": creation_strategy,
|
|
3607
|
+
},
|
|
3599
3608
|
)
|
|
3600
3609
|
return mlrun.common.schemas.ModelEndpoint(**response.json())
|
|
3601
3610
|
|
|
@@ -3637,6 +3646,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3637
3646
|
function_name: Optional[str] = None,
|
|
3638
3647
|
function_tag: Optional[str] = None,
|
|
3639
3648
|
model_name: Optional[str] = None,
|
|
3649
|
+
model_tag: Optional[str] = None,
|
|
3640
3650
|
labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
|
|
3641
3651
|
start: Optional[datetime] = None,
|
|
3642
3652
|
end: Optional[datetime] = None,
|
|
@@ -3653,6 +3663,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3653
3663
|
:param function_name: The name of the function
|
|
3654
3664
|
:param function_tag: The tag of the function
|
|
3655
3665
|
:param model_name: The name of the model
|
|
3666
|
+
:param model_tag: The tag of the model
|
|
3656
3667
|
:param labels: A list of labels to filter by. (see mlrun.common.schemas.LabelsModel)
|
|
3657
3668
|
:param start: The start time to filter by. Corresponding to the `created` field.
|
|
3658
3669
|
:param end: The end time to filter by. Corresponding to the `created` field.
|
|
@@ -3671,6 +3682,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3671
3682
|
params={
|
|
3672
3683
|
"name": name,
|
|
3673
3684
|
"model_name": model_name,
|
|
3685
|
+
"model_tag": model_tag,
|
|
3674
3686
|
"function_name": function_name,
|
|
3675
3687
|
"function_tag": function_tag,
|
|
3676
3688
|
"label": labels,
|
mlrun/db/nopdb.py
CHANGED
|
@@ -575,6 +575,7 @@ class NopDB(RunDBInterface):
|
|
|
575
575
|
def create_model_endpoint(
|
|
576
576
|
self,
|
|
577
577
|
model_endpoint: mlrun.common.schemas.ModelEndpoint,
|
|
578
|
+
creation_strategy: mlrun.common.schemas.ModelEndpointCreationStrategy = "inplace",
|
|
578
579
|
) -> mlrun.common.schemas.ModelEndpoint:
|
|
579
580
|
pass
|
|
580
581
|
|
|
@@ -595,6 +596,7 @@ class NopDB(RunDBInterface):
|
|
|
595
596
|
function_name: Optional[str] = None,
|
|
596
597
|
function_tag: Optional[str] = None,
|
|
597
598
|
model_name: Optional[str] = None,
|
|
599
|
+
model_tag: Optional[str] = None,
|
|
598
600
|
labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
|
|
599
601
|
start: Optional[datetime.datetime] = None,
|
|
600
602
|
end: Optional[datetime.datetime] = None,
|
mlrun/feature_store/steps.py
CHANGED
|
@@ -671,7 +671,7 @@ class SetEventMetadata(MapClass):
|
|
|
671
671
|
|
|
672
672
|
self._tagging_funcs = []
|
|
673
673
|
|
|
674
|
-
def post_init(self, mode="sync"):
|
|
674
|
+
def post_init(self, mode="sync", **kwargs):
|
|
675
675
|
def add_metadata(name, path, operator=str):
|
|
676
676
|
def _add_meta(event):
|
|
677
677
|
value = get_in(event.body, path)
|
mlrun/model_monitoring/api.py
CHANGED
|
@@ -54,9 +54,10 @@ def get_or_create_model_endpoint(
|
|
|
54
54
|
model_endpoint_name: str = "",
|
|
55
55
|
endpoint_id: str = "",
|
|
56
56
|
function_name: str = "",
|
|
57
|
+
function_tag: str = "latest",
|
|
57
58
|
context: typing.Optional["mlrun.MLClientCtx"] = None,
|
|
58
59
|
sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
|
|
59
|
-
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.
|
|
60
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
|
|
60
61
|
db_session=None,
|
|
61
62
|
) -> ModelEndpoint:
|
|
62
63
|
"""
|
|
@@ -70,8 +71,8 @@ def get_or_create_model_endpoint(
|
|
|
70
71
|
under this endpoint (applicable only to new endpoint_id).
|
|
71
72
|
:param endpoint_id: Model endpoint unique ID. If it does not exist in the DB, a new record is generated based
|
|
72
73
|
on the provided `endpoint_id`.
|
|
73
|
-
:param function_name: If a new model endpoint is created, use this function name
|
|
74
|
-
|
|
74
|
+
:param function_name: If a new model endpoint is created, use this function name.
|
|
75
|
+
:param function_tag: If a new model endpoint is created, use this function tag.
|
|
75
76
|
:param context: MLRun context. If `function_name` is not provided, use the context to generate the
|
|
76
77
|
full function hash.
|
|
77
78
|
:param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
|
|
@@ -86,28 +87,32 @@ def get_or_create_model_endpoint(
|
|
|
86
87
|
if not db_session:
|
|
87
88
|
# Generate a runtime database
|
|
88
89
|
db_session = mlrun.get_run_db()
|
|
90
|
+
model_endpoint = None
|
|
89
91
|
try:
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
92
|
+
if endpoint_id:
|
|
93
|
+
model_endpoint = db_session.get_model_endpoint(
|
|
94
|
+
project=project,
|
|
95
|
+
name=model_endpoint_name,
|
|
96
|
+
endpoint_id=endpoint_id,
|
|
97
|
+
)
|
|
98
|
+
# If other fields are provided, validate that they correspond to the existing model endpoint data
|
|
99
|
+
_model_endpoint_validations(
|
|
100
|
+
model_endpoint=model_endpoint,
|
|
101
|
+
model_path=model_path,
|
|
102
|
+
sample_set_statistics=sample_set_statistics,
|
|
103
|
+
)
|
|
102
104
|
|
|
103
105
|
except mlrun.errors.MLRunNotFoundError:
|
|
104
106
|
# Create a new model endpoint with the provided details
|
|
107
|
+
pass
|
|
108
|
+
if not model_endpoint:
|
|
105
109
|
model_endpoint = _generate_model_endpoint(
|
|
106
110
|
project=project,
|
|
107
111
|
db_session=db_session,
|
|
108
112
|
model_path=model_path,
|
|
109
113
|
model_endpoint_name=model_endpoint_name,
|
|
110
114
|
function_name=function_name,
|
|
115
|
+
function_tag=function_tag,
|
|
111
116
|
context=context,
|
|
112
117
|
sample_set_statistics=sample_set_statistics,
|
|
113
118
|
monitoring_mode=monitoring_mode,
|
|
@@ -333,9 +338,10 @@ def _generate_model_endpoint(
|
|
|
333
338
|
model_path: str,
|
|
334
339
|
model_endpoint_name: str,
|
|
335
340
|
function_name: str,
|
|
341
|
+
function_tag: str,
|
|
336
342
|
context: "mlrun.MLClientCtx",
|
|
337
343
|
sample_set_statistics: dict[str, typing.Any],
|
|
338
|
-
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.
|
|
344
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
|
|
339
345
|
) -> ModelEndpoint:
|
|
340
346
|
"""
|
|
341
347
|
Write a new model endpoint record.
|
|
@@ -345,8 +351,8 @@ def _generate_model_endpoint(
|
|
|
345
351
|
:param db_session: A session that manages the current dialog with the database.
|
|
346
352
|
:param model_path: The model Store path.
|
|
347
353
|
:param model_endpoint_name: Model endpoint name will be presented under the new model endpoint.
|
|
348
|
-
:param function_name: If a new model endpoint is created, use this function name
|
|
349
|
-
|
|
354
|
+
:param function_name: If a new model endpoint is created, use this function name.
|
|
355
|
+
:param function_tag: If a new model endpoint is created, use this function tag.
|
|
350
356
|
:param context: MLRun context. If function_name is not provided, use the context to generate the
|
|
351
357
|
full function hash.
|
|
352
358
|
:param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
|
|
@@ -374,9 +380,12 @@ def _generate_model_endpoint(
|
|
|
374
380
|
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
|
|
375
381
|
),
|
|
376
382
|
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
377
|
-
function_name=function_name,
|
|
378
|
-
|
|
379
|
-
|
|
383
|
+
function_name=function_name or "function",
|
|
384
|
+
function_tag=function_tag or "latest",
|
|
385
|
+
model_name=model_obj.metadata.key if model_obj else None,
|
|
386
|
+
model_uid=model_obj.metadata.uid if model_obj else None,
|
|
387
|
+
model_tag=model_obj.metadata.tag if model_obj else None,
|
|
388
|
+
model_db_key=model_obj.spec.db_key if model_obj else None,
|
|
380
389
|
model_class="drift-analysis",
|
|
381
390
|
),
|
|
382
391
|
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
@@ -148,6 +148,44 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
148
148
|
)
|
|
149
149
|
return start, end
|
|
150
150
|
|
|
151
|
+
@classmethod
|
|
152
|
+
def deploy(
|
|
153
|
+
cls,
|
|
154
|
+
func_name: str,
|
|
155
|
+
func_path: Optional[str] = None,
|
|
156
|
+
image: Optional[str] = None,
|
|
157
|
+
handler: Optional[str] = None,
|
|
158
|
+
with_repo: Optional[bool] = False,
|
|
159
|
+
tag: Optional[str] = None,
|
|
160
|
+
requirements: Optional[Union[str, list[str]]] = None,
|
|
161
|
+
requirements_file: str = "",
|
|
162
|
+
**application_kwargs,
|
|
163
|
+
) -> None:
|
|
164
|
+
"""
|
|
165
|
+
Set the application to the current project and deploy it as a Nuclio serving function.
|
|
166
|
+
Required for your model monitoring application to work as a part of the model monitoring framework.
|
|
167
|
+
|
|
168
|
+
:param func_name: The name of the function.
|
|
169
|
+
:param func_path: The path of the function, :code:`None` refers to the current Jupyter notebook.
|
|
170
|
+
|
|
171
|
+
For the other arguments, refer to
|
|
172
|
+
:py:meth:`~mlrun.projects.MlrunProject.set_model_monitoring_function`.
|
|
173
|
+
"""
|
|
174
|
+
project = cast("mlrun.MlrunProject", mlrun.get_current_project())
|
|
175
|
+
function = project.set_model_monitoring_function(
|
|
176
|
+
name=func_name,
|
|
177
|
+
func=func_path,
|
|
178
|
+
application_class=cls.__name__,
|
|
179
|
+
handler=handler,
|
|
180
|
+
image=image,
|
|
181
|
+
with_repo=with_repo,
|
|
182
|
+
requirements=requirements,
|
|
183
|
+
requirements_file=requirements_file,
|
|
184
|
+
tag=tag,
|
|
185
|
+
**application_kwargs,
|
|
186
|
+
)
|
|
187
|
+
function.deploy()
|
|
188
|
+
|
|
151
189
|
@classmethod
|
|
152
190
|
def evaluate(
|
|
153
191
|
cls,
|
|
@@ -175,10 +213,10 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
175
213
|
:param func_name: The name of the function. If not passed, the class name is used.
|
|
176
214
|
:param tag: An optional tag for the function.
|
|
177
215
|
:param run_local: Whether to run the function locally or remotely.
|
|
178
|
-
:param
|
|
179
|
-
|
|
180
|
-
:param
|
|
181
|
-
|
|
216
|
+
:param sample_data: Optional - pandas data-frame as the current dataset.
|
|
217
|
+
When set, it replaces the data read from the model endpoint's offline source.
|
|
218
|
+
:param reference_data: Optional - pandas data-frame of the reference dataset.
|
|
219
|
+
When set, its statistics override the model endpoint's feature statistics.
|
|
182
220
|
:param image: Docker image to run the job on.
|
|
183
221
|
:param with_repo: Whether to clone the current repo to the build source.
|
|
184
222
|
:param requirements: List of Python requirements to be installed in the image.
|