mlrun 1.8.0rc8__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl
- mlrun/artifacts/__init__.py +1 -1
- mlrun/artifacts/base.py +9 -0
- mlrun/artifacts/document.py +56 -12
- mlrun/artifacts/manager.py +12 -5
- mlrun/common/constants.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +1 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +1 -0
- mlrun/config.py +1 -0
- mlrun/data_types/data_types.py +1 -0
- mlrun/data_types/spark.py +3 -2
- mlrun/data_types/to_pandas.py +11 -2
- mlrun/datastore/__init__.py +2 -0
- mlrun/datastore/store_resources.py +7 -2
- mlrun/datastore/targets.py +2 -1
- mlrun/datastore/vectorstore.py +53 -18
- mlrun/db/base.py +6 -2
- mlrun/db/httpdb.py +35 -8
- mlrun/db/nopdb.py +6 -2
- mlrun/model_monitoring/api.py +13 -2
- mlrun/model_monitoring/helpers.py +0 -1
- mlrun/model_monitoring/stream_processing.py +2 -2
- mlrun/projects/project.py +31 -20
- mlrun/runtimes/nuclio/serving.py +0 -2
- mlrun/serving/routers.py +2 -2
- mlrun/serving/v2_serving.py +43 -19
- mlrun/utils/helpers.py +9 -3
- mlrun/utils/regex.py +8 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc8.dist-info → mlrun-1.8.0rc11.dist-info}/METADATA +9 -9
- {mlrun-1.8.0rc8.dist-info → mlrun-1.8.0rc11.dist-info}/RECORD +34 -34
- {mlrun-1.8.0rc8.dist-info → mlrun-1.8.0rc11.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc8.dist-info → mlrun-1.8.0rc11.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc8.dist-info → mlrun-1.8.0rc11.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc8.dist-info → mlrun-1.8.0rc11.dist-info}/top_level.txt +0 -0
mlrun/artifacts/__init__.py
CHANGED
mlrun/artifacts/base.py
CHANGED
@@ -379,6 +379,7 @@ class Artifact(ModelObj):
             iter=self.metadata.iter,
             tree=tree,
             tag=tag,
+            uid=self.uid,
         )
         return mlrun.datastore.get_store_uri(self._store_prefix, uri)

@@ -653,6 +654,14 @@ class Artifact(ModelObj):
     def hash(self, hash):
         self.metadata.hash = hash

+    @property
+    def uid(self):
+        return self.metadata.uid
+
+    @uid.setter
+    def uid(self, uid):
+        self.metadata.uid = uid
+
     def generate_target_path(self, artifact_path, producer):
         return generate_target_path(self, artifact_path, producer)

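Taken together, these hunks let an artifact carry the backend-assigned uid and embed it in its store URI. A minimal sketch of the new accessor, assuming only an installed mlrun client:

import mlrun.artifacts

artifact = mlrun.artifacts.Artifact(key="my-artifact")
artifact.uid = "abc123"  # setter writes through to artifact.metadata.uid
assert artifact.uid == artifact.metadata.uid == "abc123"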
mlrun/artifacts/document.py
CHANGED
@@ -39,8 +39,6 @@ class DocumentLoaderSpec(ModelObj):
         src_name (str): The name of the source attribute to pass to the loader class.
         kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.

-    Methods:
-        make_loader(src_path): Creates an instance of the loader class with the specified source path.
     """

     _dict_fields = ["loader_class_name", "src_name", "kwargs"]
@@ -58,6 +56,19 @@
             loader_class_name (str): The name of the loader class to use.
             src_name (str): The source name for the document.
             kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.
+
+        Example:
+            >>> # Create a loader specification for PDF documents
+            >>> loader_spec = DocumentLoaderSpec(
+            ...     loader_class_name="langchain_community.document_loaders.PDFLoader",
+            ...     src_name="file_path",
+            ...     kwargs={"extract_images": True},
+            ... )
+            >>> # Create a loader instance for a specific PDF file
+            >>> pdf_loader = loader_spec.make_loader("/path/to/document.pdf")
+            >>> # Load the documents
+            >>> documents = pdf_loader.load()
+
         """
         self.loader_class_name = loader_class_name
         self.src_name = src_name
@@ -87,6 +98,45 @@ class MLRunLoader:

     Returns:
         DynamicDocumentLoader: An instance of a dynamically defined subclass of BaseLoader.
+
+    Example:
+        >>> # Create a document loader specification
+        >>> loader_spec = DocumentLoaderSpec(
+        ...     loader_class_name="langchain_community.document_loaders.TextLoader",
+        ...     src_name="file_path",
+        ... )
+        >>> # Create a basic loader for a single file
+        >>> loader = MLRunLoader(
+        ...     source_path="/path/to/document.txt",
+        ...     loader_spec=loader_spec,
+        ...     artifact_key="my_doc",
+        ...     producer=project,
+        ...     upload=True,
+        ... )
+        >>> documents = loader.load()
+        >>> # Create a loader with auto-generated keys
+        >>> loader = MLRunLoader(
+        ...     source_path="/path/to/document.txt",
+        ...     loader_spec=loader_spec,
+        ...     artifact_key="doc%%",  # %% will be replaced with encoded path
+        ...     producer=project,
+        ... )
+        >>> documents = loader.load()
+        >>> # Use with DirectoryLoader
+        >>> from langchain_community.document_loaders import DirectoryLoader
+        >>> dir_loader = DirectoryLoader(
+        ...     "/path/to/directory",
+        ...     glob="**/*.txt",
+        ...     loader_cls=MLRunLoader,
+        ...     loader_kwargs={
+        ...         "loader_spec": loader_spec,
+        ...         "artifact_key": "doc%%",
+        ...         "producer": project,
+        ...         "upload": True,
+        ...     },
+        ... )
+        >>> documents = dir_loader.load()
+
     """

     def __new__(
@@ -178,11 +228,6 @@
 class DocumentArtifact(Artifact):
     """
     A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
-
-    Methods:
-        to_langchain_documents(splitter): Create LC documents from the artifact.
-        collection_add(collection_id): Add a collection ID to the artifact.
-        collection_remove(collection_id): Remove a collection ID from the artifact.
     """

     class DocumentArtifactSpec(ArtifactSpec):
@@ -205,10 +250,6 @@
         self.collections = collections if collections is not None else {}
         self.original_source = original_source

-    """
-    A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
-    """
-
     kind = "document"

     METADATA_SOURCE_KEY = "source"
@@ -242,6 +283,7 @@
         )

     def get_source(self):
+        """Get the source URI for this artifact."""
         return generate_artifact_uri(self.metadata.project, self.spec.db_key)

     def to_langchain_documents(
@@ -277,6 +319,7 @@
         )

         results = []
+        idx = 0
         for document in documents:
             if splitter:
                 texts = splitter.split_text(document.page_content)
@@ -293,13 +336,14 @@
                 self.get_target_path()
             )

-            for idx, text in enumerate(texts):
+            for text in texts:
                 metadata[self.METADATA_CHUNK_KEY] = str(idx)
                 doc = Document(
                     page_content=text,
                     metadata=metadata.copy(),
                 )
                 results.append(doc)
+                idx = idx + 1
         return results

     def collection_add(self, collection_id: str) -> None:
mlrun/artifacts/manager.py
CHANGED
@@ -306,7 +306,6 @@ class ArtifactManager:
         item.target_path = target_path

         item.before_log()
-        self.artifact_uris[key] = item.uri

         if ((upload is None and item.kind != "dir") or upload) and not item.is_inline():
             # before uploading the item, we want to ensure that its tags are valid,
@@ -315,7 +314,12 @@
             item.upload(artifact_path=artifact_path)

         if db_key:
-            self._log_to_db(db_key, project, producer.inputs, item)
+            artifact_uid = self._log_to_db(db_key, project, producer.inputs, item)
+            if artifact_uid is not None:
+                item.uid = artifact_uid
+        # Generate the artifact URI after logging to the database and retrieving the artifact UID, if available.
+        self.artifact_uris[key] = item.uri
+
         size = str(item.size) or "?"
         db_str = "Y" if (self.artifact_db and db_key) else "N"
         logger.debug(
@@ -327,20 +331,21 @@
         self.artifact_uris[item.key] = item.uri
         self._log_to_db(item.db_key, producer.project, producer.inputs, item)

-    def _log_to_db(self, key, project, sources, item, tag=None):
+    def _log_to_db(self, key, project, sources, item, tag=None) -> typing.Optional[str]:
         """
         log artifact to db
         :param key: Identifying key of the artifact.
         :param project: Project that the artifact belongs to.
-        :param sources: List of artifact sources ( Mainly passed from the producer.items ).
+        :param sources: List of artifact sources ( Mainly passed from the `producer.items` ).
         :param item: The actual artifact to store.
         :param tag: The name of the Tag of the artifact.
+        :return: The logged artifact uid.
         """
         if self.artifact_db:
             item.updated = None
             if sources:
                 item.sources = [{"name": k, "path": str(v)} for k, v in sources.items()]
-            self.artifact_db.store_artifact(
+            artifact_item = self.artifact_db.store_artifact(
                 key,
                 item.to_dict(),
                 iter=item.iter,
@@ -348,6 +353,8 @@
                 project=project,
                 tree=item.tree,
             )
+            if artifact_item:
+                return artifact_item.get("metadata", {}).get("uid")

     def link_artifact(
         self,
mlrun/common/constants.py
CHANGED

mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED

@@ -121,6 +121,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     model_uid: Optional[str] = ""
     model_name: Optional[str] = ""
+    model_db_key: Optional[str] = ""
     model_tag: Optional[str] = ""
     model_class: Optional[str] = ""
     function_name: Optional[str] = ""
mlrun/config.py
CHANGED
@@ -160,6 +160,7 @@ default_config = {
     # migration from artifacts to artifacts_v2 is done in batches, and requires a state file to keep track of the
     # migration progress.
     "artifact_migration_batch_size": 200,
+    "artifact_migration_v9_batch_size": 30000,
     "artifact_migration_state_file_path": "./db/_artifact_migration_state.json",
     "datasets": {
         "max_preview_columns": 100,
mlrun/data_types/data_types.py
CHANGED
mlrun/data_types/spark.py
CHANGED
@@ -19,7 +19,7 @@ from typing import Optional
 import numpy as np
 import pytz
 from pyspark.sql.functions import to_utc_timestamp
-from pyspark.sql.types import BooleanType, DoubleType, TimestampType
+from pyspark.sql.types import BooleanType, DoubleType

 from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
 from mlrun.utils import logger
@@ -144,7 +144,8 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
     timestamp_columns = set()
     boolean_columns = set()
     for field in df_after_type_casts.schema.fields:
-        is_timestamp = isinstance(field.dataType, TimestampType)
+        # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
+        is_timestamp = field.dataType.typeName().startswith("timestamp")
         is_boolean = isinstance(field.dataType, BooleanType)
         if is_timestamp:
             df_after_type_casts = df_after_type_casts.withColumn(
mlrun/data_types/to_pandas.py
CHANGED
@@ -244,6 +244,15 @@ def _to_corrected_pandas_type(dt):


 def spark_df_to_pandas(spark_df):
+    import pyspark
+
+    if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
+
+        def to_pandas(spark_df_inner):
+            return spark_df_inner.toPandas()
+    else:
+        to_pandas = _to_pandas
+
     # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
     # when we upgrade pyspark, we should check whether this workaround is still necessary
     # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
@@ -262,9 +271,9 @@ def spark_df_to_pandas(spark_df):
             )
             type_conversion_dict[field.name] = "datetime64[ns]"

-        df = _to_pandas(spark_df)
+        df = to_pandas(spark_df)
         if type_conversion_dict:
             df = df.astype(type_conversion_dict)
         return df
     else:
-        return _to_pandas(spark_df)
+        return to_pandas(spark_df)
mlrun/datastore/__init__.py
CHANGED

mlrun/datastore/store_resources.py
CHANGED

@@ -163,11 +163,16 @@ def get_store_resource(
         return db.get_feature_vector(name, project, tag, uid)

     elif StorePrefix.is_artifact(kind):
-        project, key, iteration, tag, tree = parse_artifact_uri(
+        project, key, iteration, tag, tree, uid = parse_artifact_uri(
             uri, project or config.default_project
         )
         resource = db.read_artifact(
-            key,
+            key,
+            project=project,
+            tag=tag,
+            iter=iteration,
+            tree=tree,
+            uid=uid,
         )
         if resource.get("kind", "") == "link":
             # todo: support other link types (not just iter, move this to the db/api layer
mlrun/datastore/targets.py
CHANGED
@@ -1136,7 +1136,8 @@ class CSVTarget(BaseStoreTarget):
         import pyspark.sql.functions as funcs

         for col_name, col_type in df.dtypes:
-            if col_type == "timestamp":
+            # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
+            if col_type.startswith("timestamp"):
                 # df.write.csv saves timestamps with millisecond precision, but we want microsecond precision
                 # for compatibility with storey.
                 df = df.withColumn(
mlrun/datastore/vectorstore.py
CHANGED
@@ -14,11 +14,32 @@

 import inspect
 from collections.abc import Iterable
-from typing import Union
+from typing import Optional, Union

 from mlrun.artifacts import DocumentArtifact


+def _extract_collection_name(vectorstore: "VectorStore") -> str:  # noqa: F821
+    # List of possible attribute names for collection name
+    possible_attributes = ["collection_name", "_collection_name"]
+
+    for attr in possible_attributes:
+        if hasattr(vectorstore, attr):
+            collection_name = getattr(vectorstore, attr)
+            if collection_name:
+                return collection_name
+
+    store_class = vectorstore.__class__.__name__.lower()
+    if store_class == "mongodbatlasvectorsearch":
+        return vectorstore.collection.name
+
+    # If we get here, we couldn't find a valid collection name
+    raise ValueError(
+        "Failed to extract collection name from the vector store. "
+        "Please provide the collection name explicitly. "
+    )
+
+
 class VectorStoreCollection:
     """
     A wrapper class for vector store collections with MLRun integration.
@@ -36,12 +57,17 @@ class VectorStoreCollection:
     def __init__(
         self,
         mlrun_context: Union["MlrunProject", "MLClientCtx"],  # noqa: F821
-        collection_name: str,
         vector_store: "VectorStore",  # noqa: F821
+        collection_name: Optional[str] = None,
     ):
         self._collection_impl = vector_store
         self._mlrun_context = mlrun_context
-        self.collection_name = collection_name
+        self.collection_name = collection_name or _extract_collection_name(vector_store)
+
+    @property
+    def __class__(self):
+        # Make isinstance() check the wrapped object's class
+        return self._collection_impl.__class__

     def __getattr__(self, name):
         # This method is called when an attribute is not found in the usual places
@@ -56,6 +82,9 @@ class VectorStoreCollection:
         # Forward the attribute setting to _collection_impl
         setattr(self._collection_impl, name, value)

+    def delete(self, *args, **kwargs):
+        self._collection_impl.delete(*args, **kwargs)
+
     def add_documents(
         self,
         documents: list["Document"],  # noqa: F821
@@ -94,23 +123,29 @@ class VectorStoreCollection:
         Converts artifacts to LangChain documents, adds them to the vector store, and
         updates the MLRun context. If documents are split, the IDs are handled appropriately.

-
-
-
-
-
-
-
-
-
+        :param artifacts: List of DocumentArtifact objects to add
+        :type artifacts: list[DocumentArtifact]
+        :param splitter: Document splitter to break artifacts into smaller chunks.
+            If None, each artifact becomes a single document.
+        :type splitter: TextSplitter, optional
+        :param kwargs: Additional arguments passed to the underlying add_documents method.
+            Special handling for 'ids' kwarg:
+
+            * If provided and document is split, IDs are generated as "{original_id}_{i}"
+              where i starts from 1 (e.g., "doc1_1", "doc1_2", etc.)
+            * If provided and document isn't split, original IDs are used as-is
+
+        :return: List of IDs for all added documents. When no custom IDs are provided:
+
+            * Without splitting: Vector store generates IDs automatically
+            * With splitting: Vector store generates separate IDs for each chunk

-        Returns:
-            list: List of IDs for all added documents. When no custom IDs are provided:
-                - Without splitting: Vector store generates IDs automatically
-                - With splitting: Vector store generates separate IDs for each chunk
             When custom IDs are provided:
-
-
+
+            * Without splitting: Uses provided IDs directly
+            * With splitting: Generates sequential IDs as "{original_id}_{i}" for each chunk
+        :rtype: list
+
         """
         all_ids = []
         user_ids = kwargs.pop("ids", None)
mlrun/db/base.py
CHANGED
@@ -674,8 +674,9 @@ class RunDBInterface(ABC):
         self,
         name: str,
         project: str,
-        function_name: str,
-        endpoint_id: str,
+        function_name: Optional[str] = None,
+        function_tag: Optional[str] = None,
+        endpoint_id: Optional[str] = None,
     ):
         pass

@@ -685,6 +686,7 @@
         project: str,
         name: Optional[str] = None,
         function_name: Optional[str] = None,
+        function_tag: Optional[str] = None,
         model_name: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
         start: Optional[datetime.datetime] = None,
@@ -702,6 +704,7 @@
         name: str,
         project: str,
         function_name: Optional[str] = None,
+        function_tag: Optional[str] = None,
         endpoint_id: Optional[str] = None,
         tsdb_metrics: bool = True,
         feature_analysis: bool = False,
@@ -715,6 +718,7 @@
         project: str,
         attributes: dict,
         function_name: Optional[str] = None,
+        function_tag: Optional[str] = None,
         endpoint_id: Optional[str] = None,
     ) -> mlrun.common.schemas.ModelEndpoint:
         pass
mlrun/db/httpdb.py
CHANGED
@@ -996,7 +996,7 @@ class HTTPRunDB(RunDBInterface):
         tag=None,
         project="",
         tree=None,
-    ):
+    ) -> dict[str, str]:
         """Store an artifact in the DB.

         :param key: Identifying key of the artifact.
@@ -1008,6 +1008,7 @@
         :param tag: Tag of the artifact.
         :param project: Project that the artifact belongs to.
         :param tree: The tree (producer id) which generated this artifact.
+        :returns: The stored artifact dictionary.
         """
         if uid:
             warnings.warn(
@@ -1032,9 +1033,10 @@
             params["tree"] = tree

         body = _as_json(artifact)
-        self.api_call(
+        response = self.api_call(
             "PUT", endpoint_path, error, body=body, params=params, version="v2"
         )
+        return response.json()

     def read_artifact(
         self,
@@ -3601,8 +3603,9 @@
         self,
         name: str,
         project: str,
-        function_name: str,
-        endpoint_id: str,
+        function_name: Optional[str] = None,
+        function_tag: Optional[str] = None,
+        endpoint_id: Optional[str] = None,
     ):
         """
         Deletes the DB record of a given model endpoint, project and endpoint_id are used for lookup
@@ -3610,15 +3613,19 @@
         :param name: The name of the model endpoint
         :param project: The name of the project
         :param function_name: The name of the function
+        :param function_tag: The tag of the function
         :param endpoint_id: The id of the endpoint
         """
-
+        self._check_model_endpoint_representation(
+            function_name, function_tag, endpoint_id
+        )
         path = f"projects/{project}/model-endpoints/{name}"
         self.api_call(
             method=mlrun.common.types.HTTPMethod.DELETE,
             path=path,
             params={
                 "function_name": function_name,
+                "function_tag": function_tag,
                 "endpoint_id": endpoint_id,
             },
         )
@@ -3628,6 +3635,7 @@
         project: str,
         name: Optional[str] = None,
         function_name: Optional[str] = None,
+        function_tag: Optional[str] = None,
         model_name: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
         start: Optional[datetime] = None,
@@ -3643,6 +3651,7 @@
         :param project: The name of the project
         :param name: The name of the model endpoint
         :param function_name: The name of the function
+        :param function_tag: The tag of the function
         :param model_name: The name of the model
         :param labels: A list of labels to filter by. (see mlrun.common.schemas.LabelsModel)
         :param start: The start time to filter by. Corresponding to the `created` field.
@@ -3663,6 +3672,7 @@
             "name": name,
             "model_name": model_name,
             "function_name": function_name,
+            "function_tag": function_tag,
             "label": labels,
             "start": datetime_to_iso(start),
             "end": datetime_to_iso(end),
@@ -3680,7 +3690,7 @@
         name: str,
         project: str,
         function_name: Optional[str] = None,
-
+        function_tag: Optional[str] = None,
         endpoint_id: Optional[str] = None,
         tsdb_metrics: bool = True,
         feature_analysis: bool = False,
@@ -3691,6 +3701,7 @@
         :param name: The name of the model endpoint
         :param project: The name of the project
         :param function_name: The name of the function
+        :param function_tag: The tag of the function
         :param endpoint_id: The id of the endpoint
         :param tsdb_metrics: Whether to include metrics from the time series DB.
         :param feature_analysis: Whether to include feature analysis data (feature_stats,
@@ -3698,13 +3709,16 @@

         :return: A `ModelEndpoint` object.
         """
-
+        self._check_model_endpoint_representation(
+            function_name, function_tag, endpoint_id
+        )
         path = f"projects/{project}/model-endpoints/{name}"
         response = self.api_call(
             method=mlrun.common.types.HTTPMethod.GET,
             path=path,
             params={
                 "function_name": function_name,
+                "function_tag": function_tag,
                 "endpoint_id": endpoint_id,
                 "tsdb_metrics": tsdb_metrics,
                 "feature_analysis": feature_analysis,
@@ -3719,6 +3733,7 @@
         project: str,
         attributes: dict,
         function_name: Optional[str] = None,
+        function_tag: Optional[str] = None,
         endpoint_id: Optional[str] = None,
     ) -> mlrun.common.schemas.ModelEndpoint:
         """
@@ -3728,13 +3743,16 @@
         :param project: The name of the project
         :param attributes: The attributes to update
         :param function_name: The name of the function
+        :param function_tag: The tag of the function
         :param endpoint_id: The id of the endpoint
         :return: The updated `ModelEndpoint` object.
         """
         attributes_keys = list(attributes.keys())
         attributes["name"] = name
         attributes["project"] = project
-        attributes["function_name"] = function_name
+        attributes["function_name"] = function_name or None
+        attributes["function_tag"] = function_tag or None
+        attributes["uid"] = endpoint_id or None
         model_endpoint = mlrun.common.schemas.ModelEndpoint.from_flat_dict(attributes)
         path = f"projects/{project}/model-endpoints"
         logger.info(
@@ -3753,6 +3771,15 @@

         return mlrun.common.schemas.ModelEndpoint(**response.json())

+    @staticmethod
+    def _check_model_endpoint_representation(
+        function_name: str, function_tag: str, uid: str
+    ):
+        if not uid and not (function_name and function_tag):
+            raise MLRunInvalidArgumentError(
+                "Either endpoint_uid or function_name and function_tag must be provided"
+            )
+
     def update_model_monitoring_controller(
         self,
         project: str,
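The new static guard centralizes that rule, failing fast before any API call. A standalone sketch of the predicate it enforces (illustrative names, not mlrun API):

def is_valid_endpoint_reference(function_name, function_tag, uid):
    # valid when a uid is given, or when function name and tag are both given
    return bool(uid) or bool(function_name and function_tag)

assert is_valid_endpoint_reference(None, None, "ep-uid-123")
assert is_valid_endpoint_reference("serving-fn", "latest", None)
assert not is_valid_endpoint_reference("serving-fn", None, None)  # client raises MLRunInvalidArgumentError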
|