mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +23 -21
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +148 -14
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +55 -12
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/document.py +378 -0
- mlrun/artifacts/manager.py +26 -17
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +1 -29
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +6 -2
- mlrun/common/schemas/alert.py +111 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +11 -7
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -3
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +67 -14
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +92 -147
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +137 -0
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +25 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +68 -10
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/data_types.py +1 -0
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +5 -3
- mlrun/data_types/to_pandas.py +11 -2
- mlrun/datastore/__init__.py +2 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +79 -20
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +52 -51
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/targets.py +23 -22
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +229 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +213 -83
- mlrun/db/factory.py +0 -3
- mlrun/db/httpdb.py +1265 -387
- mlrun/db/nopdb.py +205 -74
- mlrun/errors.py +2 -2
- mlrun/execution.py +136 -50
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +41 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +117 -46
- mlrun/model_monitoring/__init__.py +4 -4
- mlrun/model_monitoring/api.py +72 -59
- mlrun/model_monitoring/applications/_application_steps.py +17 -17
- mlrun/model_monitoring/applications/base.py +165 -6
- mlrun/model_monitoring/applications/context.py +88 -37
- mlrun/model_monitoring/applications/evidently_base.py +0 -1
- mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +207 -239
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/base.py +78 -25
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
- mlrun/model_monitoring/helpers.py +151 -49
- mlrun/model_monitoring/stream_processing.py +99 -283
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +48 -36
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +71 -36
- mlrun/projects/project.py +890 -220
- mlrun/run.py +53 -10
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +15 -11
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +19 -17
- mlrun/runtimes/nuclio/serving.py +18 -13
- mlrun/runtimes/pod.py +154 -45
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +185 -172
- mlrun/serving/server.py +7 -1
- mlrun/serving/states.py +97 -78
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +105 -72
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/clones.py +1 -1
- mlrun/utils/helpers.py +63 -19
- mlrun/utils/logger.py +106 -4
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +33 -14
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +6 -6
- mlrun/utils/notifications/notification_pusher.py +86 -44
- mlrun/utils/regex.py +11 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/METADATA +29 -24
- mlrun-1.8.0rc11.dist-info/RECORD +347 -0
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.1rc10.dist-info/RECORD +0 -351
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/LICENSE +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/WHEEL +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
import tempfile
|
|
17
|
+
from collections.abc import Iterator
|
|
18
|
+
from copy import deepcopy
|
|
19
|
+
from importlib import import_module
|
|
20
|
+
from typing import Optional, Union
|
|
21
|
+
|
|
22
|
+
import mlrun
|
|
23
|
+
from mlrun.artifacts import Artifact, ArtifactSpec
|
|
24
|
+
from mlrun.model import ModelObj
|
|
25
|
+
|
|
26
|
+
from ..utils import generate_artifact_uri
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DocumentLoaderSpec(ModelObj):
    """
    Serializable specification of a document loader.

    Holds the fully-qualified name of a loader class (e.g. a langchain
    document loader), the name of the loader argument that receives the
    source path, and any extra keyword arguments. ``make_loader`` imports
    the class dynamically and instantiates it for a concrete source path,
    so loaded documents can later be re-created (and optionally uploaded
    as artifacts) from this spec alone.

    Attributes:
        loader_class_name (str): Fully-qualified name of the loader class to use.
        src_name (str): Name of the loader argument that receives the source path.
        kwargs (Optional[dict]): Extra keyword arguments passed to the loader class.
    """

    _dict_fields = ["loader_class_name", "src_name", "kwargs"]

    def __init__(
        self,
        loader_class_name: str = "langchain_community.document_loaders.TextLoader",
        src_name: str = "file_path",
        kwargs: Optional[dict] = None,
    ):
        """
        Initialize the document loader specification.

        Args:
            loader_class_name (str): Fully-qualified name of the loader class to use.
            src_name (str): Name of the loader argument that receives the source path.
            kwargs (Optional[dict]): Extra keyword arguments for the loader class.

        Example:
            >>> # Create a loader specification for PDF documents
            >>> loader_spec = DocumentLoaderSpec(
            ...     loader_class_name="langchain_community.document_loaders.PDFLoader",
            ...     src_name="file_path",
            ...     kwargs={"extract_images": True},
            ... )
            >>> # Create a loader instance for a specific PDF file
            >>> pdf_loader = loader_spec.make_loader("/path/to/document.pdf")
            >>> # Load the documents
            >>> documents = pdf_loader.load()
        """
        self.loader_class_name = loader_class_name
        self.src_name = src_name
        self.kwargs = kwargs

    def make_loader(self, src_path):
        """Import the configured loader class and instantiate it for ``src_path``."""
        module_path, class_name = self.loader_class_name.rsplit(".", 1)
        loader_cls = getattr(import_module(module_path), class_name)
        init_kwargs = deepcopy(self.kwargs or {})
        init_kwargs[self.src_name] = src_path
        return loader_cls(**init_kwargs)
|
|
86
|
+
|
|
87
|
+
class MLRunLoader:
    """
    A factory class for creating instances of a dynamically defined document loader.

    The returned object is a langchain ``BaseLoader`` subclass instance that, when
    loaded, logs the document as an MLRun artifact via the producer and yields the
    artifact's langchain representation.

    Args:
        source_path (str): The source path of the document to be loaded.
        loader_spec (DocumentLoaderSpec): Specification for the document loader.
        artifact_key (str): The key for the artifact to be logged. It can include
            '%%' which will be replaced by a hex-encoded version of the source path.
        producer (Optional[Union[MlrunProject, str, MLClientCtx]]): The producer
            of the document; defaults to the default MLRun project.
        upload (bool): Flag indicating whether to upload the document.

    Returns:
        DynamicDocumentLoader: An instance of a dynamically defined subclass of BaseLoader.

    Example:
        >>> loader_spec = DocumentLoaderSpec(
        ...     loader_class_name="langchain_community.document_loaders.TextLoader",
        ...     src_name="file_path",
        ... )
        >>> # Basic loader for a single file
        >>> loader = MLRunLoader(
        ...     source_path="/path/to/document.txt",
        ...     loader_spec=loader_spec,
        ...     artifact_key="my_doc",
        ...     producer=project,
        ...     upload=True,
        ... )
        >>> documents = loader.load()
        >>> # Auto-generated keys: %% is replaced with the encoded path
        >>> loader = MLRunLoader(
        ...     source_path="/path/to/document.txt",
        ...     loader_spec=loader_spec,
        ...     artifact_key="doc%%",
        ...     producer=project,
        ... )
        >>> documents = loader.load()
        >>> # Use with DirectoryLoader
        >>> from langchain_community.document_loaders import DirectoryLoader
        >>> dir_loader = DirectoryLoader(
        ...     "/path/to/directory",
        ...     glob="**/*.txt",
        ...     loader_cls=MLRunLoader,
        ...     loader_kwargs={
        ...         "loader_spec": loader_spec,
        ...         "artifact_key": "doc%%",
        ...         "producer": project,
        ...         "upload": True,
        ...     },
        ... )
        >>> documents = dir_loader.load()
    """

    def __new__(
        cls,
        source_path: str,
        loader_spec: "DocumentLoaderSpec",
        artifact_key="doc%%",
        producer: Optional[Union["MlrunProject", str, "MLClientCtx"]] = None,  # noqa: F821
        upload: bool = False,
    ):
        # langchain is imported lazily so it is only required when actually used
        from langchain_community.document_loaders.base import BaseLoader

        class DynamicDocumentLoader(BaseLoader):
            def __init__(
                self,
                local_path,
                loader_spec,
                artifact_key,
                producer,
                upload,
            ):
                if "%%" in artifact_key:
                    artifact_key = MLRunLoader.artifact_key_instance(
                        artifact_key, local_path
                    )
                self.artifact_key = artifact_key
                self.loader_spec = loader_spec
                self.local_path = local_path
                self.upload = upload

                # Resolve the producer: fall back to the default project name,
                # then turn a project name into a project object
                resolved_producer = producer or mlrun.mlconf.default_project
                if isinstance(resolved_producer, str):
                    resolved_producer = mlrun.get_or_create_project(resolved_producer)
                self.producer = resolved_producer

            def lazy_load(self) -> Iterator["Document"]:  # noqa: F821
                # Log the document artifact, then yield its langchain representation
                artifact = self.producer.log_document(
                    key=self.artifact_key,
                    document_loader_spec=self.loader_spec,
                    local_path=self.local_path,
                    upload=self.upload,
                )
                yield artifact.to_langchain_documents()[0]

        # Return an instance of the dynamically defined subclass
        return DynamicDocumentLoader(
            artifact_key=artifact_key,
            local_path=source_path,
            loader_spec=loader_spec,
            producer=producer,
            upload=upload,
        )

    @staticmethod
    def artifact_key_instance(artifact_key: str, src_path: str) -> str:
        """Resolve '%%' in ``artifact_key`` into a key-safe encoding of ``src_path``.

        Runs of characters in ``src_path`` that match the valid artifact-key
        pattern are kept verbatim; every other character is replaced by its
        two-digit lowercase hex code.
        """
        if "%%" not in artifact_key:
            return artifact_key

        # Convert the anchored pattern (^...$) to a non-anchored version for finditer
        valid_sequence = mlrun.utils.regex.artifact_key[0].strip("^$")

        pieces = []
        cursor = 0
        for match in re.finditer(valid_sequence, src_path):
            # Hex-encode the characters between valid sequences
            pieces.extend(
                format(ord(char), "02x") for char in src_path[cursor : match.start()]
            )
            pieces.append(match.group())
            cursor = match.end()
        # Hex-encode any trailing characters after the last match
        pieces.extend(format(ord(char), "02x") for char in src_path[cursor:])

        return artifact_key.replace("%%", "".join(pieces))
|
|
228
|
+
class DocumentArtifact(Artifact):
    """
    A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
    """

    class DocumentArtifactSpec(ArtifactSpec):
        _dict_fields = ArtifactSpec._dict_fields + [
            "document_loader",
            "collections",
            "original_source",
        ]

        def __init__(
            self,
            *args,
            document_loader: Optional[DocumentLoaderSpec] = None,
            collections: Optional[dict] = None,
            original_source: Optional[str] = None,
            **kwargs,
        ):
            super().__init__(*args, **kwargs)
            # Loader spec (as dict) used to re-load the document content
            self.document_loader = document_loader
            # Mapping of collection id -> membership marker ("1")
            self.collections = collections if collections is not None else {}
            # Original (pre-upload) source path/URL of the document
            self.original_source = original_source

    kind = "document"

    # Metadata keys set on the generated langchain documents
    METADATA_SOURCE_KEY = "source"
    METADATA_ORIGINAL_SOURCE_KEY = "original_source"
    METADATA_CHUNK_KEY = "mlrun_chunk"
    METADATA_ARTIFACT_URI_KEY = "mlrun_object_uri"
    METADATA_ARTIFACT_TARGET_PATH_KEY = "mlrun_target_path"

    def __init__(
        self,
        original_source: Optional[str] = None,
        document_loader_spec: Optional[DocumentLoaderSpec] = None,
        **kwargs,
    ):
        """
        Initialize a document artifact.

        Args:
            original_source (Optional[str]): Original source path/URL of the document.
            document_loader_spec (Optional[DocumentLoaderSpec]): Loader spec used to
                re-load the document content; stored on the spec as a dict.
            **kwargs: Forwarded to the base ``Artifact`` constructor.
        """
        super().__init__(**kwargs)
        self.spec.document_loader = (
            document_loader_spec.to_dict()
            if document_loader_spec
            else self.spec.document_loader
        )
        self.spec.original_source = original_source or self.spec.original_source

    @property
    def spec(self) -> DocumentArtifactSpec:
        return self._spec

    @spec.setter
    def spec(self, spec):
        self._spec = self._verify_dict(
            spec, "spec", DocumentArtifact.DocumentArtifactSpec
        )

    def get_source(self):
        """Get the source URI for this artifact."""
        return generate_artifact_uri(self.metadata.project, self.spec.db_key)

    def to_langchain_documents(
        self,
        splitter: Optional["TextSplitter"] = None,  # noqa: F821
    ) -> list["Document"]:  # noqa: F821
        """
        Create LangChain documents from the artifact.

        The document content is re-loaded either from the artifact's target path
        (downloaded to a temporary file) or from its original source, using the
        stored document loader spec.

        Args:
            splitter (Optional[TextSplitter]): A LangChain TextSplitter to split the
                document into chunks.

        Returns:
            list[Document]: A list of LangChain Document objects.

        Raises:
            ValueError: If neither a target path nor an original source is available.
        """
        # NOTE: the docstring was previously placed after this import, making it a
        # dead string expression rather than the function's docstring.
        from langchain.schema import Document

        loader_spec = DocumentLoaderSpec.from_dict(self.spec.document_loader)
        if self.get_target_path():
            # Download the stored content to a temp file and load from there
            with tempfile.NamedTemporaryFile() as tmp_file:
                mlrun.datastore.store_manager.object(
                    url=self.get_target_path()
                ).download(tmp_file.name)
                loader = loader_spec.make_loader(tmp_file.name)
                documents = loader.load()
        elif self.spec.original_source:
            loader = loader_spec.make_loader(self.spec.original_source)
            documents = loader.load()
        else:
            raise ValueError(
                "No target_path or original_source provided. Cannot load document."
            )

        results = []
        idx = 0
        for document in documents:
            if splitter:
                texts = splitter.split_text(document.page_content)
            else:
                texts = [document.page_content]

            # Copy so the loaded document's own metadata dict is not mutated
            metadata = dict(document.metadata)

            metadata[self.METADATA_ORIGINAL_SOURCE_KEY] = self.spec.original_source
            metadata[self.METADATA_SOURCE_KEY] = self.get_source()
            metadata[self.METADATA_ARTIFACT_URI_KEY] = self.uri
            if self.get_target_path():
                metadata[self.METADATA_ARTIFACT_TARGET_PATH_KEY] = (
                    self.get_target_path()
                )

            for text in texts:
                # Chunk index is global across all loaded documents
                metadata[self.METADATA_CHUNK_KEY] = str(idx)
                results.append(
                    Document(
                        page_content=text,
                        metadata=metadata.copy(),
                    )
                )
                idx += 1
        return results

    def collection_add(self, collection_id: str) -> None:
        """
        Add a collection ID to the artifact's collection list.

        Adds the specified collection ID to the artifact's collection mapping if it
        doesn't already exist.
        This method only modifies the client-side artifact object and does not persist
        the changes to the MLRun DB. To save the changes permanently, you must call
        project.update_artifact() after this method.

        Args:
            collection_id (str): The ID of the collection to add
        """
        if collection_id not in self.spec.collections:
            self.spec.collections[collection_id] = "1"

    def collection_remove(self, collection_id: str) -> None:
        """
        Remove a collection ID from the artifact's collection list.

        Removes the specified collection ID from the artifact's local collection mapping.
        This method only modifies the client-side artifact object and does not persist
        the changes to the MLRun DB. To save the changes permanently, you must call
        project.update_artifact() or context.update_artifact() after this method.

        Args:
            collection_id (str): The ID of the collection to remove
        """
        # pop with default is a no-op when the id is absent (same as the guarded pop)
        self.spec.collections.pop(collection_id, None)
|
mlrun/artifacts/manager.py
CHANGED
|
@@ -41,6 +41,7 @@ from .dataset import (
|
|
|
41
41
|
DatasetArtifact,
|
|
42
42
|
TableArtifact,
|
|
43
43
|
)
|
|
44
|
+
from .document import DocumentArtifact
|
|
44
45
|
from .model import ModelArtifact
|
|
45
46
|
from .plots import (
|
|
46
47
|
PlotArtifact,
|
|
@@ -57,6 +58,7 @@ artifact_types = {
|
|
|
57
58
|
"model": ModelArtifact,
|
|
58
59
|
"dataset": DatasetArtifact,
|
|
59
60
|
"plotly": PlotlyArtifact,
|
|
61
|
+
"document": DocumentArtifact,
|
|
60
62
|
}
|
|
61
63
|
|
|
62
64
|
|
|
@@ -124,7 +126,7 @@ class ArtifactManager:
|
|
|
124
126
|
|
|
125
127
|
self.artifact_db = db
|
|
126
128
|
self.input_artifacts = {}
|
|
127
|
-
self.
|
|
129
|
+
self.artifact_uris = {}
|
|
128
130
|
|
|
129
131
|
@staticmethod
|
|
130
132
|
def ensure_artifact_source_file_exists(item, path, body):
|
|
@@ -156,14 +158,12 @@ class ArtifactManager:
|
|
|
156
158
|
|
|
157
159
|
def artifact_list(self, full=False):
|
|
158
160
|
artifacts = []
|
|
159
|
-
for
|
|
160
|
-
|
|
161
|
-
|
|
161
|
+
for artifacts_uri in self.artifact_uris.values():
|
|
162
|
+
artifact: Artifact = mlrun.datastore.get_store_resource(artifacts_uri)
|
|
163
|
+
if full:
|
|
164
|
+
artifacts.append(artifact.to_dict())
|
|
162
165
|
else:
|
|
163
|
-
|
|
164
|
-
artifacts.append(artifact.to_dict())
|
|
165
|
-
else:
|
|
166
|
-
artifacts.append(artifact.base_dict())
|
|
166
|
+
artifacts.append(artifact.base_dict())
|
|
167
167
|
return artifacts
|
|
168
168
|
|
|
169
169
|
def log_artifact(
|
|
@@ -246,6 +246,8 @@ class ArtifactManager:
|
|
|
246
246
|
# otherwise, we do not want to override it.
|
|
247
247
|
# this is mainly relevant for imported artifacts that have an explicit db_key value already set
|
|
248
248
|
db_key = item.db_key or key
|
|
249
|
+
if db_key != key:
|
|
250
|
+
validate_artifact_key_name(db_key, "artifact.db_key")
|
|
249
251
|
item.db_key = db_key or ""
|
|
250
252
|
item.viewer = viewer or item.viewer
|
|
251
253
|
item.tree = producer.tag
|
|
@@ -304,7 +306,6 @@ class ArtifactManager:
|
|
|
304
306
|
item.target_path = target_path
|
|
305
307
|
|
|
306
308
|
item.before_log()
|
|
307
|
-
self.artifacts[key] = item
|
|
308
309
|
|
|
309
310
|
if ((upload is None and item.kind != "dir") or upload) and not item.is_inline():
|
|
310
311
|
# before uploading the item, we want to ensure that its tags are valid,
|
|
@@ -313,32 +314,38 @@ class ArtifactManager:
|
|
|
313
314
|
item.upload(artifact_path=artifact_path)
|
|
314
315
|
|
|
315
316
|
if db_key:
|
|
316
|
-
self._log_to_db(db_key, project, producer.inputs, item)
|
|
317
|
+
artifact_uid = self._log_to_db(db_key, project, producer.inputs, item)
|
|
318
|
+
if artifact_uid is not None:
|
|
319
|
+
item.uid = artifact_uid
|
|
320
|
+
# Generate the artifact URI after logging to the database and retrieving the artifact UID, if available.
|
|
321
|
+
self.artifact_uris[key] = item.uri
|
|
322
|
+
|
|
317
323
|
size = str(item.size) or "?"
|
|
318
324
|
db_str = "Y" if (self.artifact_db and db_key) else "N"
|
|
319
325
|
logger.debug(
|
|
320
|
-
f"
|
|
326
|
+
f"Log artifact {key} at {item.target_path}, size: {size}, db: {db_str}"
|
|
321
327
|
)
|
|
322
328
|
return item
|
|
323
329
|
|
|
324
|
-
def update_artifact(self, producer, item):
|
|
325
|
-
self.
|
|
330
|
+
def update_artifact(self, producer, item: Artifact):
|
|
331
|
+
self.artifact_uris[item.key] = item.uri
|
|
326
332
|
self._log_to_db(item.db_key, producer.project, producer.inputs, item)
|
|
327
333
|
|
|
328
|
-
def _log_to_db(self, key, project, sources, item, tag=None):
|
|
334
|
+
def _log_to_db(self, key, project, sources, item, tag=None) -> typing.Optional[str]:
|
|
329
335
|
"""
|
|
330
336
|
log artifact to db
|
|
331
337
|
:param key: Identifying key of the artifact.
|
|
332
338
|
:param project: Project that the artifact belongs to.
|
|
333
|
-
:param sources: List of artifact sources ( Mainly passed from the producer.items ).
|
|
339
|
+
:param sources: List of artifact sources ( Mainly passed from the `producer.items` ).
|
|
334
340
|
:param item: The actual artifact to store.
|
|
335
341
|
:param tag: The name of the Tag of the artifact.
|
|
342
|
+
:return: The logged artifact uid.
|
|
336
343
|
"""
|
|
337
344
|
if self.artifact_db:
|
|
338
345
|
item.updated = None
|
|
339
346
|
if sources:
|
|
340
347
|
item.sources = [{"name": k, "path": str(v)} for k, v in sources.items()]
|
|
341
|
-
self.artifact_db.store_artifact(
|
|
348
|
+
artifact_item = self.artifact_db.store_artifact(
|
|
342
349
|
key,
|
|
343
350
|
item.to_dict(),
|
|
344
351
|
iter=item.iter,
|
|
@@ -346,6 +353,8 @@ class ArtifactManager:
|
|
|
346
353
|
project=project,
|
|
347
354
|
tree=item.tree,
|
|
348
355
|
)
|
|
356
|
+
if artifact_item:
|
|
357
|
+
return artifact_item.get("metadata", {}).get("uid")
|
|
349
358
|
|
|
350
359
|
def link_artifact(
|
|
351
360
|
self,
|
|
@@ -387,7 +396,7 @@ class ArtifactManager:
|
|
|
387
396
|
deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
|
|
388
397
|
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
|
|
389
398
|
),
|
|
390
|
-
secrets: dict = None,
|
|
399
|
+
secrets: typing.Optional[dict] = None,
|
|
391
400
|
):
|
|
392
401
|
self.artifact_db.del_artifact(
|
|
393
402
|
key=item.db_key,
|
mlrun/artifacts/model.py
CHANGED
|
@@ -303,7 +303,7 @@ class ModelArtifact(Artifact):
|
|
|
303
303
|
self.metadata.labels = self.metadata.labels or {}
|
|
304
304
|
self.metadata.labels["framework"] = self.spec.framework
|
|
305
305
|
|
|
306
|
-
def upload(self, artifact_path: str = None):
|
|
306
|
+
def upload(self, artifact_path: Optional[str] = None):
|
|
307
307
|
"""
|
|
308
308
|
internal, upload to target store
|
|
309
309
|
:param artifact_path: required only for when generating target_path from artifact hash
|
|
@@ -324,9 +324,7 @@ class ModelArtifact(Artifact):
|
|
|
324
324
|
artifact=self, extra_data=self.spec.extra_data, artifact_path=artifact_path
|
|
325
325
|
)
|
|
326
326
|
|
|
327
|
-
|
|
328
|
-
# and the tag is not part of the model spec but the metadata of the model artifact
|
|
329
|
-
spec_body = _remove_tag_from_spec_yaml(self)
|
|
327
|
+
spec_body = _sanitize_and_serialize_model_spec_yaml(self)
|
|
330
328
|
spec_target_path = None
|
|
331
329
|
|
|
332
330
|
if mlrun.mlconf.artifacts.generate_target_path_from_artifact_hash:
|
|
@@ -355,7 +353,7 @@ class ModelArtifact(Artifact):
|
|
|
355
353
|
def _upload_body_or_file(
|
|
356
354
|
self,
|
|
357
355
|
artifact_path: str,
|
|
358
|
-
target_model_path: str = None,
|
|
356
|
+
target_model_path: Optional[str] = None,
|
|
359
357
|
):
|
|
360
358
|
body = self.spec.get_body()
|
|
361
359
|
if body:
|
|
@@ -403,12 +401,6 @@ class ModelArtifact(Artifact):
|
|
|
403
401
|
return mlrun.get_dataitem(target_model_path).get()
|
|
404
402
|
|
|
405
403
|
|
|
406
|
-
def _get_src_path(model_spec: ModelArtifact, filename):
|
|
407
|
-
if model_spec.src_path:
|
|
408
|
-
return path.join(model_spec.src_path, filename)
|
|
409
|
-
return filename
|
|
410
|
-
|
|
411
|
-
|
|
412
404
|
def get_model(model_dir, suffix=""):
|
|
413
405
|
"""return model file, model spec object, and list of extra data items
|
|
414
406
|
|
|
@@ -483,49 +475,20 @@ def get_model(model_dir, suffix=""):
|
|
|
483
475
|
return temp_path, model_spec, extra_dataitems
|
|
484
476
|
|
|
485
477
|
|
|
486
|
-
def _load_model_spec(spec_path):
|
|
487
|
-
data = mlrun.datastore.store_manager.object(url=spec_path).get()
|
|
488
|
-
spec = yaml.load(data, Loader=yaml.FullLoader)
|
|
489
|
-
return ModelArtifact.from_dict(spec)
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
def _get_file_path(base_path: str, name: str, isdir=False):
|
|
493
|
-
if not is_relative_path(name):
|
|
494
|
-
return name
|
|
495
|
-
if not isdir:
|
|
496
|
-
base_path = path.dirname(base_path)
|
|
497
|
-
return path.join(base_path, name).replace("\\", "/")
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
def _get_extra(target, extra_data, is_dir=False):
|
|
501
|
-
extra_dataitems = {}
|
|
502
|
-
for k, v in extra_data.items():
|
|
503
|
-
extra_dataitems[k] = mlrun.datastore.store_manager.object(
|
|
504
|
-
url=_get_file_path(target, v, isdir=is_dir), key=k
|
|
505
|
-
)
|
|
506
|
-
return extra_dataitems
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
def _remove_tag_from_spec_yaml(model_spec):
|
|
510
|
-
spec_dict = model_spec.to_dict()
|
|
511
|
-
spec_dict["metadata"].pop("tag", None)
|
|
512
|
-
return yaml.safe_dump(spec_dict)
|
|
513
|
-
|
|
514
|
-
|
|
515
478
|
def update_model(
|
|
516
479
|
model_artifact,
|
|
517
|
-
parameters: dict = None,
|
|
518
|
-
metrics: dict = None,
|
|
519
|
-
extra_data: dict = None,
|
|
520
|
-
inputs: list[Feature] = None,
|
|
521
|
-
outputs: list[Feature] = None,
|
|
522
|
-
feature_vector: str = None,
|
|
523
|
-
feature_weights: list = None,
|
|
480
|
+
parameters: Optional[dict] = None,
|
|
481
|
+
metrics: Optional[dict] = None,
|
|
482
|
+
extra_data: Optional[dict] = None,
|
|
483
|
+
inputs: Optional[list[Feature]] = None,
|
|
484
|
+
outputs: Optional[list[Feature]] = None,
|
|
485
|
+
feature_vector: Optional[str] = None,
|
|
486
|
+
feature_weights: Optional[list] = None,
|
|
524
487
|
key_prefix: str = "",
|
|
525
|
-
labels: dict = None,
|
|
488
|
+
labels: Optional[dict] = None,
|
|
526
489
|
write_spec_copy=True,
|
|
527
490
|
store_object: bool = True,
|
|
528
|
-
):
|
|
491
|
+
) -> ModelArtifact:
|
|
529
492
|
"""Update model object attributes
|
|
530
493
|
|
|
531
494
|
this method will edit or add attributes to a model object
|
|
@@ -593,10 +556,7 @@ def update_model(
|
|
|
593
556
|
|
|
594
557
|
if write_spec_copy:
|
|
595
558
|
spec_path = path.join(model_spec.target_path, model_spec_filename)
|
|
596
|
-
|
|
597
|
-
# the model spec yaml should not include the tag, as the same model can be used with different tags,
|
|
598
|
-
# and the tag is not part of the model spec but the metadata of the model artifact
|
|
599
|
-
model_spec_yaml = _remove_tag_from_spec_yaml(model_spec)
|
|
559
|
+
model_spec_yaml = _sanitize_and_serialize_model_spec_yaml(model_spec)
|
|
600
560
|
mlrun.datastore.store_manager.object(url=spec_path).put(model_spec_yaml)
|
|
601
561
|
|
|
602
562
|
model_spec.db_key = model_spec.db_key or model_spec.key
|
|
@@ -609,3 +569,56 @@ def update_model(
|
|
|
609
569
|
project=model_spec.project,
|
|
610
570
|
)
|
|
611
571
|
return model_spec
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def _get_src_path(model_spec: ModelArtifact, filename: str) -> str:
|
|
575
|
+
return path.join(model_spec.src_path, filename) if model_spec.src_path else filename
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def _load_model_spec(spec_path) -> ModelArtifact:
|
|
579
|
+
data = mlrun.datastore.store_manager.object(url=spec_path).get()
|
|
580
|
+
spec = yaml.load(data, Loader=yaml.FullLoader)
|
|
581
|
+
return ModelArtifact.from_dict(spec)
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def _get_file_path(base_path: str, name: str, isdir: bool = False) -> str:
|
|
585
|
+
if not is_relative_path(name):
|
|
586
|
+
return name
|
|
587
|
+
if not isdir:
|
|
588
|
+
base_path = path.dirname(base_path)
|
|
589
|
+
return path.join(base_path, name).replace("\\", "/")
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def _get_extra(target: str, extra_data: dict, is_dir: bool = False) -> dict:
|
|
593
|
+
extra_dataitems = {}
|
|
594
|
+
for k, v in extra_data.items():
|
|
595
|
+
extra_dataitems[k] = mlrun.datastore.store_manager.object(
|
|
596
|
+
url=_get_file_path(target, v, isdir=is_dir), key=k
|
|
597
|
+
)
|
|
598
|
+
return extra_dataitems
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
def _sanitize_and_serialize_model_spec_yaml(model: ModelArtifact) -> str:
|
|
602
|
+
model_dict = _sanitize_model_spec(model)
|
|
603
|
+
return _serialize_model_spec_yaml(model_dict)
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
def _sanitize_model_spec(model: ModelArtifact) -> dict:
|
|
607
|
+
model_dict = model.to_dict()
|
|
608
|
+
|
|
609
|
+
# The model spec yaml should not include the tag, as the same model can be used with different tags,
|
|
610
|
+
# and the tag is not part of the model spec but the metadata of the model artifact
|
|
611
|
+
model_dict["metadata"].pop("tag", None)
|
|
612
|
+
|
|
613
|
+
# Remove future packaging links
|
|
614
|
+
if model_dict["spec"].get("extra_data"):
|
|
615
|
+
model_dict["spec"]["extra_data"] = {
|
|
616
|
+
key: item
|
|
617
|
+
for key, item in model_dict["spec"]["extra_data"].items()
|
|
618
|
+
if item is not ...
|
|
619
|
+
}
|
|
620
|
+
return model_dict
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def _serialize_model_spec_yaml(model_dict: dict) -> str:
|
|
624
|
+
return yaml.safe_dump(model_dict)
|