mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (259) hide show
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +2 -3
  5. mlrun/artifacts/base.py +55 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +378 -0
  8. mlrun/artifacts/manager.py +26 -17
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +8 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +67 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +92 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +68 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/data_types.py +1 -0
  56. mlrun/data_types/infer.py +3 -1
  57. mlrun/data_types/spark.py +5 -3
  58. mlrun/data_types/to_pandas.py +11 -2
  59. mlrun/datastore/__init__.py +2 -2
  60. mlrun/datastore/alibaba_oss.py +4 -1
  61. mlrun/datastore/azure_blob.py +4 -1
  62. mlrun/datastore/base.py +12 -4
  63. mlrun/datastore/datastore.py +9 -3
  64. mlrun/datastore/datastore_profile.py +79 -20
  65. mlrun/datastore/dbfs_store.py +4 -1
  66. mlrun/datastore/filestore.py +4 -1
  67. mlrun/datastore/google_cloud_storage.py +4 -1
  68. mlrun/datastore/hdfs.py +4 -1
  69. mlrun/datastore/inmem.py +4 -1
  70. mlrun/datastore/redis.py +4 -1
  71. mlrun/datastore/s3.py +4 -1
  72. mlrun/datastore/sources.py +52 -51
  73. mlrun/datastore/store_resources.py +7 -4
  74. mlrun/datastore/targets.py +23 -22
  75. mlrun/datastore/utils.py +2 -2
  76. mlrun/datastore/v3io.py +4 -1
  77. mlrun/datastore/vectorstore.py +229 -0
  78. mlrun/datastore/wasbfs/fs.py +13 -12
  79. mlrun/db/base.py +213 -83
  80. mlrun/db/factory.py +0 -3
  81. mlrun/db/httpdb.py +1265 -387
  82. mlrun/db/nopdb.py +205 -74
  83. mlrun/errors.py +2 -2
  84. mlrun/execution.py +136 -50
  85. mlrun/feature_store/__init__.py +0 -2
  86. mlrun/feature_store/api.py +41 -40
  87. mlrun/feature_store/common.py +9 -9
  88. mlrun/feature_store/feature_set.py +20 -18
  89. mlrun/feature_store/feature_vector.py +27 -24
  90. mlrun/feature_store/retrieval/base.py +14 -9
  91. mlrun/feature_store/retrieval/job.py +2 -1
  92. mlrun/feature_store/steps.py +2 -2
  93. mlrun/features.py +30 -13
  94. mlrun/frameworks/__init__.py +1 -2
  95. mlrun/frameworks/_common/__init__.py +1 -2
  96. mlrun/frameworks/_common/artifacts_library.py +2 -2
  97. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  98. mlrun/frameworks/_common/model_handler.py +29 -27
  99. mlrun/frameworks/_common/producer.py +3 -1
  100. mlrun/frameworks/_dl_common/__init__.py +1 -2
  101. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  102. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  103. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  104. mlrun/frameworks/_ml_common/__init__.py +1 -2
  105. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  107. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  108. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  109. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  110. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  111. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  112. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  113. mlrun/frameworks/huggingface/__init__.py +1 -2
  114. mlrun/frameworks/huggingface/model_server.py +9 -9
  115. mlrun/frameworks/lgbm/__init__.py +47 -44
  116. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  117. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  118. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  119. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  120. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  121. mlrun/frameworks/lgbm/model_handler.py +15 -11
  122. mlrun/frameworks/lgbm/model_server.py +11 -7
  123. mlrun/frameworks/lgbm/utils.py +2 -2
  124. mlrun/frameworks/onnx/__init__.py +1 -2
  125. mlrun/frameworks/onnx/dataset.py +3 -3
  126. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  127. mlrun/frameworks/onnx/model_handler.py +7 -5
  128. mlrun/frameworks/onnx/model_server.py +8 -6
  129. mlrun/frameworks/parallel_coordinates.py +11 -11
  130. mlrun/frameworks/pytorch/__init__.py +22 -23
  131. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  132. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  133. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  134. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  135. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  136. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  137. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  138. mlrun/frameworks/pytorch/model_handler.py +21 -17
  139. mlrun/frameworks/pytorch/model_server.py +13 -9
  140. mlrun/frameworks/sklearn/__init__.py +19 -18
  141. mlrun/frameworks/sklearn/estimator.py +2 -2
  142. mlrun/frameworks/sklearn/metric.py +3 -3
  143. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  144. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  145. mlrun/frameworks/sklearn/model_handler.py +4 -3
  146. mlrun/frameworks/tf_keras/__init__.py +11 -12
  147. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  148. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  149. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  150. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  151. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  152. mlrun/frameworks/tf_keras/model_server.py +12 -8
  153. mlrun/frameworks/xgboost/__init__.py +19 -18
  154. mlrun/frameworks/xgboost/model_handler.py +13 -9
  155. mlrun/launcher/base.py +3 -4
  156. mlrun/launcher/local.py +1 -1
  157. mlrun/launcher/remote.py +1 -1
  158. mlrun/lists.py +4 -3
  159. mlrun/model.py +117 -46
  160. mlrun/model_monitoring/__init__.py +4 -4
  161. mlrun/model_monitoring/api.py +72 -59
  162. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  163. mlrun/model_monitoring/applications/base.py +165 -6
  164. mlrun/model_monitoring/applications/context.py +88 -37
  165. mlrun/model_monitoring/applications/evidently_base.py +0 -1
  166. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  167. mlrun/model_monitoring/applications/results.py +55 -3
  168. mlrun/model_monitoring/controller.py +207 -239
  169. mlrun/model_monitoring/db/__init__.py +0 -2
  170. mlrun/model_monitoring/db/_schedules.py +156 -0
  171. mlrun/model_monitoring/db/_stats.py +189 -0
  172. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  173. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
  174. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  175. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
  176. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  177. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  178. mlrun/model_monitoring/helpers.py +151 -49
  179. mlrun/model_monitoring/stream_processing.py +99 -283
  180. mlrun/model_monitoring/tracking_policy.py +10 -3
  181. mlrun/model_monitoring/writer.py +48 -36
  182. mlrun/package/__init__.py +3 -6
  183. mlrun/package/context_handler.py +1 -1
  184. mlrun/package/packager.py +12 -9
  185. mlrun/package/packagers/__init__.py +0 -2
  186. mlrun/package/packagers/default_packager.py +14 -11
  187. mlrun/package/packagers/numpy_packagers.py +16 -7
  188. mlrun/package/packagers/pandas_packagers.py +18 -18
  189. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  190. mlrun/package/packagers_manager.py +31 -14
  191. mlrun/package/utils/__init__.py +0 -3
  192. mlrun/package/utils/_pickler.py +6 -6
  193. mlrun/platforms/__init__.py +47 -16
  194. mlrun/platforms/iguazio.py +4 -1
  195. mlrun/projects/operations.py +27 -27
  196. mlrun/projects/pipelines.py +71 -36
  197. mlrun/projects/project.py +890 -220
  198. mlrun/run.py +53 -10
  199. mlrun/runtimes/__init__.py +1 -3
  200. mlrun/runtimes/base.py +15 -11
  201. mlrun/runtimes/daskjob.py +9 -9
  202. mlrun/runtimes/generators.py +2 -1
  203. mlrun/runtimes/kubejob.py +4 -5
  204. mlrun/runtimes/mounts.py +572 -0
  205. mlrun/runtimes/mpijob/__init__.py +0 -2
  206. mlrun/runtimes/mpijob/abstract.py +7 -6
  207. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  208. mlrun/runtimes/nuclio/application/application.py +11 -11
  209. mlrun/runtimes/nuclio/function.py +19 -17
  210. mlrun/runtimes/nuclio/serving.py +18 -13
  211. mlrun/runtimes/pod.py +154 -45
  212. mlrun/runtimes/remotesparkjob.py +3 -2
  213. mlrun/runtimes/sparkjob/__init__.py +0 -2
  214. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  215. mlrun/runtimes/utils.py +6 -5
  216. mlrun/serving/merger.py +6 -4
  217. mlrun/serving/remote.py +18 -17
  218. mlrun/serving/routers.py +185 -172
  219. mlrun/serving/server.py +7 -1
  220. mlrun/serving/states.py +97 -78
  221. mlrun/serving/utils.py +13 -2
  222. mlrun/serving/v1_serving.py +3 -2
  223. mlrun/serving/v2_serving.py +105 -72
  224. mlrun/track/__init__.py +1 -1
  225. mlrun/track/tracker.py +2 -2
  226. mlrun/track/trackers/mlflow_tracker.py +6 -5
  227. mlrun/utils/async_http.py +1 -1
  228. mlrun/utils/clones.py +1 -1
  229. mlrun/utils/helpers.py +63 -19
  230. mlrun/utils/logger.py +106 -4
  231. mlrun/utils/notifications/notification/__init__.py +22 -19
  232. mlrun/utils/notifications/notification/base.py +33 -14
  233. mlrun/utils/notifications/notification/console.py +6 -6
  234. mlrun/utils/notifications/notification/git.py +11 -11
  235. mlrun/utils/notifications/notification/ipython.py +10 -9
  236. mlrun/utils/notifications/notification/mail.py +176 -0
  237. mlrun/utils/notifications/notification/slack.py +6 -6
  238. mlrun/utils/notifications/notification/webhook.py +6 -6
  239. mlrun/utils/notifications/notification_pusher.py +86 -44
  240. mlrun/utils/regex.py +11 -2
  241. mlrun/utils/version/version.json +2 -2
  242. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/METADATA +29 -24
  243. mlrun-1.8.0rc11.dist-info/RECORD +347 -0
  244. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  245. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  246. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  247. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  248. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  249. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  250. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  251. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  252. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  253. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  254. mlrun/model_monitoring/model_endpoint.py +0 -118
  255. mlrun-1.7.1rc10.dist-info/RECORD +0 -351
  256. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/LICENSE +0 -0
  257. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/WHEEL +0 -0
  258. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/entry_points.txt +0 -0
  259. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,378 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ import tempfile
17
+ from collections.abc import Iterator
18
+ from copy import deepcopy
19
+ from importlib import import_module
20
+ from typing import Optional, Union
21
+
22
+ import mlrun
23
+ from mlrun.artifacts import Artifact, ArtifactSpec
24
+ from mlrun.model import ModelObj
25
+
26
+ from ..utils import generate_artifact_uri
27
+
28
+
29
class DocumentLoaderSpec(ModelObj):
    """
    Specification of how to build a document loader for a given source path.

    The spec records the fully qualified name of a loader class (e.g. a
    LangChain document loader), the name of the constructor argument that
    receives the source path, and any extra keyword arguments.
    ``make_loader`` dynamically imports that class and instantiates it for a
    concrete path. Documents produced by the loader can later be logged as
    artifacts by callers.

    Attributes:
        loader_class_name (str): Fully qualified name of the loader class.
        src_name (str): Name of the constructor argument that receives the source path.
        kwargs (Optional[dict]): Extra keyword arguments passed to the loader class.
    """

    _dict_fields = ["loader_class_name", "src_name", "kwargs"]

    def __init__(
        self,
        loader_class_name: str = "langchain_community.document_loaders.TextLoader",
        src_name: str = "file_path",
        kwargs: Optional[dict] = None,
    ):
        """
        Initialize the document loader specification.

        Args:
            loader_class_name (str): Fully qualified name of the loader class to use.
            src_name (str): Constructor argument that receives the source path.
            kwargs (Optional[dict]): Extra keyword arguments for the loader class.

        Example:
            >>> # Create a loader specification for PDF documents
            >>> loader_spec = DocumentLoaderSpec(
            ...     loader_class_name="langchain_community.document_loaders.PDFLoader",
            ...     src_name="file_path",
            ...     kwargs={"extract_images": True},
            ... )
            >>> # Create a loader instance for a specific PDF file
            >>> pdf_loader = loader_spec.make_loader("/path/to/document.pdf")
            >>> # Load the documents
            >>> documents = pdf_loader.load()
        """
        self.loader_class_name = loader_class_name
        self.src_name = src_name
        self.kwargs = kwargs

    def make_loader(self, src_path):
        """Import the configured loader class and instantiate it for ``src_path``."""
        module_path, attr_name = self.loader_class_name.rsplit(".", 1)
        loader_cls = getattr(import_module(module_path), attr_name)
        # Deep-copy so repeated calls never observe mutations of the shared spec kwargs.
        init_kwargs = deepcopy(self.kwargs or {})
        init_kwargs[self.src_name] = src_path
        return loader_cls(**init_kwargs)
85
+
86
+
87
class MLRunLoader:
    """
    A factory class for creating instances of a dynamically defined document loader.

    Note: calling ``MLRunLoader(...)`` does not return an ``MLRunLoader``
    instance — ``__new__`` builds and returns a ``DynamicDocumentLoader``
    (a subclass of LangChain's ``BaseLoader``) whose ``lazy_load`` logs the
    document as an MLRun artifact and yields the resulting LangChain document.

    Args:
        source_path (str): The source path of the document to be loaded.
        loader_spec (DocumentLoaderSpec): Specification for the document loader.
        artifact_key (str): The key for the artifact to be logged. It can include '%%', which will be
            replaced by a hex-encoded version of the source path.
        producer (Optional[Union[MlrunProject, str, MLClientCtx]], optional): The producer of the document.
        upload (bool, optional): Flag indicating whether to upload the document.

    Returns:
        DynamicDocumentLoader: An instance of a dynamically defined subclass of BaseLoader.

    Example:
        >>> # Create a document loader specification
        >>> loader_spec = DocumentLoaderSpec(
        ...     loader_class_name="langchain_community.document_loaders.TextLoader",
        ...     src_name="file_path",
        ... )
        >>> # Create a basic loader for a single file
        >>> loader = MLRunLoader(
        ...     source_path="/path/to/document.txt",
        ...     loader_spec=loader_spec,
        ...     artifact_key="my_doc",
        ...     producer=project,
        ...     upload=True,
        ... )
        >>> documents = loader.load()
        >>> # Create a loader with auto-generated keys
        >>> loader = MLRunLoader(
        ...     source_path="/path/to/document.txt",
        ...     loader_spec=loader_spec,
        ...     artifact_key="doc%%",  # %% will be replaced with encoded path
        ...     producer=project,
        ... )
        >>> documents = loader.load()
        >>> # Use with DirectoryLoader
        >>> from langchain_community.document_loaders import DirectoryLoader
        >>> dir_loader = DirectoryLoader(
        ...     "/path/to/directory",
        ...     glob="**/*.txt",
        ...     loader_cls=MLRunLoader,
        ...     loader_kwargs={
        ...         "loader_spec": loader_spec,
        ...         "artifact_key": "doc%%",
        ...         "producer": project,
        ...         "upload": True,
        ...     },
        ... )
        >>> documents = dir_loader.load()
    """

    def __new__(
        cls,
        source_path: str,
        loader_spec: "DocumentLoaderSpec",
        artifact_key="doc%%",
        producer: Optional[Union["MlrunProject", str, "MLClientCtx"]] = None,  # noqa: F821
        upload: bool = False,
    ):
        # Dynamically import BaseLoader so langchain is only required when the
        # loader is actually constructed.
        from langchain_community.document_loaders.base import BaseLoader

        class DynamicDocumentLoader(BaseLoader):
            def __init__(
                self,
                local_path,
                loader_spec,
                artifact_key,
                producer,
                upload,
            ):
                self.producer = producer
                # Expand '%%' into a hex-encoded form of the path so each
                # source file gets a distinct, regex-valid artifact key.
                self.artifact_key = (
                    MLRunLoader.artifact_key_instance(artifact_key, local_path)
                    if "%%" in artifact_key
                    else artifact_key
                )
                self.loader_spec = loader_spec
                self.local_path = local_path
                self.upload = upload

                # Resolve the producer
                if not self.producer:
                    self.producer = mlrun.mlconf.default_project
                if isinstance(self.producer, str):
                    self.producer = mlrun.get_or_create_project(self.producer)

            def lazy_load(self) -> Iterator["Document"]:  # noqa: F821
                """Log the document as an artifact and yield its first LangChain document."""
                artifact = self.producer.log_document(
                    key=self.artifact_key,
                    document_loader_spec=self.loader_spec,
                    local_path=self.local_path,
                    upload=self.upload,
                )
                res = artifact.to_langchain_documents()
                yield res[0]

        # Return an instance of the dynamically defined subclass
        instance = DynamicDocumentLoader(
            artifact_key=artifact_key,
            local_path=source_path,
            loader_spec=loader_spec,
            producer=producer,
            upload=upload,
        )
        return instance

    @staticmethod
    def artifact_key_instance(artifact_key: str, src_path: str) -> str:
        """
        Resolve an artifact key template against a source path.

        Replaces every '%%' in ``artifact_key`` with an encoding of
        ``src_path`` in which runs of characters that match the artifact-key
        regex are kept verbatim and every other character is replaced by its
        two-digit hex code, so the result is a valid artifact key.

        Args:
            artifact_key (str): Key template, possibly containing '%%'.
            src_path (str): Source path to encode into the key.

        Returns:
            str: The resolved artifact key.
        """
        if "%%" in artifact_key:
            pattern = mlrun.utils.regex.artifact_key[0]
            # Convert anchored pattern (^...$) to non-anchored version for finditer
            search_pattern = pattern.strip("^$")
            result = []
            current_pos = 0

            # Find all valid sequences
            for match in re.finditer(search_pattern, src_path):
                # Add hex values for characters between matches
                for char in src_path[current_pos : match.start()]:
                    result.append(hex(ord(char))[2:].zfill(2))

                # Add the valid sequence
                result.append(match.group())
                current_pos = match.end()

            # Handle any remaining characters after the last match
            for char in src_path[current_pos:]:
                result.append(hex(ord(char))[2:].zfill(2))

            resolved_path = "".join(result)

            artifact_key = artifact_key.replace("%%", resolved_path)

        return artifact_key
226
+
227
+
228
class DocumentArtifact(Artifact):
    """
    A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.

    The artifact records how the document was loaded (``document_loader``),
    which collections reference it (``collections``), and the original source
    path, so the document can be re-loaded and converted into LangChain
    ``Document`` objects on demand via :meth:`to_langchain_documents`.
    """

    class DocumentArtifactSpec(ArtifactSpec):
        # Extends the generic artifact spec with document-specific fields.
        _dict_fields = ArtifactSpec._dict_fields + [
            "document_loader",
            "collections",
            "original_source",
        ]

        def __init__(
            self,
            *args,
            document_loader: Optional[DocumentLoaderSpec] = None,
            collections: Optional[dict] = None,
            original_source: Optional[str] = None,
            **kwargs,
        ):
            super().__init__(*args, **kwargs)
            self.document_loader = document_loader
            self.collections = collections if collections is not None else {}
            self.original_source = original_source

    kind = "document"

    # Keys written into each LangChain Document.metadata mapping.
    METADATA_SOURCE_KEY = "source"
    METADATA_ORIGINAL_SOURCE_KEY = "original_source"
    METADATA_CHUNK_KEY = "mlrun_chunk"
    METADATA_ARTIFACT_URI_KEY = "mlrun_object_uri"
    METADATA_ARTIFACT_TARGET_PATH_KEY = "mlrun_target_path"

    def __init__(
        self,
        original_source: Optional[str] = None,
        document_loader_spec: Optional[DocumentLoaderSpec] = None,
        **kwargs,
    ):
        """
        Initialize a document artifact.

        Args:
            original_source (Optional[str]): Original source path of the document.
            document_loader_spec (Optional[DocumentLoaderSpec]): Spec describing how to load the document.
            **kwargs: Forwarded to the base :class:`Artifact` constructor.
        """
        super().__init__(**kwargs)
        # Only override spec values when explicitly provided, so artifacts
        # re-hydrated from a dict keep their stored loader/source.
        self.spec.document_loader = (
            document_loader_spec.to_dict()
            if document_loader_spec
            else self.spec.document_loader
        )
        self.spec.original_source = original_source or self.spec.original_source

    @property
    def spec(self) -> DocumentArtifactSpec:
        return self._spec

    @spec.setter
    def spec(self, spec):
        self._spec = self._verify_dict(
            spec, "spec", DocumentArtifact.DocumentArtifactSpec
        )

    def get_source(self):
        """Get the source URI for this artifact."""
        return generate_artifact_uri(self.metadata.project, self.spec.db_key)

    def to_langchain_documents(
        self,
        splitter: Optional["TextSplitter"] = None,  # noqa: F821
    ) -> list["Document"]:  # noqa: F821
        """
        Create LC documents from the artifact

        Args:
            splitter (Optional[TextSplitter]): A LangChain TextSplitter to split the document into chunks.

        Returns:
            list[Document]: A list of LangChain Document objects.

        Raises:
            ValueError: If neither a target path nor an original source is available.
        """
        from langchain.schema import Document

        loader_spec = DocumentLoaderSpec.from_dict(self.spec.document_loader)
        if self.get_target_path():
            # Prefer the stored copy: download it and load from a temp file.
            with tempfile.NamedTemporaryFile() as tmp_file:
                mlrun.datastore.store_manager.object(
                    url=self.get_target_path()
                ).download(tmp_file.name)
                loader = loader_spec.make_loader(tmp_file.name)
                documents = loader.load()
        elif self.spec.original_source:
            loader = loader_spec.make_loader(self.spec.original_source)
            documents = loader.load()
        else:
            raise ValueError(
                "Neither target_path nor original_source is set. Cannot load document."
            )

        results = []
        idx = 0
        for document in documents:
            if splitter:
                texts = splitter.split_text(document.page_content)
            else:
                texts = [document.page_content]

            metadata = document.metadata

            metadata[self.METADATA_ORIGINAL_SOURCE_KEY] = self.spec.original_source
            metadata[self.METADATA_SOURCE_KEY] = self.get_source()
            metadata[self.METADATA_ARTIFACT_URI_KEY] = self.uri
            if self.get_target_path():
                metadata[self.METADATA_ARTIFACT_TARGET_PATH_KEY] = (
                    self.get_target_path()
                )

            for text in texts:
                # The chunk index is global across all loaded documents.
                metadata[self.METADATA_CHUNK_KEY] = str(idx)
                doc = Document(
                    page_content=text,
                    metadata=metadata.copy(),
                )
                results.append(doc)
                idx += 1
        return results

    def collection_add(self, collection_id: str) -> None:
        """
        Add a collection ID to the artifact's collection list.

        Adds the specified collection ID to the artifact's collection mapping if it
        doesn't already exist.
        This method only modifies the client-side artifact object and does not persist
        the changes to the MLRun DB. To save the changes permanently, you must call
        project.update_artifact() after this method.

        Args:
            collection_id (str): The ID of the collection to add
        """
        if collection_id not in self.spec.collections:
            self.spec.collections[collection_id] = "1"

    def collection_remove(self, collection_id: str) -> None:
        """
        Remove a collection ID from the artifact's collection list.

        Removes the specified collection ID from the artifact's local collection mapping.
        This method only modifies the client-side artifact object and does not persist
        the changes to the MLRun DB. To save the changes permanently, you must call
        project.update_artifact() or context.update_artifact() after this method.

        Args:
            collection_id (str): The ID of the collection to remove
        """
        if collection_id in self.spec.collections:
            self.spec.collections.pop(collection_id)
@@ -41,6 +41,7 @@ from .dataset import (
41
41
  DatasetArtifact,
42
42
  TableArtifact,
43
43
  )
44
+ from .document import DocumentArtifact
44
45
  from .model import ModelArtifact
45
46
  from .plots import (
46
47
  PlotArtifact,
@@ -57,6 +58,7 @@ artifact_types = {
57
58
  "model": ModelArtifact,
58
59
  "dataset": DatasetArtifact,
59
60
  "plotly": PlotlyArtifact,
61
+ "document": DocumentArtifact,
60
62
  }
61
63
 
62
64
 
@@ -124,7 +126,7 @@ class ArtifactManager:
124
126
 
125
127
  self.artifact_db = db
126
128
  self.input_artifacts = {}
127
- self.artifacts = {}
129
+ self.artifact_uris = {}
128
130
 
129
131
  @staticmethod
130
132
  def ensure_artifact_source_file_exists(item, path, body):
@@ -156,14 +158,12 @@ class ArtifactManager:
156
158
 
157
159
  def artifact_list(self, full=False):
158
160
  artifacts = []
159
- for artifact in self.artifacts.values():
160
- if isinstance(artifact, dict):
161
- artifacts.append(artifact)
161
+ for artifacts_uri in self.artifact_uris.values():
162
+ artifact: Artifact = mlrun.datastore.get_store_resource(artifacts_uri)
163
+ if full:
164
+ artifacts.append(artifact.to_dict())
162
165
  else:
163
- if full:
164
- artifacts.append(artifact.to_dict())
165
- else:
166
- artifacts.append(artifact.base_dict())
166
+ artifacts.append(artifact.base_dict())
167
167
  return artifacts
168
168
 
169
169
  def log_artifact(
@@ -246,6 +246,8 @@ class ArtifactManager:
246
246
  # otherwise, we do not want to override it.
247
247
  # this is mainly relevant for imported artifacts that have an explicit db_key value already set
248
248
  db_key = item.db_key or key
249
+ if db_key != key:
250
+ validate_artifact_key_name(db_key, "artifact.db_key")
249
251
  item.db_key = db_key or ""
250
252
  item.viewer = viewer or item.viewer
251
253
  item.tree = producer.tag
@@ -304,7 +306,6 @@ class ArtifactManager:
304
306
  item.target_path = target_path
305
307
 
306
308
  item.before_log()
307
- self.artifacts[key] = item
308
309
 
309
310
  if ((upload is None and item.kind != "dir") or upload) and not item.is_inline():
310
311
  # before uploading the item, we want to ensure that its tags are valid,
@@ -313,32 +314,38 @@ class ArtifactManager:
313
314
  item.upload(artifact_path=artifact_path)
314
315
 
315
316
  if db_key:
316
- self._log_to_db(db_key, project, producer.inputs, item)
317
+ artifact_uid = self._log_to_db(db_key, project, producer.inputs, item)
318
+ if artifact_uid is not None:
319
+ item.uid = artifact_uid
320
+ # Generate the artifact URI after logging to the database and retrieving the artifact UID, if available.
321
+ self.artifact_uris[key] = item.uri
322
+
317
323
  size = str(item.size) or "?"
318
324
  db_str = "Y" if (self.artifact_db and db_key) else "N"
319
325
  logger.debug(
320
- f"log artifact {key} at {item.target_path}, size: {size}, db: {db_str}"
326
+ f"Log artifact {key} at {item.target_path}, size: {size}, db: {db_str}"
321
327
  )
322
328
  return item
323
329
 
324
- def update_artifact(self, producer, item):
325
- self.artifacts[item.key] = item
330
+ def update_artifact(self, producer, item: Artifact):
331
+ self.artifact_uris[item.key] = item.uri
326
332
  self._log_to_db(item.db_key, producer.project, producer.inputs, item)
327
333
 
328
- def _log_to_db(self, key, project, sources, item, tag=None):
334
+ def _log_to_db(self, key, project, sources, item, tag=None) -> typing.Optional[str]:
329
335
  """
330
336
  log artifact to db
331
337
  :param key: Identifying key of the artifact.
332
338
  :param project: Project that the artifact belongs to.
333
- :param sources: List of artifact sources ( Mainly passed from the producer.items ).
339
+ :param sources: List of artifact sources ( Mainly passed from the `producer.items` ).
334
340
  :param item: The actual artifact to store.
335
341
  :param tag: The name of the Tag of the artifact.
342
+ :return: The logged artifact uid.
336
343
  """
337
344
  if self.artifact_db:
338
345
  item.updated = None
339
346
  if sources:
340
347
  item.sources = [{"name": k, "path": str(v)} for k, v in sources.items()]
341
- self.artifact_db.store_artifact(
348
+ artifact_item = self.artifact_db.store_artifact(
342
349
  key,
343
350
  item.to_dict(),
344
351
  iter=item.iter,
@@ -346,6 +353,8 @@ class ArtifactManager:
346
353
  project=project,
347
354
  tree=item.tree,
348
355
  )
356
+ if artifact_item:
357
+ return artifact_item.get("metadata", {}).get("uid")
349
358
 
350
359
  def link_artifact(
351
360
  self,
@@ -387,7 +396,7 @@ class ArtifactManager:
387
396
  deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
388
397
  mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
389
398
  ),
390
- secrets: dict = None,
399
+ secrets: typing.Optional[dict] = None,
391
400
  ):
392
401
  self.artifact_db.del_artifact(
393
402
  key=item.db_key,
mlrun/artifacts/model.py CHANGED
@@ -303,7 +303,7 @@ class ModelArtifact(Artifact):
303
303
  self.metadata.labels = self.metadata.labels or {}
304
304
  self.metadata.labels["framework"] = self.spec.framework
305
305
 
306
- def upload(self, artifact_path: str = None):
306
+ def upload(self, artifact_path: Optional[str] = None):
307
307
  """
308
308
  internal, upload to target store
309
309
  :param artifact_path: required only for when generating target_path from artifact hash
@@ -324,9 +324,7 @@ class ModelArtifact(Artifact):
324
324
  artifact=self, extra_data=self.spec.extra_data, artifact_path=artifact_path
325
325
  )
326
326
 
327
- # the model spec yaml should not include the tag, as the same model can be used with different tags,
328
- # and the tag is not part of the model spec but the metadata of the model artifact
329
- spec_body = _remove_tag_from_spec_yaml(self)
327
+ spec_body = _sanitize_and_serialize_model_spec_yaml(self)
330
328
  spec_target_path = None
331
329
 
332
330
  if mlrun.mlconf.artifacts.generate_target_path_from_artifact_hash:
@@ -355,7 +353,7 @@ class ModelArtifact(Artifact):
355
353
  def _upload_body_or_file(
356
354
  self,
357
355
  artifact_path: str,
358
- target_model_path: str = None,
356
+ target_model_path: Optional[str] = None,
359
357
  ):
360
358
  body = self.spec.get_body()
361
359
  if body:
@@ -403,12 +401,6 @@ class ModelArtifact(Artifact):
403
401
  return mlrun.get_dataitem(target_model_path).get()
404
402
 
405
403
 
406
- def _get_src_path(model_spec: ModelArtifact, filename):
407
- if model_spec.src_path:
408
- return path.join(model_spec.src_path, filename)
409
- return filename
410
-
411
-
412
404
  def get_model(model_dir, suffix=""):
413
405
  """return model file, model spec object, and list of extra data items
414
406
 
@@ -483,49 +475,20 @@ def get_model(model_dir, suffix=""):
483
475
  return temp_path, model_spec, extra_dataitems
484
476
 
485
477
 
486
- def _load_model_spec(spec_path):
487
- data = mlrun.datastore.store_manager.object(url=spec_path).get()
488
- spec = yaml.load(data, Loader=yaml.FullLoader)
489
- return ModelArtifact.from_dict(spec)
490
-
491
-
492
- def _get_file_path(base_path: str, name: str, isdir=False):
493
- if not is_relative_path(name):
494
- return name
495
- if not isdir:
496
- base_path = path.dirname(base_path)
497
- return path.join(base_path, name).replace("\\", "/")
498
-
499
-
500
- def _get_extra(target, extra_data, is_dir=False):
501
- extra_dataitems = {}
502
- for k, v in extra_data.items():
503
- extra_dataitems[k] = mlrun.datastore.store_manager.object(
504
- url=_get_file_path(target, v, isdir=is_dir), key=k
505
- )
506
- return extra_dataitems
507
-
508
-
509
- def _remove_tag_from_spec_yaml(model_spec):
510
- spec_dict = model_spec.to_dict()
511
- spec_dict["metadata"].pop("tag", None)
512
- return yaml.safe_dump(spec_dict)
513
-
514
-
515
478
  def update_model(
516
479
  model_artifact,
517
- parameters: dict = None,
518
- metrics: dict = None,
519
- extra_data: dict = None,
520
- inputs: list[Feature] = None,
521
- outputs: list[Feature] = None,
522
- feature_vector: str = None,
523
- feature_weights: list = None,
480
+ parameters: Optional[dict] = None,
481
+ metrics: Optional[dict] = None,
482
+ extra_data: Optional[dict] = None,
483
+ inputs: Optional[list[Feature]] = None,
484
+ outputs: Optional[list[Feature]] = None,
485
+ feature_vector: Optional[str] = None,
486
+ feature_weights: Optional[list] = None,
524
487
  key_prefix: str = "",
525
- labels: dict = None,
488
+ labels: Optional[dict] = None,
526
489
  write_spec_copy=True,
527
490
  store_object: bool = True,
528
- ):
491
+ ) -> ModelArtifact:
529
492
  """Update model object attributes
530
493
 
531
494
  this method will edit or add attributes to a model object
@@ -593,10 +556,7 @@ def update_model(
593
556
 
594
557
  if write_spec_copy:
595
558
  spec_path = path.join(model_spec.target_path, model_spec_filename)
596
-
597
- # the model spec yaml should not include the tag, as the same model can be used with different tags,
598
- # and the tag is not part of the model spec but the metadata of the model artifact
599
- model_spec_yaml = _remove_tag_from_spec_yaml(model_spec)
559
+ model_spec_yaml = _sanitize_and_serialize_model_spec_yaml(model_spec)
600
560
  mlrun.datastore.store_manager.object(url=spec_path).put(model_spec_yaml)
601
561
 
602
562
  model_spec.db_key = model_spec.db_key or model_spec.key
@@ -609,3 +569,56 @@ def update_model(
609
569
  project=model_spec.project,
610
570
  )
611
571
  return model_spec
572
+
573
+
574
+ def _get_src_path(model_spec: ModelArtifact, filename: str) -> str:
575
+ return path.join(model_spec.src_path, filename) if model_spec.src_path else filename
576
+
577
+
578
+ def _load_model_spec(spec_path) -> ModelArtifact:
579
+ data = mlrun.datastore.store_manager.object(url=spec_path).get()
580
+ spec = yaml.load(data, Loader=yaml.FullLoader)
581
+ return ModelArtifact.from_dict(spec)
582
+
583
+
584
+ def _get_file_path(base_path: str, name: str, isdir: bool = False) -> str:
585
+ if not is_relative_path(name):
586
+ return name
587
+ if not isdir:
588
+ base_path = path.dirname(base_path)
589
+ return path.join(base_path, name).replace("\\", "/")
590
+
591
+
592
+ def _get_extra(target: str, extra_data: dict, is_dir: bool = False) -> dict:
593
+ extra_dataitems = {}
594
+ for k, v in extra_data.items():
595
+ extra_dataitems[k] = mlrun.datastore.store_manager.object(
596
+ url=_get_file_path(target, v, isdir=is_dir), key=k
597
+ )
598
+ return extra_dataitems
599
+
600
+
601
+ def _sanitize_and_serialize_model_spec_yaml(model: ModelArtifact) -> str:
602
+ model_dict = _sanitize_model_spec(model)
603
+ return _serialize_model_spec_yaml(model_dict)
604
+
605
+
606
+ def _sanitize_model_spec(model: ModelArtifact) -> dict:
607
+ model_dict = model.to_dict()
608
+
609
+ # The model spec yaml should not include the tag, as the same model can be used with different tags,
610
+ # and the tag is not part of the model spec but the metadata of the model artifact
611
+ model_dict["metadata"].pop("tag", None)
612
+
613
+ # Remove future packaging links
614
+ if model_dict["spec"].get("extra_data"):
615
+ model_dict["spec"]["extra_data"] = {
616
+ key: item
617
+ for key, item in model_dict["spec"]["extra_data"].items()
618
+ if item is not ...
619
+ }
620
+ return model_dict
621
+
622
+
623
+ def _serialize_model_spec_yaml(model_dict: dict) -> str:
624
+ return yaml.safe_dump(model_dict)