mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (257)
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +1 -2
  5. mlrun/artifacts/base.py +46 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +334 -0
  8. mlrun/artifacts/manager.py +15 -13
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +7 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +66 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +67 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +2 -1
  57. mlrun/datastore/__init__.py +0 -2
  58. mlrun/datastore/alibaba_oss.py +4 -1
  59. mlrun/datastore/azure_blob.py +4 -1
  60. mlrun/datastore/base.py +12 -4
  61. mlrun/datastore/datastore.py +9 -3
  62. mlrun/datastore/datastore_profile.py +79 -20
  63. mlrun/datastore/dbfs_store.py +4 -1
  64. mlrun/datastore/filestore.py +4 -1
  65. mlrun/datastore/google_cloud_storage.py +4 -1
  66. mlrun/datastore/hdfs.py +4 -1
  67. mlrun/datastore/inmem.py +4 -1
  68. mlrun/datastore/redis.py +4 -1
  69. mlrun/datastore/s3.py +4 -1
  70. mlrun/datastore/sources.py +52 -51
  71. mlrun/datastore/store_resources.py +0 -2
  72. mlrun/datastore/targets.py +21 -21
  73. mlrun/datastore/utils.py +2 -2
  74. mlrun/datastore/v3io.py +4 -1
  75. mlrun/datastore/vectorstore.py +194 -0
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +208 -82
  78. mlrun/db/factory.py +0 -3
  79. mlrun/db/httpdb.py +1237 -386
  80. mlrun/db/nopdb.py +201 -74
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +136 -50
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +41 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +117 -46
  158. mlrun/model_monitoring/__init__.py +4 -4
  159. mlrun/model_monitoring/api.py +61 -59
  160. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  161. mlrun/model_monitoring/applications/base.py +165 -6
  162. mlrun/model_monitoring/applications/context.py +88 -37
  163. mlrun/model_monitoring/applications/evidently_base.py +0 -1
  164. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  165. mlrun/model_monitoring/applications/results.py +55 -3
  166. mlrun/model_monitoring/controller.py +207 -239
  167. mlrun/model_monitoring/db/__init__.py +0 -2
  168. mlrun/model_monitoring/db/_schedules.py +156 -0
  169. mlrun/model_monitoring/db/_stats.py +189 -0
  170. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  171. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
  172. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  173. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
  174. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  175. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  176. mlrun/model_monitoring/helpers.py +152 -49
  177. mlrun/model_monitoring/stream_processing.py +99 -283
  178. mlrun/model_monitoring/tracking_policy.py +10 -3
  179. mlrun/model_monitoring/writer.py +48 -36
  180. mlrun/package/__init__.py +3 -6
  181. mlrun/package/context_handler.py +1 -1
  182. mlrun/package/packager.py +12 -9
  183. mlrun/package/packagers/__init__.py +0 -2
  184. mlrun/package/packagers/default_packager.py +14 -11
  185. mlrun/package/packagers/numpy_packagers.py +16 -7
  186. mlrun/package/packagers/pandas_packagers.py +18 -18
  187. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  188. mlrun/package/packagers_manager.py +31 -14
  189. mlrun/package/utils/__init__.py +0 -3
  190. mlrun/package/utils/_pickler.py +6 -6
  191. mlrun/platforms/__init__.py +47 -16
  192. mlrun/platforms/iguazio.py +4 -1
  193. mlrun/projects/operations.py +27 -27
  194. mlrun/projects/pipelines.py +71 -36
  195. mlrun/projects/project.py +865 -206
  196. mlrun/run.py +53 -10
  197. mlrun/runtimes/__init__.py +1 -3
  198. mlrun/runtimes/base.py +15 -11
  199. mlrun/runtimes/daskjob.py +9 -9
  200. mlrun/runtimes/generators.py +2 -1
  201. mlrun/runtimes/kubejob.py +4 -5
  202. mlrun/runtimes/mounts.py +572 -0
  203. mlrun/runtimes/mpijob/__init__.py +0 -2
  204. mlrun/runtimes/mpijob/abstract.py +7 -6
  205. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  206. mlrun/runtimes/nuclio/application/application.py +11 -11
  207. mlrun/runtimes/nuclio/function.py +19 -17
  208. mlrun/runtimes/nuclio/serving.py +18 -11
  209. mlrun/runtimes/pod.py +154 -45
  210. mlrun/runtimes/remotesparkjob.py +3 -2
  211. mlrun/runtimes/sparkjob/__init__.py +0 -2
  212. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  213. mlrun/runtimes/utils.py +6 -5
  214. mlrun/serving/merger.py +6 -4
  215. mlrun/serving/remote.py +18 -17
  216. mlrun/serving/routers.py +185 -172
  217. mlrun/serving/server.py +7 -1
  218. mlrun/serving/states.py +97 -78
  219. mlrun/serving/utils.py +13 -2
  220. mlrun/serving/v1_serving.py +3 -2
  221. mlrun/serving/v2_serving.py +74 -65
  222. mlrun/track/__init__.py +1 -1
  223. mlrun/track/tracker.py +2 -2
  224. mlrun/track/trackers/mlflow_tracker.py +6 -5
  225. mlrun/utils/async_http.py +1 -1
  226. mlrun/utils/clones.py +1 -1
  227. mlrun/utils/helpers.py +54 -16
  228. mlrun/utils/logger.py +106 -4
  229. mlrun/utils/notifications/notification/__init__.py +22 -19
  230. mlrun/utils/notifications/notification/base.py +33 -14
  231. mlrun/utils/notifications/notification/console.py +6 -6
  232. mlrun/utils/notifications/notification/git.py +11 -11
  233. mlrun/utils/notifications/notification/ipython.py +10 -9
  234. mlrun/utils/notifications/notification/mail.py +176 -0
  235. mlrun/utils/notifications/notification/slack.py +6 -6
  236. mlrun/utils/notifications/notification/webhook.py +6 -6
  237. mlrun/utils/notifications/notification_pusher.py +86 -44
  238. mlrun/utils/regex.py +3 -1
  239. mlrun/utils/version/version.json +2 -2
  240. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +21 -16
  241. mlrun-1.8.0rc8.dist-info/RECORD +347 -0
  242. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  243. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  244. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  245. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  246. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  247. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  248. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  249. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  250. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  251. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  252. mlrun/model_monitoring/model_endpoint.py +0 -118
  253. mlrun-1.7.1rc10.dist-info/RECORD +0 -351
  254. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
  255. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +0 -0
  256. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
  257. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
mlrun/artifacts/document.py ADDED
@@ -0,0 +1,334 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import re
+ import tempfile
+ from collections.abc import Iterator
+ from copy import deepcopy
+ from importlib import import_module
+ from typing import Optional, Union
+
+ import mlrun
+ from mlrun.artifacts import Artifact, ArtifactSpec
+ from mlrun.model import ModelObj
+
+ from ..utils import generate_artifact_uri
+
+
+ class DocumentLoaderSpec(ModelObj):
+     """
+     A class to load a document from a file path using a specified loader class.
+
+     This class is responsible for loading documents from a given source path using a specified loader class.
+     The loader class is dynamically imported and instantiated with the provided arguments. The loaded documents
+     can be optionally uploaded as artifacts.
+
+     Attributes:
+         loader_class_name (str): The name of the loader class to use for loading documents.
+         src_name (str): The name of the source attribute to pass to the loader class.
+         kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.
+
+     Methods:
+         make_loader(src_path): Creates an instance of the loader class with the specified source path.
+     """
+
+     _dict_fields = ["loader_class_name", "src_name", "kwargs"]
+
+     def __init__(
+         self,
+         loader_class_name: str = "langchain_community.document_loaders.TextLoader",
+         src_name: str = "file_path",
+         kwargs: Optional[dict] = None,
+     ):
+         """
+         Initialize the document loader.
+
+         Args:
+             loader_class_name (str): The name of the loader class to use.
+             src_name (str): The source name for the document.
+             kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.
+         """
+         self.loader_class_name = loader_class_name
+         self.src_name = src_name
+         self.kwargs = kwargs
+
+     def make_loader(self, src_path):
+         module_name, class_name = self.loader_class_name.rsplit(".", 1)
+         module = import_module(module_name)
+         loader_class = getattr(module, class_name)
+         kwargs = deepcopy(self.kwargs or {})
+         kwargs[self.src_name] = src_path
+         loader = loader_class(**kwargs)
+         return loader
+
+
+ class MLRunLoader:
+     """
+     A factory class for creating instances of a dynamically defined document loader.
+
+     Args:
+         artifact_key (str): The key for the artifact to be logged.It can include '%%' which will be replaced
+             by a hex-encoded version of the source path.
+         local_path (str): The source path of the document to be loaded.
+         loader_spec (DocumentLoaderSpec): Specification for the document loader.
+         producer (Optional[Union[MlrunProject, str, MLClientCtx]], optional): The producer of the document
+         upload (bool, optional): Flag indicating whether to upload the document.
+
+     Returns:
+         DynamicDocumentLoader: An instance of a dynamically defined subclass of BaseLoader.
+     """
+
+     def __new__(
+         cls,
+         source_path: str,
+         loader_spec: "DocumentLoaderSpec",
+         artifact_key="doc%%",
+         producer: Optional[Union["MlrunProject", str, "MLClientCtx"]] = None, # noqa: F821
+         upload: bool = False,
+     ):
+         # Dynamically import BaseLoader
+         from langchain_community.document_loaders.base import BaseLoader
+
+         class DynamicDocumentLoader(BaseLoader):
+             def __init__(
+                 self,
+                 local_path,
+                 loader_spec,
+                 artifact_key,
+                 producer,
+                 upload,
+             ):
+                 self.producer = producer
+                 self.artifact_key = (
+                     MLRunLoader.artifact_key_instance(artifact_key, local_path)
+                     if "%%" in artifact_key
+                     else artifact_key
+                 )
+                 self.loader_spec = loader_spec
+                 self.local_path = local_path
+                 self.upload = upload
+
+                 # Resolve the producer
+                 if not self.producer:
+                     self.producer = mlrun.mlconf.default_project
+                 if isinstance(self.producer, str):
+                     self.producer = mlrun.get_or_create_project(self.producer)
+
+             def lazy_load(self) -> Iterator["Document"]: # noqa: F821
+                 artifact = self.producer.log_document(
+                     key=self.artifact_key,
+                     document_loader_spec=self.loader_spec,
+                     local_path=self.local_path,
+                     upload=self.upload,
+                 )
+                 res = artifact.to_langchain_documents()
+                 yield res[0]
+
+         # Return an instance of the dynamically defined subclass
+         instance = DynamicDocumentLoader(
+             artifact_key=artifact_key,
+             local_path=source_path,
+             loader_spec=loader_spec,
+             producer=producer,
+             upload=upload,
+         )
+         return instance
+
+     @staticmethod
+     def artifact_key_instance(artifact_key: str, src_path: str) -> str:
+         if "%%" in artifact_key:
+             pattern = mlrun.utils.regex.artifact_key[0]
+             # Convert anchored pattern (^...$) to non-anchored version for finditer
+             search_pattern = pattern.strip("^$")
+             result = []
+             current_pos = 0
+
+             # Find all valid sequences
+             for match in re.finditer(search_pattern, src_path):
+                 # Add hex values for characters between matches
+                 for char in src_path[current_pos : match.start()]:
+                     result.append(hex(ord(char))[2:].zfill(2))
+
+                 # Add the valid sequence
+                 result.append(match.group())
+                 current_pos = match.end()
+
+             # Handle any remaining characters after the last match
+             for char in src_path[current_pos:]:
+                 result.append(hex(ord(char))[2:].zfill(2))
+
+             resolved_path = "".join(result)
+
+             artifact_key = artifact_key.replace("%%", resolved_path)
+
+         return artifact_key
+
+
+ class DocumentArtifact(Artifact):
+     """
+     A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
+
+     Methods:
+         to_langchain_documents(splitter): Create LC documents from the artifact.
+         collection_add(collection_id): Add a collection ID to the artifact.
+         collection_remove(collection_id): Remove a collection ID from the artifact.
+     """
+
+     class DocumentArtifactSpec(ArtifactSpec):
+         _dict_fields = ArtifactSpec._dict_fields + [
+             "document_loader",
+             "collections",
+             "original_source",
+         ]
+
+         def __init__(
+             self,
+             *args,
+             document_loader: Optional[DocumentLoaderSpec] = None,
+             collections: Optional[dict] = None,
+             original_source: Optional[str] = None,
+             **kwargs,
+         ):
+             super().__init__(*args, **kwargs)
+             self.document_loader = document_loader
+             self.collections = collections if collections is not None else {}
+             self.original_source = original_source
+
+     """
+     A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
+     """
+
+     kind = "document"
+
+     METADATA_SOURCE_KEY = "source"
+     METADATA_ORIGINAL_SOURCE_KEY = "original_source"
+     METADATA_CHUNK_KEY = "mlrun_chunk"
+     METADATA_ARTIFACT_URI_KEY = "mlrun_object_uri"
+     METADATA_ARTIFACT_TARGET_PATH_KEY = "mlrun_target_path"
+
+     def __init__(
+         self,
+         original_source: Optional[str] = None,
+         document_loader_spec: Optional[DocumentLoaderSpec] = None,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.spec.document_loader = (
+             document_loader_spec.to_dict()
+             if document_loader_spec
+             else self.spec.document_loader
+         )
+         self.spec.original_source = original_source or self.spec.original_source
+
+     @property
+     def spec(self) -> DocumentArtifactSpec:
+         return self._spec
+
+     @spec.setter
+     def spec(self, spec):
+         self._spec = self._verify_dict(
+             spec, "spec", DocumentArtifact.DocumentArtifactSpec
+         )
+
+     def get_source(self):
+         return generate_artifact_uri(self.metadata.project, self.spec.db_key)
+
+     def to_langchain_documents(
+         self,
+         splitter: Optional["TextSplitter"] = None, # noqa: F821
+     ) -> list["Document"]: # noqa: F821
+         from langchain.schema import Document
+
+         """
+         Create LC documents from the artifact
+
+         Args:
+             splitter (Optional[TextSplitter]): A LangChain TextSplitter to split the document into chunks.
+
+         Returns:
+             list[Document]: A list of LangChain Document objects.
+         """
+
+         loader_spec = DocumentLoaderSpec.from_dict(self.spec.document_loader)
+         if self.get_target_path():
+             with tempfile.NamedTemporaryFile() as tmp_file:
+                 mlrun.datastore.store_manager.object(
+                     url=self.get_target_path()
+                 ).download(tmp_file.name)
+                 loader = loader_spec.make_loader(tmp_file.name)
+                 documents = loader.load()
+         elif self.spec.original_source:
+             loader = loader_spec.make_loader(self.spec.original_source)
+             documents = loader.load()
+         else:
+             raise ValueError(
+                 "No src_path or target_path provided. Cannot load document."
+             )
+
+         results = []
+         for document in documents:
+             if splitter:
+                 texts = splitter.split_text(document.page_content)
+             else:
+                 texts = [document.page_content]
+
+             metadata = document.metadata
+
+             metadata[self.METADATA_ORIGINAL_SOURCE_KEY] = self.spec.original_source
+             metadata[self.METADATA_SOURCE_KEY] = self.get_source()
+             metadata[self.METADATA_ARTIFACT_URI_KEY] = self.uri
+             if self.get_target_path():
+                 metadata[self.METADATA_ARTIFACT_TARGET_PATH_KEY] = (
+                     self.get_target_path()
+                 )
+
+             for idx, text in enumerate(texts):
+                 metadata[self.METADATA_CHUNK_KEY] = str(idx)
+                 doc = Document(
+                     page_content=text,
+                     metadata=metadata.copy(),
+                 )
+                 results.append(doc)
+         return results
+
+     def collection_add(self, collection_id: str) -> None:
+         """
+         Add a collection ID to the artifact's collection list.
+
+         Adds the specified collection ID to the artifact's collection mapping if it
+         doesn't already exist.
+         This method only modifies the client-side artifact object and does not persist
+         the changes to the MLRun DB. To save the changes permanently, you must call
+         project.update_artifact() after this method.
+
+         Args:
+             collection_id (str): The ID of the collection to add
+         """
+         if collection_id not in self.spec.collections:
+             self.spec.collections[collection_id] = "1"
+
+     def collection_remove(self, collection_id: str) -> None:
+         """
+         Remove a collection ID from the artifact's collection list.
+
+         Removes the specified collection ID from the artifact's local collection mapping.
+         This method only modifies the client-side artifact object and does not persist
+         the changes to the MLRun DB. To save the changes permanently, you must call
+         project.update_artifact() or context.update_artifact() after this method.
+
+         Args:
+             collection_id (str): The ID of the collection to remove
+         """
+         if collection_id in self.spec.collections:
+             self.spec.collections.pop(collection_id)
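
The new DocumentArtifact ("document" kind) lets a project log a source document and later re-materialize it as LangChain documents, with MLRunLoader acting as a LangChain-compatible loader that logs the artifact as a side effect of load(). A brief usage sketch, not part of the diff: the project name and file path are made up, langchain and langchain_community are assumed to be installed, and the project-level log_document API is assumed from the call in DynamicDocumentLoader.lazy_load above.

import mlrun
from mlrun.artifacts.document import DocumentLoaderSpec, MLRunLoader

project = mlrun.get_or_create_project("rag-demo")  # hypothetical project name

# Describe how the raw file should be parsed (defaults to langchain_community's TextLoader)
loader_spec = DocumentLoaderSpec(
    loader_class_name="langchain_community.document_loaders.TextLoader",
    src_name="file_path",
)

# Log explicitly through the project, then turn the artifact back into LangChain docs
artifact = project.log_document(
    key="faq-doc",
    local_path="docs/faq.txt",
    document_loader_spec=loader_spec,
    upload=True,
)
chunks = artifact.to_langchain_documents()  # list of langchain.schema.Document

# Or plug MLRunLoader into an existing LangChain pipeline; it logs the artifact on
# load(), and '%%' in the artifact key is replaced by a hex-encoded source path
docs = MLRunLoader(
    source_path="docs/faq.txt",
    loader_spec=loader_spec,
    artifact_key="doc%%",
    producer=project,
    upload=True,
).load()
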
mlrun/artifacts/manager.py CHANGED
@@ -41,6 +41,7 @@ from .dataset import (
      DatasetArtifact,
      TableArtifact,
  )
+ from .document import DocumentArtifact
  from .model import ModelArtifact
  from .plots import (
      PlotArtifact,
@@ -57,6 +58,7 @@ artifact_types = {
      "model": ModelArtifact,
      "dataset": DatasetArtifact,
      "plotly": PlotlyArtifact,
+     "document": DocumentArtifact,
  }


@@ -124,7 +126,7 @@ class ArtifactManager:

          self.artifact_db = db
          self.input_artifacts = {}
-         self.artifacts = {}
+         self.artifact_uris = {}

      @staticmethod
      def ensure_artifact_source_file_exists(item, path, body):
@@ -156,14 +158,12 @@ class ArtifactManager:

      def artifact_list(self, full=False):
          artifacts = []
-         for artifact in self.artifacts.values():
-             if isinstance(artifact, dict):
-                 artifacts.append(artifact)
+         for artifacts_uri in self.artifact_uris.values():
+             artifact: Artifact = mlrun.datastore.get_store_resource(artifacts_uri)
+             if full:
+                 artifacts.append(artifact.to_dict())
              else:
-                 if full:
-                     artifacts.append(artifact.to_dict())
-                 else:
-                     artifacts.append(artifact.base_dict())
+                 artifacts.append(artifact.base_dict())
          return artifacts

      def log_artifact(
@@ -246,6 +246,8 @@ class ArtifactManager:
          # otherwise, we do not want to override it.
          # this is mainly relevant for imported artifacts that have an explicit db_key value already set
          db_key = item.db_key or key
+         if db_key != key:
+             validate_artifact_key_name(db_key, "artifact.db_key")
          item.db_key = db_key or ""
          item.viewer = viewer or item.viewer
          item.tree = producer.tag
@@ -304,7 +306,7 @@ class ArtifactManager:
          item.target_path = target_path

          item.before_log()
-         self.artifacts[key] = item
+         self.artifact_uris[key] = item.uri

          if ((upload is None and item.kind != "dir") or upload) and not item.is_inline():
              # before uploading the item, we want to ensure that its tags are valid,
@@ -317,12 +319,12 @@ class ArtifactManager:
          size = str(item.size) or "?"
          db_str = "Y" if (self.artifact_db and db_key) else "N"
          logger.debug(
-             f"log artifact {key} at {item.target_path}, size: {size}, db: {db_str}"
+             f"Log artifact {key} at {item.target_path}, size: {size}, db: {db_str}"
          )
          return item

-     def update_artifact(self, producer, item):
-         self.artifacts[item.key] = item
+     def update_artifact(self, producer, item: Artifact):
+         self.artifact_uris[item.key] = item.uri
          self._log_to_db(item.db_key, producer.project, producer.inputs, item)

      def _log_to_db(self, key, project, sources, item, tag=None):
@@ -387,7 +389,7 @@ class ArtifactManager:
          deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
              mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
          ),
-         secrets: dict = None,
+         secrets: typing.Optional[dict] = None,
      ):
          self.artifact_db.del_artifact(
              key=item.db_key,
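
Note the behavior change in ArtifactManager above: it now tracks only artifact store URIs (artifact_uris) rather than caching artifact objects, so artifact_list() re-resolves each URI through the datastore. Roughly, per entry (the URI string below is an illustrative assumption):

import mlrun

uri = "store://artifacts/my-project/my-model"       # hypothetical store URI
artifact = mlrun.datastore.get_store_resource(uri)  # fetches the artifact behind the URI
summary = artifact.base_dict()                      # or artifact.to_dict() when full=True
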
mlrun/artifacts/model.py CHANGED
@@ -303,7 +303,7 @@ class ModelArtifact(Artifact):
          self.metadata.labels = self.metadata.labels or {}
          self.metadata.labels["framework"] = self.spec.framework

-     def upload(self, artifact_path: str = None):
+     def upload(self, artifact_path: Optional[str] = None):
          """
          internal, upload to target store
          :param artifact_path: required only for when generating target_path from artifact hash
@@ -324,9 +324,7 @@ class ModelArtifact(Artifact):
              artifact=self, extra_data=self.spec.extra_data, artifact_path=artifact_path
          )

-         # the model spec yaml should not include the tag, as the same model can be used with different tags,
-         # and the tag is not part of the model spec but the metadata of the model artifact
-         spec_body = _remove_tag_from_spec_yaml(self)
+         spec_body = _sanitize_and_serialize_model_spec_yaml(self)
          spec_target_path = None

          if mlrun.mlconf.artifacts.generate_target_path_from_artifact_hash:
@@ -355,7 +353,7 @@ class ModelArtifact(Artifact):
      def _upload_body_or_file(
          self,
          artifact_path: str,
-         target_model_path: str = None,
+         target_model_path: Optional[str] = None,
      ):
          body = self.spec.get_body()
          if body:
@@ -403,12 +401,6 @@ class ModelArtifact(Artifact):
          return mlrun.get_dataitem(target_model_path).get()


- def _get_src_path(model_spec: ModelArtifact, filename):
-     if model_spec.src_path:
-         return path.join(model_spec.src_path, filename)
-     return filename
-
-
  def get_model(model_dir, suffix=""):
      """return model file, model spec object, and list of extra data items

@@ -483,49 +475,20 @@ def get_model(model_dir, suffix=""):
      return temp_path, model_spec, extra_dataitems


- def _load_model_spec(spec_path):
-     data = mlrun.datastore.store_manager.object(url=spec_path).get()
-     spec = yaml.load(data, Loader=yaml.FullLoader)
-     return ModelArtifact.from_dict(spec)
-
-
- def _get_file_path(base_path: str, name: str, isdir=False):
-     if not is_relative_path(name):
-         return name
-     if not isdir:
-         base_path = path.dirname(base_path)
-     return path.join(base_path, name).replace("\\", "/")
-
-
- def _get_extra(target, extra_data, is_dir=False):
-     extra_dataitems = {}
-     for k, v in extra_data.items():
-         extra_dataitems[k] = mlrun.datastore.store_manager.object(
-             url=_get_file_path(target, v, isdir=is_dir), key=k
-         )
-     return extra_dataitems
-
-
- def _remove_tag_from_spec_yaml(model_spec):
-     spec_dict = model_spec.to_dict()
-     spec_dict["metadata"].pop("tag", None)
-     return yaml.safe_dump(spec_dict)
-
-
  def update_model(
      model_artifact,
-     parameters: dict = None,
-     metrics: dict = None,
-     extra_data: dict = None,
-     inputs: list[Feature] = None,
-     outputs: list[Feature] = None,
-     feature_vector: str = None,
-     feature_weights: list = None,
+     parameters: Optional[dict] = None,
+     metrics: Optional[dict] = None,
+     extra_data: Optional[dict] = None,
+     inputs: Optional[list[Feature]] = None,
+     outputs: Optional[list[Feature]] = None,
+     feature_vector: Optional[str] = None,
+     feature_weights: Optional[list] = None,
      key_prefix: str = "",
-     labels: dict = None,
+     labels: Optional[dict] = None,
      write_spec_copy=True,
      store_object: bool = True,
- ):
+ ) -> ModelArtifact:
      """Update model object attributes

      this method will edit or add attributes to a model object
@@ -593,10 +556,7 @@ def update_model(

      if write_spec_copy:
          spec_path = path.join(model_spec.target_path, model_spec_filename)
-
-         # the model spec yaml should not include the tag, as the same model can be used with different tags,
-         # and the tag is not part of the model spec but the metadata of the model artifact
-         model_spec_yaml = _remove_tag_from_spec_yaml(model_spec)
+         model_spec_yaml = _sanitize_and_serialize_model_spec_yaml(model_spec)
          mlrun.datastore.store_manager.object(url=spec_path).put(model_spec_yaml)

      model_spec.db_key = model_spec.db_key or model_spec.key
@@ -609,3 +569,56 @@ def update_model(
          project=model_spec.project,
      )
      return model_spec
+
+
+ def _get_src_path(model_spec: ModelArtifact, filename: str) -> str:
+     return path.join(model_spec.src_path, filename) if model_spec.src_path else filename
+
+
+ def _load_model_spec(spec_path) -> ModelArtifact:
+     data = mlrun.datastore.store_manager.object(url=spec_path).get()
+     spec = yaml.load(data, Loader=yaml.FullLoader)
+     return ModelArtifact.from_dict(spec)
+
+
+ def _get_file_path(base_path: str, name: str, isdir: bool = False) -> str:
+     if not is_relative_path(name):
+         return name
+     if not isdir:
+         base_path = path.dirname(base_path)
+     return path.join(base_path, name).replace("\\", "/")
+
+
+ def _get_extra(target: str, extra_data: dict, is_dir: bool = False) -> dict:
+     extra_dataitems = {}
+     for k, v in extra_data.items():
+         extra_dataitems[k] = mlrun.datastore.store_manager.object(
+             url=_get_file_path(target, v, isdir=is_dir), key=k
+         )
+     return extra_dataitems
+
+
+ def _sanitize_and_serialize_model_spec_yaml(model: ModelArtifact) -> str:
+     model_dict = _sanitize_model_spec(model)
+     return _serialize_model_spec_yaml(model_dict)
+
+
+ def _sanitize_model_spec(model: ModelArtifact) -> dict:
+     model_dict = model.to_dict()
+
+     # The model spec yaml should not include the tag, as the same model can be used with different tags,
+     # and the tag is not part of the model spec but the metadata of the model artifact
+     model_dict["metadata"].pop("tag", None)
+
+     # Remove future packaging links
+     if model_dict["spec"].get("extra_data"):
+         model_dict["spec"]["extra_data"] = {
+             key: item
+             for key, item in model_dict["spec"]["extra_data"].items()
+             if item is not ...
+         }
+     return model_dict
+
+
+ def _serialize_model_spec_yaml(model_dict: dict) -> str:
+     return yaml.safe_dump(model_dict)
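
_remove_tag_from_spec_yaml is folded into _sanitize_and_serialize_model_spec_yaml, which still strips the tag from the spec yaml and now also drops unresolved extra_data placeholders (Ellipsis values left by future packaging links). A standalone sketch of the same rule, using a made-up model dict rather than the mlrun helpers themselves:

import yaml

model_dict = {
    "metadata": {"key": "my-model", "tag": "v1"},  # hypothetical model artifact dict
    "spec": {"extra_data": {"report.html": "report.html", "pending-link": ...}},
}

model_dict["metadata"].pop("tag", None)  # the tag belongs to metadata, not the spec yaml
model_dict["spec"]["extra_data"] = {
    key: item for key, item in model_dict["spec"]["extra_data"].items() if item is not ...
}
print(yaml.safe_dump(model_dict))  # serialized without the tag or the Ellipsis entry
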
mlrun/common/constants.py CHANGED
@@ -11,6 +11,7 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ import mlrun.common.types

  IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "." # prefix for image name to enrich with registry
  MLRUN_SERVING_CONF = "serving-conf"
@@ -68,6 +69,7 @@ class MLRunInternalLabels:
      producer_type = f"{MLRUN_LABEL_PREFIX}producer-type"
      app_name = f"{MLRUN_LABEL_PREFIX}app-name"
      endpoint_id = f"{MLRUN_LABEL_PREFIX}endpoint-id"
+     endpoint_name = f"{MLRUN_LABEL_PREFIX}endpoint-name"
      host = "host"
      job_type = "job-type"
      kind = "kind"
@@ -86,3 +88,8 @@ class MLRunInternalLabels:
              for key, value in cls.__dict__.items()
              if not key.startswith("__") and isinstance(value, str)
          ]
+
+
+ class DeployStatusTextKind(mlrun.common.types.StrEnum):
+     logs = "logs"
+     events = "events"
mlrun/common/formatters/__init__.py CHANGED
@@ -19,3 +19,4 @@ from .pipeline import PipelineFormat # noqa
  from .project import ProjectFormat # noqa
  from .run import RunFormat # noqa
  from .feature_set import FeatureSetFormat # noqa
+ from .model_endpoint import ModelEndpointFormat # noqa
mlrun/common/formatters/feature_set.py CHANGED
@@ -21,6 +21,7 @@ from .base import ObjectFormat


  class FeatureSetFormat(ObjectFormat, mlrun.common.types.StrEnum):
+     full = "full"
      minimal = "minimal"

      @staticmethod
mlrun/common/formatters/function.py CHANGED
@@ -21,6 +21,7 @@ from .base import ObjectFormat


  class FunctionFormat(ObjectFormat, mlrun.common.types.StrEnum):
+     full = "full"
      minimal = "minimal"

      @staticmethod
mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} RENAMED
@@ -11,5 +11,20 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ #
+
+ import typing
+
+ import mlrun.common.types
+
+ from .base import ObjectFormat
+
+
+ class ModelEndpointFormat(ObjectFormat, mlrun.common.types.StrEnum):
+     full = "full"

- from .store import StoreBase
+     @staticmethod
+     def format_method(_format: str) -> typing.Optional[typing.Callable]:
+         return {
+             ModelEndpointFormat.full: None,
+         }[_format]
mlrun/common/formatters/pipeline.py CHANGED
@@ -15,11 +15,10 @@

  import typing

+ import mlrun.common.types
  import mlrun_pipelines.common.ops
  import mlrun_pipelines.models

- import mlrun.common.types
-
  from .base import ObjectFormat

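
The formatter additions above all follow the same ObjectFormat convention: format_method maps a format name to an optional post-processing callable, and None (as for the new "full" formats) means the object is returned untouched. A small self-contained sketch of that convention, written without the ObjectFormat base class; the "minimal" trimming below is illustrative only:

import typing

class EndpointFormatSketch:
    full = "full"
    minimal = "minimal"  # illustrative second format

    @staticmethod
    def format_method(_format: str) -> typing.Optional[typing.Callable]:
        return {
            EndpointFormatSketch.full: None,  # no trimming for the full format
            EndpointFormatSketch.minimal: lambda obj: {k: obj[k] for k in ("name", "uid") if k in obj},
        }[_format]

def apply_format(obj: dict, _format: str) -> dict:
    method = EndpointFormatSketch.format_method(_format)
    return method(obj) if method else obj

print(apply_format({"name": "mep", "uid": "123", "extra": 1}, "minimal"))  # keeps name and uid only
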