mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (305)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +40 -122
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +47 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +79 -47
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +74 -1
  13. mlrun/common/db/sql_session.py +5 -5
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +45 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +33 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +12 -3
  23. mlrun/common/model_monitoring/helpers.py +9 -5
  24. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  25. mlrun/common/schemas/__init__.py +31 -5
  26. mlrun/common/schemas/alert.py +202 -0
  27. mlrun/common/schemas/api_gateway.py +196 -0
  28. mlrun/common/schemas/artifact.py +25 -4
  29. mlrun/common/schemas/auth.py +16 -5
  30. mlrun/common/schemas/background_task.py +1 -1
  31. mlrun/common/schemas/client_spec.py +4 -2
  32. mlrun/common/schemas/common.py +7 -4
  33. mlrun/common/schemas/constants.py +3 -0
  34. mlrun/common/schemas/feature_store.py +74 -44
  35. mlrun/common/schemas/frontend_spec.py +15 -7
  36. mlrun/common/schemas/function.py +12 -1
  37. mlrun/common/schemas/hub.py +11 -18
  38. mlrun/common/schemas/memory_reports.py +2 -2
  39. mlrun/common/schemas/model_monitoring/__init__.py +20 -4
  40. mlrun/common/schemas/model_monitoring/constants.py +123 -42
  41. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  42. mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
  43. mlrun/common/schemas/notification.py +71 -14
  44. mlrun/common/schemas/object.py +2 -2
  45. mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
  46. mlrun/common/schemas/pipeline.py +8 -1
  47. mlrun/common/schemas/project.py +69 -18
  48. mlrun/common/schemas/runs.py +7 -1
  49. mlrun/common/schemas/runtime_resource.py +8 -12
  50. mlrun/common/schemas/schedule.py +4 -4
  51. mlrun/common/schemas/tag.py +1 -2
  52. mlrun/common/schemas/workflow.py +12 -4
  53. mlrun/common/types.py +14 -1
  54. mlrun/config.py +154 -69
  55. mlrun/data_types/data_types.py +6 -1
  56. mlrun/data_types/spark.py +2 -2
  57. mlrun/data_types/to_pandas.py +67 -37
  58. mlrun/datastore/__init__.py +6 -8
  59. mlrun/datastore/alibaba_oss.py +131 -0
  60. mlrun/datastore/azure_blob.py +143 -42
  61. mlrun/datastore/base.py +102 -58
  62. mlrun/datastore/datastore.py +34 -13
  63. mlrun/datastore/datastore_profile.py +146 -20
  64. mlrun/datastore/dbfs_store.py +3 -7
  65. mlrun/datastore/filestore.py +1 -4
  66. mlrun/datastore/google_cloud_storage.py +97 -33
  67. mlrun/datastore/hdfs.py +56 -0
  68. mlrun/datastore/inmem.py +6 -3
  69. mlrun/datastore/redis.py +7 -2
  70. mlrun/datastore/s3.py +34 -12
  71. mlrun/datastore/snowflake_utils.py +45 -0
  72. mlrun/datastore/sources.py +303 -111
  73. mlrun/datastore/spark_utils.py +31 -2
  74. mlrun/datastore/store_resources.py +9 -7
  75. mlrun/datastore/storeytargets.py +151 -0
  76. mlrun/datastore/targets.py +453 -176
  77. mlrun/datastore/utils.py +72 -58
  78. mlrun/datastore/v3io.py +6 -1
  79. mlrun/db/base.py +274 -41
  80. mlrun/db/factory.py +1 -1
  81. mlrun/db/httpdb.py +893 -225
  82. mlrun/db/nopdb.py +291 -33
  83. mlrun/errors.py +36 -6
  84. mlrun/execution.py +115 -42
  85. mlrun/feature_store/__init__.py +0 -2
  86. mlrun/feature_store/api.py +65 -73
  87. mlrun/feature_store/common.py +7 -12
  88. mlrun/feature_store/feature_set.py +76 -55
  89. mlrun/feature_store/feature_vector.py +39 -31
  90. mlrun/feature_store/ingestion.py +7 -6
  91. mlrun/feature_store/retrieval/base.py +16 -11
  92. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  93. mlrun/feature_store/retrieval/job.py +13 -4
  94. mlrun/feature_store/retrieval/local_merger.py +2 -0
  95. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  96. mlrun/feature_store/steps.py +45 -34
  97. mlrun/features.py +11 -21
  98. mlrun/frameworks/_common/artifacts_library.py +9 -9
  99. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  100. mlrun/frameworks/_common/model_handler.py +48 -48
  101. mlrun/frameworks/_common/plan.py +5 -6
  102. mlrun/frameworks/_common/producer.py +3 -4
  103. mlrun/frameworks/_common/utils.py +5 -5
  104. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  105. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  106. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  107. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  108. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  109. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  110. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  111. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  112. mlrun/frameworks/_ml_common/plan.py +2 -2
  113. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  114. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  115. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  116. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  117. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  118. mlrun/frameworks/_ml_common/utils.py +4 -4
  119. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  120. mlrun/frameworks/huggingface/model_server.py +4 -4
  121. mlrun/frameworks/lgbm/__init__.py +33 -33
  122. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  123. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  124. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  125. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  126. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  127. mlrun/frameworks/lgbm/model_handler.py +10 -10
  128. mlrun/frameworks/lgbm/model_server.py +6 -6
  129. mlrun/frameworks/lgbm/utils.py +5 -5
  130. mlrun/frameworks/onnx/dataset.py +8 -8
  131. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  132. mlrun/frameworks/onnx/model_handler.py +6 -6
  133. mlrun/frameworks/onnx/model_server.py +7 -7
  134. mlrun/frameworks/parallel_coordinates.py +6 -6
  135. mlrun/frameworks/pytorch/__init__.py +18 -18
  136. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  137. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  138. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  139. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  140. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  141. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  142. mlrun/frameworks/pytorch/model_handler.py +17 -17
  143. mlrun/frameworks/pytorch/model_server.py +7 -7
  144. mlrun/frameworks/sklearn/__init__.py +13 -13
  145. mlrun/frameworks/sklearn/estimator.py +4 -4
  146. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  147. mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
  148. mlrun/frameworks/sklearn/model_handler.py +2 -2
  149. mlrun/frameworks/tf_keras/__init__.py +10 -7
  150. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  151. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  152. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  153. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  154. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  155. mlrun/frameworks/tf_keras/model_server.py +6 -6
  156. mlrun/frameworks/xgboost/__init__.py +13 -13
  157. mlrun/frameworks/xgboost/model_handler.py +6 -6
  158. mlrun/k8s_utils.py +61 -17
  159. mlrun/launcher/__init__.py +1 -1
  160. mlrun/launcher/base.py +16 -15
  161. mlrun/launcher/client.py +13 -11
  162. mlrun/launcher/factory.py +1 -1
  163. mlrun/launcher/local.py +23 -13
  164. mlrun/launcher/remote.py +17 -10
  165. mlrun/lists.py +7 -6
  166. mlrun/model.py +478 -103
  167. mlrun/model_monitoring/__init__.py +1 -1
  168. mlrun/model_monitoring/api.py +163 -371
  169. mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
  170. mlrun/model_monitoring/applications/_application_steps.py +188 -0
  171. mlrun/model_monitoring/applications/base.py +108 -0
  172. mlrun/model_monitoring/applications/context.py +341 -0
  173. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  174. mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
  175. mlrun/model_monitoring/applications/results.py +99 -0
  176. mlrun/model_monitoring/controller.py +131 -278
  177. mlrun/model_monitoring/db/__init__.py +18 -0
  178. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  179. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  180. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  181. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  182. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  183. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  184. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  185. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  186. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  187. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  188. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  189. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  190. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  191. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  192. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  193. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
  194. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  195. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
  196. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  197. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  198. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  199. mlrun/model_monitoring/features_drift_table.py +134 -106
  200. mlrun/model_monitoring/helpers.py +199 -55
  201. mlrun/model_monitoring/metrics/__init__.py +13 -0
  202. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  203. mlrun/model_monitoring/model_endpoint.py +3 -2
  204. mlrun/model_monitoring/stream_processing.py +131 -398
  205. mlrun/model_monitoring/tracking_policy.py +9 -2
  206. mlrun/model_monitoring/writer.py +161 -125
  207. mlrun/package/__init__.py +6 -6
  208. mlrun/package/context_handler.py +5 -5
  209. mlrun/package/packager.py +7 -7
  210. mlrun/package/packagers/default_packager.py +8 -8
  211. mlrun/package/packagers/numpy_packagers.py +15 -15
  212. mlrun/package/packagers/pandas_packagers.py +5 -5
  213. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  214. mlrun/package/packagers_manager.py +19 -23
  215. mlrun/package/utils/_formatter.py +6 -6
  216. mlrun/package/utils/_pickler.py +2 -2
  217. mlrun/package/utils/_supported_format.py +4 -4
  218. mlrun/package/utils/log_hint_utils.py +2 -2
  219. mlrun/package/utils/type_hint_utils.py +4 -9
  220. mlrun/platforms/__init__.py +11 -10
  221. mlrun/platforms/iguazio.py +24 -203
  222. mlrun/projects/operations.py +52 -25
  223. mlrun/projects/pipelines.py +191 -197
  224. mlrun/projects/project.py +1227 -400
  225. mlrun/render.py +16 -19
  226. mlrun/run.py +209 -184
  227. mlrun/runtimes/__init__.py +83 -15
  228. mlrun/runtimes/base.py +51 -35
  229. mlrun/runtimes/daskjob.py +17 -10
  230. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  231. mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
  232. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  233. mlrun/runtimes/funcdoc.py +1 -29
  234. mlrun/runtimes/function_reference.py +1 -1
  235. mlrun/runtimes/kubejob.py +34 -128
  236. mlrun/runtimes/local.py +40 -11
  237. mlrun/runtimes/mpijob/__init__.py +0 -20
  238. mlrun/runtimes/mpijob/abstract.py +9 -10
  239. mlrun/runtimes/mpijob/v1.py +1 -1
  240. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  241. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  242. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  243. mlrun/runtimes/nuclio/application/application.py +758 -0
  244. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  245. mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
  246. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  247. mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
  248. mlrun/runtimes/pod.py +281 -101
  249. mlrun/runtimes/remotesparkjob.py +12 -9
  250. mlrun/runtimes/sparkjob/spark3job.py +67 -51
  251. mlrun/runtimes/utils.py +41 -75
  252. mlrun/secrets.py +9 -5
  253. mlrun/serving/__init__.py +8 -1
  254. mlrun/serving/remote.py +2 -7
  255. mlrun/serving/routers.py +85 -69
  256. mlrun/serving/server.py +69 -44
  257. mlrun/serving/states.py +209 -36
  258. mlrun/serving/utils.py +22 -14
  259. mlrun/serving/v1_serving.py +6 -7
  260. mlrun/serving/v2_serving.py +129 -54
  261. mlrun/track/tracker.py +2 -1
  262. mlrun/track/tracker_manager.py +3 -3
  263. mlrun/track/trackers/mlflow_tracker.py +6 -2
  264. mlrun/utils/async_http.py +6 -8
  265. mlrun/utils/azure_vault.py +1 -1
  266. mlrun/utils/clones.py +1 -2
  267. mlrun/utils/condition_evaluator.py +3 -3
  268. mlrun/utils/db.py +21 -3
  269. mlrun/utils/helpers.py +405 -225
  270. mlrun/utils/http.py +3 -6
  271. mlrun/utils/logger.py +112 -16
  272. mlrun/utils/notifications/notification/__init__.py +17 -13
  273. mlrun/utils/notifications/notification/base.py +50 -2
  274. mlrun/utils/notifications/notification/console.py +2 -0
  275. mlrun/utils/notifications/notification/git.py +24 -1
  276. mlrun/utils/notifications/notification/ipython.py +3 -1
  277. mlrun/utils/notifications/notification/slack.py +96 -21
  278. mlrun/utils/notifications/notification/webhook.py +59 -2
  279. mlrun/utils/notifications/notification_pusher.py +149 -30
  280. mlrun/utils/regex.py +9 -0
  281. mlrun/utils/retryer.py +208 -0
  282. mlrun/utils/singleton.py +1 -1
  283. mlrun/utils/v3io_clients.py +4 -6
  284. mlrun/utils/version/version.json +2 -2
  285. mlrun/utils/version/version.py +2 -6
  286. mlrun-1.7.0.dist-info/METADATA +378 -0
  287. mlrun-1.7.0.dist-info/RECORD +351 -0
  288. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
  289. mlrun/feature_store/retrieval/conversion.py +0 -273
  290. mlrun/kfpops.py +0 -868
  291. mlrun/model_monitoring/application.py +0 -310
  292. mlrun/model_monitoring/batch.py +0 -1095
  293. mlrun/model_monitoring/prometheus.py +0 -219
  294. mlrun/model_monitoring/stores/__init__.py +0 -111
  295. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
  296. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  297. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  298. mlrun/model_monitoring/stores/models/base.py +0 -84
  299. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  300. mlrun/platforms/other.py +0 -306
  301. mlrun-1.6.4rc7.dist-info/METADATA +0 -272
  302. mlrun-1.6.4rc7.dist-info/RECORD +0 -314
  303. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
  304. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
  305. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/artifacts/base.py CHANGED
@@ -20,7 +20,6 @@ import warnings
20
20
  import zipfile
21
21
 
22
22
  import yaml
23
- from deprecated import deprecated
24
23
 
25
24
  import mlrun
26
25
  import mlrun.artifacts
@@ -63,9 +62,11 @@ class ArtifactMetadata(ModelObj):
63
62
  def base_dict(self):
64
63
  return super().to_dict()
65
64
 
66
- def to_dict(self, fields=None, exclude=None):
65
+ def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
67
66
  """return long dict form of the artifact"""
68
- return super().to_dict(self._dict_fields + self._extra_fields, exclude=exclude)
67
+ return super().to_dict(
68
+ self._dict_fields + self._extra_fields, exclude=exclude, strip=strip
69
+ )
69
70
 
70
71
  @classmethod
71
72
  def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
@@ -86,9 +87,10 @@ class ArtifactSpec(ModelObj):
86
87
  "db_key",
87
88
  "extra_data",
88
89
  "unpackaging_instructions",
90
+ "producer",
89
91
  ]
90
92
 
91
- _extra_fields = ["annotations", "producer", "sources", "license", "encoding"]
93
+ _extra_fields = ["annotations", "sources", "license", "encoding"]
92
94
  _exclude_fields_from_uid_hash = [
93
95
  # if the artifact is first created, it will not have a db_key,
94
96
  # exclude it so further updates of the artifacts will have the same hash
@@ -129,9 +131,11 @@ class ArtifactSpec(ModelObj):
129
131
  def base_dict(self):
130
132
  return super().to_dict()
131
133
 
132
- def to_dict(self, fields=None, exclude=None):
134
+ def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
133
135
  """return long dict form of the artifact"""
134
- return super().to_dict(self._dict_fields + self._extra_fields, exclude=exclude)
136
+ return super().to_dict(
137
+ self._dict_fields + self._extra_fields, exclude=exclude, strip=strip
138
+ )
135
139
 
136
140
  @classmethod
137
141
  def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
@@ -187,12 +191,30 @@ class Artifact(ModelObj):
187
191
  format=None,
188
192
  size=None,
189
193
  target_path=None,
190
- # All params up until here are legacy params for compatibility with legacy artifacts.
191
194
  project=None,
195
+ src_path: str = None,
196
+ # All params up until here are legacy params for compatibility with legacy artifacts.
197
+ # TODO: remove them in 1.9.0.
192
198
  metadata: ArtifactMetadata = None,
193
199
  spec: ArtifactSpec = None,
194
- src_path: str = None,
195
200
  ):
201
+ if (
202
+ key
203
+ or body
204
+ or viewer
205
+ or is_inline
206
+ or format
207
+ or size
208
+ or target_path
209
+ or project
210
+ or src_path
211
+ ):
212
+ warnings.warn(
213
+ "Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
214
+ "Use the metadata and spec parameters instead.",
215
+ DeprecationWarning,
216
+ )
217
+
196
218
  self._metadata = None
197
219
  self.metadata = metadata
198
220
  self._spec = None
@@ -610,6 +632,7 @@ class DirArtifactSpec(ArtifactSpec):
610
632
  "src_path",
611
633
  "target_path",
612
634
  "db_key",
635
+ "producer",
613
636
  ]
614
637
 
615
638
 
@@ -694,11 +717,18 @@ class LinkArtifact(Artifact):
694
717
  link_iteration=None,
695
718
  link_key=None,
696
719
  link_tree=None,
697
- # All params up until here are legacy params for compatibility with legacy artifacts.
698
720
  project=None,
721
+ # All params up until here are legacy params for compatibility with legacy artifacts.
722
+ # TODO: remove them in 1.9.0.
699
723
  metadata: ArtifactMetadata = None,
700
724
  spec: LinkArtifactSpec = None,
701
725
  ):
726
+ if key or target_path or link_iteration or link_key or link_tree or project:
727
+ warnings.warn(
728
+ "Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
729
+ "Use the metadata and spec parameters instead.",
730
+ DeprecationWarning,
731
+ )
702
732
  super().__init__(
703
733
  key, target_path=target_path, project=project, metadata=metadata, spec=spec
704
734
  )
@@ -715,237 +745,6 @@ class LinkArtifact(Artifact):
715
745
  self._spec = self._verify_dict(spec, "spec", LinkArtifactSpec)
716
746
 
717
747
 
718
- # TODO: remove in 1.7.0
719
- @deprecated(
720
- version="1.3.0",
721
- reason="'LegacyArtifact' will be removed in 1.7.0, use 'Artifact' instead",
722
- category=FutureWarning,
723
- )
724
- class LegacyArtifact(ModelObj):
725
- _dict_fields = [
726
- "key",
727
- "kind",
728
- "iter",
729
- "tree",
730
- "src_path",
731
- "target_path",
732
- "hash",
733
- "description",
734
- "viewer",
735
- "inline",
736
- "format",
737
- "size",
738
- "db_key",
739
- "extra_data",
740
- "tag",
741
- ]
742
- kind = ""
743
- _store_prefix = StorePrefix.Artifact
744
-
745
- def __init__(
746
- self,
747
- key=None,
748
- body=None,
749
- viewer=None,
750
- is_inline=False,
751
- format=None,
752
- size=None,
753
- target_path=None,
754
- ):
755
- self.key = key
756
- self.project = ""
757
- self.db_key = None
758
- self.size = size
759
- self.iter = None
760
- self.tree = None
761
- self.updated = None
762
- self.target_path = target_path
763
- self.src_path = None
764
- self._body = body
765
- self.format = format
766
- self.description = None
767
- self.viewer = viewer
768
- self.encoding = None
769
- self.labels = {}
770
- self.annotations = None
771
- self.sources = []
772
- self.producer = None
773
- self.hash = None
774
- self._inline = is_inline
775
- self.license = ""
776
- self.extra_data = {}
777
- self.tag = None # temp store of the tag
778
-
779
- def before_log(self):
780
- for key, item in self.extra_data.items():
781
- if hasattr(item, "target_path"):
782
- self.extra_data[key] = item.target_path
783
-
784
- def is_inline(self):
785
- return self._inline
786
-
787
- @property
788
- def is_dir(self):
789
- """this is a directory"""
790
- return False
791
-
792
- @property
793
- def inline(self):
794
- """inline data (body)"""
795
- if self._inline:
796
- return self.get_body()
797
- return None
798
-
799
- @inline.setter
800
- def inline(self, body):
801
- self._body = body
802
- if body:
803
- self._inline = True
804
-
805
- @property
806
- def uri(self):
807
- """return artifact uri (store://..)"""
808
- return self.get_store_url()
809
-
810
- def to_dataitem(self):
811
- """return a DataItem object (if available) representing the artifact content"""
812
- uri = self.get_store_url()
813
- if uri:
814
- return mlrun.get_dataitem(uri)
815
-
816
- def get_body(self):
817
- """get the artifact body when inline"""
818
- return self._body
819
-
820
- def get_target_path(self):
821
- """get the absolute target path for the artifact"""
822
- return self.target_path
823
-
824
- def get_store_url(self, with_tag=True, project=None):
825
- """get the artifact uri (store://..) with optional parameters"""
826
- tag = self.tree if with_tag else None
827
- uri = generate_artifact_uri(
828
- project or self.project, self.db_key, tag, self.iter
829
- )
830
- return mlrun.datastore.get_store_uri(self._store_prefix, uri)
831
-
832
- def base_dict(self):
833
- """return short dict form of the artifact"""
834
- return super().to_dict()
835
-
836
- def to_dict(self, fields=None):
837
- """return long dict form of the artifact"""
838
- return super().to_dict(
839
- self._dict_fields
840
- + ["updated", "labels", "annotations", "producer", "sources", "project"]
841
- )
842
-
843
- @classmethod
844
- def from_dict(cls, struct=None, fields=None):
845
- fields = fields or cls._dict_fields + [
846
- "updated",
847
- "labels",
848
- "annotations",
849
- "producer",
850
- "sources",
851
- "project",
852
- ]
853
- return super().from_dict(struct, fields=fields)
854
-
855
- def upload(self):
856
- """internal, upload to target store"""
857
- src_path = self.src_path
858
- body = self.get_body()
859
- if body:
860
- self._upload_body(body)
861
- else:
862
- if src_path and os.path.isfile(src_path):
863
- self._upload_file(src_path)
864
-
865
- def _upload_body(self, body, target=None):
866
- if mlrun.mlconf.artifacts.calculate_hash:
867
- self.hash = calculate_blob_hash(body)
868
- self.size = len(body)
869
- mlrun.datastore.store_manager.object(url=target or self.target_path).put(body)
870
-
871
- def _upload_file(self, src, target=None):
872
- if mlrun.mlconf.artifacts.calculate_hash:
873
- self.hash = calculate_local_file_hash(src)
874
- self.size = os.stat(src).st_size
875
- mlrun.datastore.store_manager.object(url=target or self.target_path).upload(src)
876
-
877
- def artifact_kind(self):
878
- return self.kind
879
-
880
- def generate_target_path(self, artifact_path, producer):
881
- return generate_target_path(self, artifact_path, producer)
882
-
883
-
884
- # TODO: remove in 1.7.0
885
- @deprecated(
886
- version="1.3.0",
887
- reason="'LegacyDirArtifact' will be removed in 1.7.0, use 'DirArtifact' instead",
888
- category=FutureWarning,
889
- )
890
- class LegacyDirArtifact(LegacyArtifact):
891
- _dict_fields = [
892
- "key",
893
- "kind",
894
- "iter",
895
- "tree",
896
- "src_path",
897
- "target_path",
898
- "description",
899
- "db_key",
900
- ]
901
- kind = "dir"
902
-
903
- @property
904
- def is_dir(self):
905
- return True
906
-
907
- def upload(self):
908
- if not self.src_path:
909
- raise ValueError("local/source path not specified")
910
-
911
- files = os.listdir(self.src_path)
912
- for f in files:
913
- file_path = os.path.join(self.src_path, f)
914
- if not os.path.isfile(file_path):
915
- raise ValueError(f"file {file_path} not found, cant upload")
916
- target = os.path.join(self.target_path, f)
917
- mlrun.datastore.store_manager.object(url=target).upload(file_path)
918
-
919
-
920
- # TODO: remove in 1.7.0
921
- @deprecated(
922
- version="1.3.0",
923
- reason="'LegacyLinkArtifact' will be removed in 1.7.0, use 'LinkArtifact' instead",
924
- category=FutureWarning,
925
- )
926
- class LegacyLinkArtifact(LegacyArtifact):
927
- _dict_fields = LegacyArtifact._dict_fields + [
928
- "link_iteration",
929
- "link_key",
930
- "link_tree",
931
- ]
932
- kind = "link"
933
-
934
- def __init__(
935
- self,
936
- key=None,
937
- target_path="",
938
- link_iteration=None,
939
- link_key=None,
940
- link_tree=None,
941
- ):
942
- super().__init__(key)
943
- self.target_path = target_path
944
- self.link_iteration = link_iteration
945
- self.link_key = link_key
946
- self.link_tree = link_tree
947
-
948
-
949
748
  def calculate_blob_hash(data):
950
749
  if isinstance(data, str):
951
750
  data = data.encode()
@@ -1051,25 +850,16 @@ def generate_target_path(item: Artifact, artifact_path, producer):
1051
850
  return f"{artifact_path}{item.key}{suffix}"
1052
851
 
1053
852
 
853
+ # TODO: left to support data migration from legacy artifacts to new artifacts. Remove in 1.8.0.
1054
854
  def convert_legacy_artifact_to_new_format(
1055
- legacy_artifact: typing.Union[LegacyArtifact, dict],
855
+ legacy_artifact: dict,
1056
856
  ) -> Artifact:
1057
857
  """Converts a legacy artifact to a new format.
1058
-
1059
858
  :param legacy_artifact: The legacy artifact to convert.
1060
859
  :return: The converted artifact.
1061
860
  """
1062
- if isinstance(legacy_artifact, LegacyArtifact):
1063
- legacy_artifact_dict = legacy_artifact.to_dict()
1064
- elif isinstance(legacy_artifact, dict):
1065
- legacy_artifact_dict = legacy_artifact
1066
- else:
1067
- raise TypeError(
1068
- f"Unsupported type '{type(legacy_artifact)}' for legacy artifact"
1069
- )
1070
-
1071
- artifact_key = legacy_artifact_dict.get("key", "")
1072
- artifact_tag = legacy_artifact_dict.get("tag", "")
861
+ artifact_key = legacy_artifact.get("key", "")
862
+ artifact_tag = legacy_artifact.get("tag", "")
1073
863
  if artifact_tag:
1074
864
  artifact_key = f"{artifact_key}:{artifact_tag}"
1075
865
  # TODO: remove in 1.8.0
@@ -1080,12 +870,12 @@ def convert_legacy_artifact_to_new_format(
1080
870
  )
1081
871
 
1082
872
  artifact = mlrun.artifacts.artifact_types.get(
1083
- legacy_artifact_dict.get("kind", "artifact"), mlrun.artifacts.Artifact
873
+ legacy_artifact.get("kind", "artifact"), mlrun.artifacts.Artifact
1084
874
  )()
1085
875
 
1086
- artifact.metadata = artifact.metadata.from_dict(legacy_artifact_dict)
1087
- artifact.spec = artifact.spec.from_dict(legacy_artifact_dict)
1088
- artifact.status = artifact.status.from_dict(legacy_artifact_dict)
876
+ artifact.metadata = artifact.metadata.from_dict(legacy_artifact)
877
+ artifact.spec = artifact.spec.from_dict(legacy_artifact)
878
+ artifact.status = artifact.status.from_dict(legacy_artifact)
1089
879
 
1090
880
  return artifact
1091
881
 
mlrun/artifacts/dataset.py CHANGED
@@ -13,12 +13,12 @@
13
13
  # limitations under the License.
14
14
  import os
15
15
  import pathlib
16
+ import warnings
16
17
  from io import StringIO
17
- from typing import Optional, Tuple
18
+ from typing import Optional
18
19
 
19
20
  import numpy as np
20
21
  import pandas as pd
21
- from deprecated import deprecated
22
22
  from pandas.io.json import build_table_schema
23
23
 
24
24
  import mlrun
@@ -27,7 +27,7 @@ import mlrun.datastore
27
27
  import mlrun.utils.helpers
28
28
  from mlrun.config import config as mlconf
29
29
 
30
- from .base import Artifact, ArtifactSpec, LegacyArtifact, StorePrefix
30
+ from .base import Artifact, ArtifactSpec, StorePrefix
31
31
 
32
32
  default_preview_rows_length = 20
33
33
  max_preview_columns = mlconf.artifacts.datasets.max_preview_columns
@@ -161,6 +161,13 @@ class DatasetArtifact(Artifact):
161
161
  label_column: str = None,
162
162
  **kwargs,
163
163
  ):
164
+ if key or format or target_path:
165
+ warnings.warn(
166
+ "Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
167
+ "Use the metadata and spec parameters instead.",
168
+ DeprecationWarning,
169
+ )
170
+
164
171
  format = (format or "").lower()
165
172
  super().__init__(key, None, format=format, target_path=target_path)
166
173
  if format and format not in self.SUPPORTED_FORMATS:
@@ -360,194 +367,6 @@ class DatasetArtifact(Artifact):
360
367
  self.status.stats = stats
361
368
 
362
369
 
363
- # TODO: remove in 1.7.0
364
- @deprecated(
365
- version="1.3.0",
366
- reason="'LegacyTableArtifact' will be removed in 1.7.0, use 'TableArtifact' instead",
367
- category=FutureWarning,
368
- )
369
- class LegacyTableArtifact(LegacyArtifact):
370
- _dict_fields = LegacyArtifact._dict_fields + ["schema", "header"]
371
- kind = "table"
372
-
373
- def __init__(
374
- self,
375
- key=None,
376
- body=None,
377
- df=None,
378
- viewer=None,
379
- visible=False,
380
- inline=False,
381
- format=None,
382
- header=None,
383
- schema=None,
384
- ):
385
- if key:
386
- key_suffix = pathlib.Path(key).suffix
387
- if not format and key_suffix:
388
- format = key_suffix[1:]
389
- super().__init__(key, body, viewer=viewer, is_inline=inline, format=format)
390
-
391
- if df is not None:
392
- self._is_df = True
393
- self.header = df.reset_index(drop=True).columns.values.tolist()
394
- self.format = "csv" # todo other formats
395
- # if visible and not key_suffix:
396
- # key += '.csv'
397
- self._body = df
398
- else:
399
- self._is_df = False
400
- self.header = header
401
-
402
- self.schema = schema
403
- if not viewer:
404
- viewer = "table" if visible else None
405
- self.viewer = viewer
406
-
407
- def get_body(self):
408
- if not self._is_df:
409
- return self._body
410
- csv_buffer = StringIO()
411
- self._body.to_csv(
412
- csv_buffer,
413
- encoding="utf-8",
414
- **mlrun.utils.line_terminator_kwargs(),
415
- )
416
- return csv_buffer.getvalue()
417
-
418
-
419
- # TODO: remove in 1.7.0
420
- @deprecated(
421
- version="1.3.0",
422
- reason="'LegacyDatasetArtifact' will be removed in 1.7.0, use 'DatasetArtifact' instead",
423
- category=FutureWarning,
424
- )
425
- class LegacyDatasetArtifact(LegacyArtifact):
426
- # List of all the supported saving formats of a DataFrame:
427
- SUPPORTED_FORMATS = ["csv", "parquet", "pq", "tsdb", "kv"]
428
-
429
- _dict_fields = LegacyArtifact._dict_fields + [
430
- "schema",
431
- "header",
432
- "length",
433
- "preview",
434
- "stats",
435
- "extra_data",
436
- "column_metadata",
437
- ]
438
- kind = "dataset"
439
-
440
- def __init__(
441
- self,
442
- key: str = None,
443
- df=None,
444
- preview: int = None,
445
- format: str = "", # TODO: should be changed to 'fmt'.
446
- stats: bool = None,
447
- target_path: str = None,
448
- extra_data: dict = None,
449
- column_metadata: dict = None,
450
- ignore_preview_limits: bool = False,
451
- **kwargs,
452
- ):
453
- format = (format or "").lower()
454
- super().__init__(key, None, format=format, target_path=target_path)
455
- if format and format not in self.SUPPORTED_FORMATS:
456
- raise ValueError(
457
- f"unsupported format {format} use one of {'|'.join(self.SUPPORTED_FORMATS)}"
458
- )
459
-
460
- if format == "pq":
461
- format = "parquet"
462
- self.format = format
463
- self.stats = None
464
- self.extra_data = extra_data or {}
465
- self.column_metadata = column_metadata or {}
466
-
467
- if df is not None:
468
- if hasattr(df, "dask"):
469
- # If df is a Dask DataFrame, and it's small in-memory, convert to Pandas
470
- if (df.memory_usage(deep=True).sum().compute() / 1e9) < max_ddf_size:
471
- df = df.compute()
472
- self.update_preview_fields_from_df(
473
- self, df, stats, preview, ignore_preview_limits
474
- )
475
-
476
- self._df = df
477
- self._kw = kwargs
478
-
479
- def upload(self):
480
- suffix = pathlib.Path(self.target_path).suffix
481
- format = self.format
482
- if not format:
483
- if suffix and suffix in [".csv", ".parquet", ".pq"]:
484
- format = "csv" if suffix == ".csv" else "parquet"
485
- else:
486
- format = "parquet"
487
- if not suffix and not self.target_path.startswith("memory://"):
488
- self.target_path = self.target_path + "." + format
489
-
490
- self.size, self.hash = upload_dataframe(
491
- self._df,
492
- self.target_path,
493
- format=format,
494
- src_path=self.src_path,
495
- **self._kw,
496
- )
497
-
498
- @property
499
- def df(self) -> pd.DataFrame:
500
- """
501
- Get the dataset in this artifact.
502
-
503
- :return: The dataset as a DataFrame.
504
- """
505
- return self._df
506
-
507
- @staticmethod
508
- def is_format_supported(fmt: str) -> bool:
509
- """
510
- Check whether the given dataset format is supported by the DatasetArtifact.
511
-
512
- :param fmt: The format string to check.
513
-
514
- :return: True if the format is supported and False if not.
515
- """
516
- return fmt in DatasetArtifact.SUPPORTED_FORMATS
517
-
518
- @staticmethod
519
- def update_preview_fields_from_df(
520
- artifact, df, stats=None, preview_rows_length=None, ignore_preview_limits=False
521
- ):
522
- preview_rows_length = preview_rows_length or default_preview_rows_length
523
- if hasattr(df, "dask"):
524
- artifact.length = df.shape[0].compute()
525
- preview_df = df.sample(frac=ddf_sample_pct).compute()
526
- else:
527
- artifact.length = df.shape[0]
528
- preview_df = df
529
-
530
- if artifact.length > preview_rows_length and not ignore_preview_limits:
531
- preview_df = df.head(preview_rows_length)
532
-
533
- preview_df = preview_df.reset_index()
534
- if len(preview_df.columns) > max_preview_columns and not ignore_preview_limits:
535
- preview_df = preview_df.iloc[:, :max_preview_columns]
536
- artifact.header = preview_df.columns.values.tolist()
537
- artifact.preview = preview_df.values.tolist()
538
- # Table schema parsing doesn't require a column named "index"
539
- # to align its output with previously generated header and preview data
540
- if "index" in preview_df.columns:
541
- preview_df.drop("index", axis=1, inplace=True)
542
- artifact.schema = build_table_schema(preview_df)
543
- if (
544
- stats
545
- or (artifact.length < max_csv and len(df.columns) < max_preview_columns)
546
- or ignore_preview_limits
547
- ):
548
- artifact.stats = get_df_stats(df)
549
-
550
-
551
370
  def get_df_stats(df):
552
371
  if hasattr(df, "dask"):
553
372
  df = df.sample(frac=ddf_sample_pct).compute()
@@ -656,7 +475,7 @@ def update_dataset_meta(
656
475
 
657
476
  def upload_dataframe(
658
477
  df, target_path, format, src_path=None, **kw
659
- ) -> Tuple[Optional[int], Optional[str]]:
478
+ ) -> tuple[Optional[int], Optional[str]]:
660
479
  if src_path and os.path.isfile(src_path):
661
480
  mlrun.datastore.store_manager.object(url=target_path).upload(src_path)
662
481
  return (