mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (305)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +40 -122
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +47 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +79 -47
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +74 -1
  13. mlrun/common/db/sql_session.py +5 -5
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +45 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +33 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +12 -3
  23. mlrun/common/model_monitoring/helpers.py +9 -5
  24. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  25. mlrun/common/schemas/__init__.py +31 -5
  26. mlrun/common/schemas/alert.py +202 -0
  27. mlrun/common/schemas/api_gateway.py +196 -0
  28. mlrun/common/schemas/artifact.py +25 -4
  29. mlrun/common/schemas/auth.py +16 -5
  30. mlrun/common/schemas/background_task.py +1 -1
  31. mlrun/common/schemas/client_spec.py +4 -2
  32. mlrun/common/schemas/common.py +7 -4
  33. mlrun/common/schemas/constants.py +3 -0
  34. mlrun/common/schemas/feature_store.py +74 -44
  35. mlrun/common/schemas/frontend_spec.py +15 -7
  36. mlrun/common/schemas/function.py +12 -1
  37. mlrun/common/schemas/hub.py +11 -18
  38. mlrun/common/schemas/memory_reports.py +2 -2
  39. mlrun/common/schemas/model_monitoring/__init__.py +20 -4
  40. mlrun/common/schemas/model_monitoring/constants.py +123 -42
  41. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  42. mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
  43. mlrun/common/schemas/notification.py +71 -14
  44. mlrun/common/schemas/object.py +2 -2
  45. mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
  46. mlrun/common/schemas/pipeline.py +8 -1
  47. mlrun/common/schemas/project.py +69 -18
  48. mlrun/common/schemas/runs.py +7 -1
  49. mlrun/common/schemas/runtime_resource.py +8 -12
  50. mlrun/common/schemas/schedule.py +4 -4
  51. mlrun/common/schemas/tag.py +1 -2
  52. mlrun/common/schemas/workflow.py +12 -4
  53. mlrun/common/types.py +14 -1
  54. mlrun/config.py +154 -69
  55. mlrun/data_types/data_types.py +6 -1
  56. mlrun/data_types/spark.py +2 -2
  57. mlrun/data_types/to_pandas.py +67 -37
  58. mlrun/datastore/__init__.py +6 -8
  59. mlrun/datastore/alibaba_oss.py +131 -0
  60. mlrun/datastore/azure_blob.py +143 -42
  61. mlrun/datastore/base.py +102 -58
  62. mlrun/datastore/datastore.py +34 -13
  63. mlrun/datastore/datastore_profile.py +146 -20
  64. mlrun/datastore/dbfs_store.py +3 -7
  65. mlrun/datastore/filestore.py +1 -4
  66. mlrun/datastore/google_cloud_storage.py +97 -33
  67. mlrun/datastore/hdfs.py +56 -0
  68. mlrun/datastore/inmem.py +6 -3
  69. mlrun/datastore/redis.py +7 -2
  70. mlrun/datastore/s3.py +34 -12
  71. mlrun/datastore/snowflake_utils.py +45 -0
  72. mlrun/datastore/sources.py +303 -111
  73. mlrun/datastore/spark_utils.py +31 -2
  74. mlrun/datastore/store_resources.py +9 -7
  75. mlrun/datastore/storeytargets.py +151 -0
  76. mlrun/datastore/targets.py +453 -176
  77. mlrun/datastore/utils.py +72 -58
  78. mlrun/datastore/v3io.py +6 -1
  79. mlrun/db/base.py +274 -41
  80. mlrun/db/factory.py +1 -1
  81. mlrun/db/httpdb.py +893 -225
  82. mlrun/db/nopdb.py +291 -33
  83. mlrun/errors.py +36 -6
  84. mlrun/execution.py +115 -42
  85. mlrun/feature_store/__init__.py +0 -2
  86. mlrun/feature_store/api.py +65 -73
  87. mlrun/feature_store/common.py +7 -12
  88. mlrun/feature_store/feature_set.py +76 -55
  89. mlrun/feature_store/feature_vector.py +39 -31
  90. mlrun/feature_store/ingestion.py +7 -6
  91. mlrun/feature_store/retrieval/base.py +16 -11
  92. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  93. mlrun/feature_store/retrieval/job.py +13 -4
  94. mlrun/feature_store/retrieval/local_merger.py +2 -0
  95. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  96. mlrun/feature_store/steps.py +45 -34
  97. mlrun/features.py +11 -21
  98. mlrun/frameworks/_common/artifacts_library.py +9 -9
  99. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  100. mlrun/frameworks/_common/model_handler.py +48 -48
  101. mlrun/frameworks/_common/plan.py +5 -6
  102. mlrun/frameworks/_common/producer.py +3 -4
  103. mlrun/frameworks/_common/utils.py +5 -5
  104. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  105. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  106. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  107. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  108. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  109. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  110. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  111. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  112. mlrun/frameworks/_ml_common/plan.py +2 -2
  113. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  114. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  115. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  116. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  117. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  118. mlrun/frameworks/_ml_common/utils.py +4 -4
  119. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  120. mlrun/frameworks/huggingface/model_server.py +4 -4
  121. mlrun/frameworks/lgbm/__init__.py +33 -33
  122. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  123. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  124. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  125. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  126. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  127. mlrun/frameworks/lgbm/model_handler.py +10 -10
  128. mlrun/frameworks/lgbm/model_server.py +6 -6
  129. mlrun/frameworks/lgbm/utils.py +5 -5
  130. mlrun/frameworks/onnx/dataset.py +8 -8
  131. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  132. mlrun/frameworks/onnx/model_handler.py +6 -6
  133. mlrun/frameworks/onnx/model_server.py +7 -7
  134. mlrun/frameworks/parallel_coordinates.py +6 -6
  135. mlrun/frameworks/pytorch/__init__.py +18 -18
  136. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  137. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  138. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  139. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  140. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  141. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  142. mlrun/frameworks/pytorch/model_handler.py +17 -17
  143. mlrun/frameworks/pytorch/model_server.py +7 -7
  144. mlrun/frameworks/sklearn/__init__.py +13 -13
  145. mlrun/frameworks/sklearn/estimator.py +4 -4
  146. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  147. mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
  148. mlrun/frameworks/sklearn/model_handler.py +2 -2
  149. mlrun/frameworks/tf_keras/__init__.py +10 -7
  150. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  151. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  152. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  153. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  154. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  155. mlrun/frameworks/tf_keras/model_server.py +6 -6
  156. mlrun/frameworks/xgboost/__init__.py +13 -13
  157. mlrun/frameworks/xgboost/model_handler.py +6 -6
  158. mlrun/k8s_utils.py +61 -17
  159. mlrun/launcher/__init__.py +1 -1
  160. mlrun/launcher/base.py +16 -15
  161. mlrun/launcher/client.py +13 -11
  162. mlrun/launcher/factory.py +1 -1
  163. mlrun/launcher/local.py +23 -13
  164. mlrun/launcher/remote.py +17 -10
  165. mlrun/lists.py +7 -6
  166. mlrun/model.py +478 -103
  167. mlrun/model_monitoring/__init__.py +1 -1
  168. mlrun/model_monitoring/api.py +163 -371
  169. mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
  170. mlrun/model_monitoring/applications/_application_steps.py +188 -0
  171. mlrun/model_monitoring/applications/base.py +108 -0
  172. mlrun/model_monitoring/applications/context.py +341 -0
  173. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  174. mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
  175. mlrun/model_monitoring/applications/results.py +99 -0
  176. mlrun/model_monitoring/controller.py +131 -278
  177. mlrun/model_monitoring/db/__init__.py +18 -0
  178. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  179. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  180. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  181. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  182. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  183. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  184. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  185. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  186. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  187. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  188. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  189. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  190. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  191. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  192. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  193. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
  194. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  195. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
  196. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  197. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  198. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  199. mlrun/model_monitoring/features_drift_table.py +134 -106
  200. mlrun/model_monitoring/helpers.py +199 -55
  201. mlrun/model_monitoring/metrics/__init__.py +13 -0
  202. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  203. mlrun/model_monitoring/model_endpoint.py +3 -2
  204. mlrun/model_monitoring/stream_processing.py +134 -398
  205. mlrun/model_monitoring/tracking_policy.py +9 -2
  206. mlrun/model_monitoring/writer.py +161 -125
  207. mlrun/package/__init__.py +6 -6
  208. mlrun/package/context_handler.py +5 -5
  209. mlrun/package/packager.py +7 -7
  210. mlrun/package/packagers/default_packager.py +8 -8
  211. mlrun/package/packagers/numpy_packagers.py +15 -15
  212. mlrun/package/packagers/pandas_packagers.py +5 -5
  213. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  214. mlrun/package/packagers_manager.py +19 -23
  215. mlrun/package/utils/_formatter.py +6 -6
  216. mlrun/package/utils/_pickler.py +2 -2
  217. mlrun/package/utils/_supported_format.py +4 -4
  218. mlrun/package/utils/log_hint_utils.py +2 -2
  219. mlrun/package/utils/type_hint_utils.py +4 -9
  220. mlrun/platforms/__init__.py +11 -10
  221. mlrun/platforms/iguazio.py +24 -203
  222. mlrun/projects/operations.py +52 -25
  223. mlrun/projects/pipelines.py +191 -197
  224. mlrun/projects/project.py +1227 -400
  225. mlrun/render.py +16 -19
  226. mlrun/run.py +209 -184
  227. mlrun/runtimes/__init__.py +83 -15
  228. mlrun/runtimes/base.py +51 -35
  229. mlrun/runtimes/daskjob.py +17 -10
  230. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  231. mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
  232. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  233. mlrun/runtimes/funcdoc.py +1 -29
  234. mlrun/runtimes/function_reference.py +1 -1
  235. mlrun/runtimes/kubejob.py +34 -128
  236. mlrun/runtimes/local.py +40 -11
  237. mlrun/runtimes/mpijob/__init__.py +0 -20
  238. mlrun/runtimes/mpijob/abstract.py +9 -10
  239. mlrun/runtimes/mpijob/v1.py +1 -1
  240. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  241. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  242. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  243. mlrun/runtimes/nuclio/application/application.py +758 -0
  244. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  245. mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
  246. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  247. mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
  248. mlrun/runtimes/pod.py +281 -101
  249. mlrun/runtimes/remotesparkjob.py +12 -9
  250. mlrun/runtimes/sparkjob/spark3job.py +67 -51
  251. mlrun/runtimes/utils.py +41 -75
  252. mlrun/secrets.py +9 -5
  253. mlrun/serving/__init__.py +8 -1
  254. mlrun/serving/remote.py +2 -7
  255. mlrun/serving/routers.py +85 -69
  256. mlrun/serving/server.py +69 -44
  257. mlrun/serving/states.py +209 -36
  258. mlrun/serving/utils.py +22 -14
  259. mlrun/serving/v1_serving.py +6 -7
  260. mlrun/serving/v2_serving.py +133 -54
  261. mlrun/track/tracker.py +2 -1
  262. mlrun/track/tracker_manager.py +3 -3
  263. mlrun/track/trackers/mlflow_tracker.py +6 -2
  264. mlrun/utils/async_http.py +6 -8
  265. mlrun/utils/azure_vault.py +1 -1
  266. mlrun/utils/clones.py +1 -2
  267. mlrun/utils/condition_evaluator.py +3 -3
  268. mlrun/utils/db.py +21 -3
  269. mlrun/utils/helpers.py +405 -225
  270. mlrun/utils/http.py +3 -6
  271. mlrun/utils/logger.py +112 -16
  272. mlrun/utils/notifications/notification/__init__.py +17 -13
  273. mlrun/utils/notifications/notification/base.py +50 -2
  274. mlrun/utils/notifications/notification/console.py +2 -0
  275. mlrun/utils/notifications/notification/git.py +24 -1
  276. mlrun/utils/notifications/notification/ipython.py +3 -1
  277. mlrun/utils/notifications/notification/slack.py +96 -21
  278. mlrun/utils/notifications/notification/webhook.py +59 -2
  279. mlrun/utils/notifications/notification_pusher.py +149 -30
  280. mlrun/utils/regex.py +9 -0
  281. mlrun/utils/retryer.py +208 -0
  282. mlrun/utils/singleton.py +1 -1
  283. mlrun/utils/v3io_clients.py +4 -6
  284. mlrun/utils/version/version.json +2 -2
  285. mlrun/utils/version/version.py +2 -6
  286. mlrun-1.7.0.dist-info/METADATA +378 -0
  287. mlrun-1.7.0.dist-info/RECORD +351 -0
  288. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
  289. mlrun/feature_store/retrieval/conversion.py +0 -273
  290. mlrun/kfpops.py +0 -868
  291. mlrun/model_monitoring/application.py +0 -310
  292. mlrun/model_monitoring/batch.py +0 -1095
  293. mlrun/model_monitoring/prometheus.py +0 -219
  294. mlrun/model_monitoring/stores/__init__.py +0 -111
  295. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
  296. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  297. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  298. mlrun/model_monitoring/stores/models/base.py +0 -84
  299. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  300. mlrun/platforms/other.py +0 -306
  301. mlrun-1.6.4rc8.dist-info/METADATA +0 -272
  302. mlrun-1.6.4rc8.dist-info/RECORD +0 -314
  303. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
  304. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
  305. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/datastore_profile.py CHANGED
@@ -16,6 +16,7 @@ import ast
 import base64
 import json
 import typing
+import warnings
 from urllib.parse import ParseResult, urlparse, urlunparse

 import pydantic
@@ -30,12 +31,13 @@ from ..secrets import get_secret_or_env
 class DatastoreProfile(pydantic.BaseModel):
     type: str
     name: str
-    _private_attributes: typing.List = ()
+    _private_attributes: list = ()

     class Config:
         extra = pydantic.Extra.forbid

     @pydantic.validator("name")
+    @classmethod
     def lower_case(cls, v):
         return v.lower()

@@ -68,6 +70,9 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton
     def get(self, key):
         return self._data.get(key, None)

+    def remove(self, key):
+        self._data.pop(key, None)
+

 class DatastoreProfileBasic(DatastoreProfile):
     type: str = pydantic.Field("basic")
@@ -79,13 +84,37 @@ class DatastoreProfileBasic(DatastoreProfile):
 class DatastoreProfileKafkaTarget(DatastoreProfile):
     type: str = pydantic.Field("kafka_target")
     _private_attributes = "kwargs_private"
-    bootstrap_servers: str
+    bootstrap_servers: typing.Optional[str] = None
+    brokers: typing.Optional[str] = None
     topic: str
-    kwargs_public: typing.Optional[typing.Dict]
-    kwargs_private: typing.Optional[typing.Dict]
+    kwargs_public: typing.Optional[dict]
+    kwargs_private: typing.Optional[dict]
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        if not self.brokers and not self.bootstrap_servers:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "DatastoreProfileKafkaTarget requires the 'brokers' field to be set"
+            )
+
+        if self.bootstrap_servers:
+            if self.brokers:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "DatastoreProfileKafkaTarget cannot be created with both 'brokers' and 'bootstrap_servers'"
+                )
+            else:
+                self.brokers = self.bootstrap_servers
+                self.bootstrap_servers = None
+                warnings.warn(
+                    "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
+                    "use 'brokers' instead.",
+                    # TODO: Remove this in 1.9.0
+                    FutureWarning,
+                )

     def attributes(self):
-        attributes = {"bootstrap_servers": self.bootstrap_servers}
+        attributes = {"brokers": self.brokers or self.bootstrap_servers}
         if self.kwargs_public:
             attributes = merge(attributes, self.kwargs_public)
         if self.kwargs_private:
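The net effect of the `__init__` above: `brokers` is now the canonical field, and `bootstrap_servers` survives only as a deprecated alias. A minimal usage sketch (broker and topic names are illustrative):

    from mlrun.datastore.datastore_profile import DatastoreProfileKafkaTarget

    # New 1.7.0 style: pass brokers directly.
    profile = DatastoreProfileKafkaTarget(
        name="my-kafka", brokers="broker1:9092", topic="events"
    )

    # Legacy style still works, but emits a FutureWarning and the value is
    # migrated into .brokers (bootstrap_servers is reset to None).
    legacy = DatastoreProfileKafkaTarget(
        name="legacy-kafka", bootstrap_servers="broker1:9092", topic="events"
    )
    assert legacy.brokers == "broker1:9092" and legacy.bootstrap_servers is None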
@@ -96,15 +125,15 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
 class DatastoreProfileKafkaSource(DatastoreProfile):
     type: str = pydantic.Field("kafka_source")
     _private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
-    brokers: typing.Union[str, typing.List[str]]
-    topics: typing.Union[str, typing.List[str]]
+    brokers: typing.Union[str, list[str]]
+    topics: typing.Union[str, list[str]]
     group: typing.Optional[str] = "serving"
     initial_offset: typing.Optional[str] = "earliest"
-    partitions: typing.Optional[typing.Union[str, typing.List[str]]]
+    partitions: typing.Optional[typing.Union[str, list[str]]]
     sasl_user: typing.Optional[str]
     sasl_pass: typing.Optional[str]
-    kwargs_public: typing.Optional[typing.Dict]
-    kwargs_private: typing.Optional[typing.Dict]
+    kwargs_public: typing.Optional[dict]
+    kwargs_private: typing.Optional[dict]

     def attributes(self):
         attributes = {}
@@ -132,6 +161,22 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
         return attributes


+class DatastoreProfileV3io(DatastoreProfile):
+    type: str = pydantic.Field("v3io")
+    v3io_access_key: typing.Optional[str] = None
+    _private_attributes = "v3io_access_key"
+
+    def url(self, subpath):
+        subpath = subpath.lstrip("/")
+        return f"v3io:///{subpath}"
+
+    def secrets(self) -> dict:
+        res = {}
+        if self.v3io_access_key:
+            res["V3IO_ACCESS_KEY"] = self.v3io_access_key
+        return res
+
+
 class DatastoreProfileS3(DatastoreProfile):
     type: str = pydantic.Field("s3")
     _private_attributes = ("access_key_id", "secret_key")
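`DatastoreProfileV3io` is new in this release. Based on the definitions above, a short sketch of how it behaves (the access-key value is a placeholder):

    from mlrun.datastore.datastore_profile import DatastoreProfileV3io

    profile = DatastoreProfileV3io(name="my-v3io", v3io_access_key="<access-key>")
    # url() strips leading slashes, so both forms normalize to the same URL.
    assert profile.url("/bigdata/data.parquet") == "v3io:///bigdata/data.parquet"
    assert profile.url("bigdata/data.parquet") == "v3io:///bigdata/data.parquet"
    # The key is a private attribute, surfaced only through secrets().
    assert profile.secrets() == {"V3IO_ACCESS_KEY": "<access-key>"}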
@@ -141,6 +186,18 @@ class DatastoreProfileS3(DatastoreProfile):
     assume_role_arn: typing.Optional[str] = None
     access_key_id: typing.Optional[str] = None
     secret_key: typing.Optional[str] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v

     def secrets(self) -> dict:
         res = {}
@@ -156,10 +213,16 @@ class DatastoreProfileS3(DatastoreProfile):
             res["AWS_PROFILE"] = self.profile_name
         if self.assume_role_arn:
             res["MLRUN_AWS_ROLE_ARN"] = self.assume_role_arn
-        return res if res else None
+        return res

     def url(self, subpath):
-        return f"s3:/{subpath}"
+        # TODO: There is an inconsistency with DatastoreProfileGCS. In DatastoreProfileGCS,
+        # we assume that the subpath can begin without a '/' character,
+        # while here we assume it always starts with one.
+        if self.bucket:
+            return f"s3://{self.bucket}{subpath}"
+        else:
+            return f"s3:/{subpath}"


 class DatastoreProfileRedis(DatastoreProfile):
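As the TODO notes, the S3 `url()` assumes the subpath already carries a leading `/`. Illustrative expectations implied by the code above (profile and bucket names are made up):

    from mlrun.datastore.datastore_profile import DatastoreProfileS3

    # With the new optional 'bucket' field, the bucket is prepended as-is,
    # relying on the subpath's own leading slash as the separator.
    with_bucket = DatastoreProfileS3(name="s3-prof", bucket="my-bucket")
    assert with_bucket.url("/path/file.csv") == "s3://my-bucket/path/file.csv"

    # Without a bucket, "s3:/" plus the slash-prefixed subpath still yields
    # a full "s3://" URL, preserving the pre-1.7.0 behavior.
    no_bucket = DatastoreProfileS3(name="s3-prof2")
    assert no_bucket.url("/my-bucket/path/file.csv") == "s3://my-bucket/path/file.csv"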
@@ -199,7 +262,7 @@ class DatastoreProfileRedis(DatastoreProfile):
             res["REDIS_USER"] = self.username
         if self.password:
             res["REDIS_PASSWORD"] = self.password
-        return res if res else None
+        return res

     def url(self, subpath):
         return self.endpoint_url + subpath
@@ -220,26 +283,44 @@ class DatastoreProfileDBFS(DatastoreProfile):
             res["DATABRICKS_TOKEN"] = self.token
         if self.endpoint_url:
             res["DATABRICKS_HOST"] = self.endpoint_url
-        return res if res else None
+        return res


 class DatastoreProfileGCS(DatastoreProfile):
     type: str = pydantic.Field("gcs")
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
-    gcp_credentials: typing.Optional[typing.Union[str, typing.Dict]] = None
+    gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v

     @pydantic.validator("gcp_credentials", pre=True, always=True)
+    @classmethod
     def convert_dict_to_json(cls, v):
         if isinstance(v, dict):
             return json.dumps(v)
         return v

     def url(self, subpath) -> str:
+        # TODO: but there's something wrong with the subpath being assumed to not start with a slash here,
+        # but the opposite assumption is made in S3.
         if subpath.startswith("/"):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"gcs://{subpath}"
+        if self.bucket:
+            return f"gcs://{self.bucket}/{subpath}"
+        else:
+            return f"gcs://{subpath}"

     def secrets(self) -> dict:
         res = {}
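GCS takes the opposite convention from S3: a leading `/` is stripped and the bucket is joined with an explicit separator, so both subpath forms end up equivalent (names are illustrative):

    from mlrun.datastore.datastore_profile import DatastoreProfileGCS

    profile = DatastoreProfileGCS(name="gcs-prof", bucket="my-bucket")
    assert profile.url("/path/file.csv") == "gcs://my-bucket/path/file.csv"
    assert profile.url("path/file.csv") == "gcs://my-bucket/path/file.csv"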
@@ -247,7 +328,7 @@ class DatastoreProfileGCS(DatastoreProfile):
             res["GOOGLE_APPLICATION_CREDENTIALS"] = self.credentials_path
         if self.gcp_credentials:
             res["GCP_CREDENTIALS"] = self.gcp_credentials
-        return res if res else None
+        return res


 class DatastoreProfileAzureBlob(DatastoreProfile):
@@ -267,12 +348,27 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
     client_secret: typing.Optional[str] = None
     sas_token: typing.Optional[str] = None
     credential: typing.Optional[str] = None
+    container: typing.Optional[str] = None
+
+    @pydantic.validator("container")
+    @classmethod
+    def check_container(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'container' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v

     def url(self, subpath) -> str:
         if subpath.startswith("/"):
-            # in azure the path after schema is starts with bucket, wherefore it should not start with "/".
+            # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"az://{subpath}"
+        if self.container:
+            return f"az://{self.container}/{subpath}"
+        else:
+            return f"az://{subpath}"

     def secrets(self) -> dict:
         res = {}
@@ -292,7 +388,31 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
             res["sas_token"] = self.sas_token
         if self.credential:
             res["credential"] = self.credential
-        return res if res else None
+        return res
+
+
+class DatastoreProfileHdfs(DatastoreProfile):
+    type: str = pydantic.Field("hdfs")
+    _private_attributes = "token"
+    host: typing.Optional[str] = None
+    port: typing.Optional[int] = None
+    http_port: typing.Optional[int] = None
+    user: typing.Optional[str] = None
+
+    def secrets(self) -> dict:
+        res = {}
+        if self.host:
+            res["HDFS_HOST"] = self.host
+        if self.port:
+            res["HDFS_PORT"] = self.port
+        if self.port:
+            res["HDFS_HTTP_PORT"] = self.http_port
+        if self.user:
+            res["HDFS_USER"] = self.user
+        return res or None
+
+    def url(self, subpath):
+        return f"webhdfs://{self.host}:{self.http_port}{subpath}"


 class DatastoreProfile2Json(pydantic.BaseModel):
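A sketch of the new HDFS profile (host and ports are illustrative; 8020 and 9870 are the usual NameNode RPC and WebHDFS defaults). Note that `url()` builds a WebHDFS URL from `http_port`, while `secrets()` gates the `HDFS_HTTP_PORT` export on `port`:

    from mlrun.datastore.datastore_profile import DatastoreProfileHdfs

    profile = DatastoreProfileHdfs(
        name="my-hdfs", host="namenode.example.com", port=8020, http_port=9870
    )
    assert profile.url("/data/file.csv") == "webhdfs://namenode.example.com:9870/data/file.csv"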
@@ -346,6 +466,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
         decoded_dict = {k: safe_literal_eval(v) for k, v in decoded_dict.items()}
         datastore_type = decoded_dict.get("type")
         ds_profile_factory = {
+            "v3io": DatastoreProfileV3io,
             "s3": DatastoreProfileS3,
             "redis": DatastoreProfileRedis,
             "basic": DatastoreProfileBasic,
@@ -354,6 +475,7 @@
             "dbfs": DatastoreProfileDBFS,
             "gcs": DatastoreProfileGCS,
             "az": DatastoreProfileAzureBlob,
+            "hdfs": DatastoreProfileHdfs,
         }
         if datastore_type in ds_profile_factory:
             return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
@@ -418,3 +540,7 @@ def register_temporary_client_datastore_profile(profile: DatastoreProfile):
     It's beneficial for testing purposes.
     """
     TemporaryClientDatastoreProfiles().add(profile)
+
+
+def remove_temporary_client_datastore_profile(profile_name: str):
+    TemporaryClientDatastoreProfiles().remove(profile_name)
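A minimal lifecycle sketch for the new removal helper, mirroring the registration flow above (the profile name is illustrative):

    from mlrun.datastore.datastore_profile import (
        DatastoreProfileV3io,
        register_temporary_client_datastore_profile,
        remove_temporary_client_datastore_profile,
    )

    profile = DatastoreProfileV3io(name="test-v3io")
    register_temporary_client_datastore_profile(profile)
    try:
        ...  # code that resolves the profile by name in this process
    finally:
        remove_temporary_client_datastore_profile("test-v3io")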
mlrun/datastore/dbfs_store.py CHANGED
@@ -19,7 +19,7 @@ from fsspec.registry import get_filesystem_class

 import mlrun.errors

-from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer


 class DatabricksFileBugFixed(DatabricksFile):
@@ -89,7 +89,7 @@ class DBFSStore(DataStore):
         """return fsspec file system object, if supported"""
         filesystem_class = get_filesystem_class(protocol=self.kind)
         if not self._filesystem:
-            self._filesystem = makeDatastoreSchemaSanitizer(
+            self._filesystem = make_datastore_schema_sanitizer(
                 cls=filesystem_class,
                 using_bucket=False,
                 **self.get_storage_options(),
@@ -130,11 +130,7 @@ class DBFSStore(DataStore):
                 "Append mode not supported for Databricks file system"
             )
         # can not use append mode because it overrides data.
-        mode = "w"
-        if isinstance(data, bytes):
-            mode += "b"
-        elif not isinstance(data, str):
-            raise TypeError(f"Unknown data type {type(data)}")
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(key, mode) as f:
             f.write(data)

mlrun/datastore/filestore.py CHANGED
@@ -66,9 +66,7 @@ class FileStore(DataStore):
         dir_to_create = path.dirname(self._join(key))
         if dir_to_create:
             self._ensure_directory(dir_to_create)
-        mode = "a" if append else "w"
-        if isinstance(data, bytes):
-            mode = mode + "b"
+        data, mode = self._prepare_put_data(data, append)
         with open(self._join(key), mode) as fp:
             fp.write(data)
             fp.close()
@@ -105,4 +103,3 @@ class FileStore(DataStore):
                 return
             except FileExistsError:
                 time.sleep(0.1)
-                pass
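Both `DBFSStore.put` and `FileStore.put` (and `RedisStore.put` later in this diff) now delegate mode selection to a shared `_prepare_put_data` helper on the base `DataStore` class. The helper itself lives in mlrun/datastore/base.py, which is not part of this excerpt; a plausible reconstruction from the deleted per-store logic:

    # Assumption: reconstructed from the removed code, not copied from base.py.
    def _prepare_put_data(self, data, append):
        mode = "a" if append else "w"
        if isinstance(data, bytes):
            mode += "b"  # binary payloads need a binary open() mode
        elif not isinstance(data, str):
            raise TypeError(f"Unknown data type {type(data)}")
        return data, mode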
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -12,51 +12,93 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
+import os
 from pathlib import Path

 from fsspec.registry import get_filesystem_class
+from google.auth.credentials import Credentials
+from google.cloud.storage import Client, transfer_manager
+from google.oauth2 import service_account

 import mlrun.errors
 from mlrun.utils import logger

-from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer

 # Google storage objects will be represented with the following URL: gcs://<bucket name>/<path> or gs://...


 class GoogleCloudStorageStore(DataStore):
     using_bucket = True
+    workers = 8
+    chunk_size = 32 * 1024 * 1024

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
+        self._storage_client = None
+        self._storage_options = None
+
+    @property
+    def storage_client(self):
+        if self._storage_client:
+            return self._storage_client
+
+        token = self._get_credentials().get("token")
+        access = "https://www.googleapis.com/auth/devstorage.full_control"
+        if isinstance(token, str):
+            if os.path.exists(token):
+                credentials = service_account.Credentials.from_service_account_file(
+                    token, scopes=[access]
+                )
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "gcsfs authentication file not found!"
+                )
+        elif isinstance(token, dict):
+            credentials = service_account.Credentials.from_service_account_info(
+                token, scopes=[access]
+            )
+        elif isinstance(token, Credentials):
+            credentials = token
+        else:
+            raise ValueError(f"Unsupported token type: {type(token)}")
+        self._storage_client = Client(credentials=credentials)
+        return self._storage_client

     @property
     def filesystem(self):
         """return fsspec file system object, if supported"""
-        if self._filesystem:
-            return self._filesystem
-        try:
-            import gcsfs  # noqa
-        except ImportError as exc:
-            raise ImportError(
-                "Google gcsfs not installed, run pip install gcsfs"
-            ) from exc
-        filesystem_class = get_filesystem_class(protocol=self.kind)
-        self._filesystem = makeDatastoreSchemaSanitizer(
-            filesystem_class,
-            using_bucket=self.using_bucket,
-            **self.get_storage_options(),
-        )
+        if not self._filesystem:
+            filesystem_class = get_filesystem_class(protocol=self.kind)
+            self._filesystem = make_datastore_schema_sanitizer(
+                filesystem_class,
+                using_bucket=self.using_bucket,
+                **self.storage_options,
+            )
         return self._filesystem

-    def get_storage_options(self):
+    @property
+    def storage_options(self):
+        if self._storage_options:
+            return self._storage_options
+        credentials = self._get_credentials()
+        # due to caching problem introduced in gcsfs 2024.3.1 (ML-7636)
+        credentials["use_listings_cache"] = False
+        self._storage_options = credentials
+        return self._storage_options
+
+    def _get_credentials(self):
         credentials = self._get_secret_or_env(
             "GCP_CREDENTIALS"
         ) or self._get_secret_or_env("GOOGLE_APPLICATION_CREDENTIALS")
         if credentials:
             try:
-                # Try to handle credentials as a json connection string
-                token = json.loads(credentials)
+                # Try to handle credentials as a json connection string or do nothing if already a dict
+                token = (
+                    credentials
+                    if isinstance(credentials, dict)
+                    else json.loads(credentials)
+                )
             except json.JSONDecodeError:
                 # If it's not json, handle it as a filename
                 token = credentials
@@ -67,6 +109,9 @@ class GoogleCloudStorageStore(DataStore):
             )
         return self._sanitize_storage_options(None)

+    def get_storage_options(self):
+        return self.storage_options
+
     def _make_path(self, key):
         key = key.strip("/")
         path = Path(self.endpoint, key).as_posix()
@@ -86,21 +131,34 @@ class GoogleCloudStorageStore(DataStore):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Append mode not supported for Google cloud storage datastore"
             )
-
-        if isinstance(data, bytes):
-            mode = "wb"
-        elif isinstance(data, str):
-            mode = "w"
-        else:
-            raise TypeError(
-                "Data type unknown. Unable to put in Google cloud storage!"
-            )
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(path, mode) as f:
             f.write(data)

     def upload(self, key, src_path):
-        path = self._make_path(key)
-        self.filesystem.put_file(src_path, path, overwrite=True)
+        file_size = os.path.getsize(src_path)
+        united_path = self._make_path(key)
+
+        # Multiple upload limitation recommendations as described in
+        # https://cloud.google.com/storage/docs/multipart-uploads#storage-upload-object-chunks-python
+
+        if file_size <= self.chunk_size:
+            self.filesystem.put_file(src_path, united_path, overwrite=True)
+            return
+
+        bucket = self.storage_client.bucket(self.endpoint)
+        blob = bucket.blob(key.strip("/"))
+
+        try:
+            transfer_manager.upload_chunks_concurrently(
+                src_path, blob, chunk_size=self.chunk_size, max_workers=self.workers
+            )
+        except Exception as upload_chunks_concurrently_exception:
+            logger.warning(
+                f"gcs: failed to concurrently upload {src_path},"
+                f" exception: {upload_chunks_concurrently_exception}. Retrying with single part upload."
+            )
+            self.filesystem.put_file(src_path, united_path, overwrite=True)

     def stat(self, key):
         path = self._make_path(key)
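The upload path splits on `chunk_size` (32 * 1024 * 1024 = 33,554,432 bytes, i.e. 32 MiB): anything at or below that size goes through a single `put_file`, anything larger is chunked across up to `workers` (8) concurrent workers, with a single-part fallback on failure. A worked example of the arithmetic:

    chunk_size = 32 * 1024 * 1024             # 33,554,432 bytes == 32 MiB
    file_size = 100 * 1024 * 1024             # a hypothetical 100 MiB file
    assert file_size > chunk_size             # -> concurrent chunked upload path
    num_chunks = -(-file_size // chunk_size)  # ceiling division
    assert num_chunks == 4                    # uploaded by up to 8 workers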
@@ -129,16 +187,18 @@ class GoogleCloudStorageStore(DataStore):

     def rm(self, path, recursive=False, maxdepth=None):
         path = self._make_path(path)
-        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+        # in order to raise an error in case of a connection error (ML-7056)
+        self.filesystem.exists(path)
+        super().rm(path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):
-        res = None
-        st = self.get_storage_options()
+        res = {}
+        st = self._get_credentials()
         if "token" in st:
             res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
             if isinstance(st["token"], str):
                 # Token is a filename, read json from it
-                with open(st["token"], "r") as file:
+                with open(st["token"]) as file:
                     credentials = json.load(file)
             else:
                 # Token is a dictionary, use it directly
@@ -161,3 +221,7 @@ class GoogleCloudStorageStore(DataStore):
         if "client_id" in credentials:
             res["spark.hadoop.fs.gs.client.id"] = credentials["client_id"]
         return res
+
+    @property
+    def spark_url(self):
+        return f"gs://{self.endpoint}"
mlrun/datastore/hdfs.py ADDED
@@ -0,0 +1,56 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from urllib.parse import urlparse
+
+import fsspec
+
+from mlrun.datastore.base import DataStore
+
+
+class HdfsStore(DataStore):
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+
+        self.host = self._get_secret_or_env("HDFS_HOST")
+        self.port = self._get_secret_or_env("HDFS_PORT")
+        self.http_port = self._get_secret_or_env("HDFS_HTTP_PORT")
+        self.user = self._get_secret_or_env("HDFS_USER")
+        if not self.user:
+            self.user = os.environ.get("HADOOP_USER_NAME", os.environ.get("USER"))
+
+        self._filesystem = None
+
+    @property
+    def filesystem(self):
+        if not self._filesystem:
+            self._filesystem = fsspec.filesystem(
+                "webhdfs",
+                host=self.host,
+                port=self.http_port,
+                user=self.user,
+            )
+        return self._filesystem
+
+    @property
+    def url(self):
+        return f"webhdfs://{self.host}:{self.http_port}"
+
+    @property
+    def spark_url(self):
+        return f"hdfs://{self.host}:{self.port}"
+
+    def rm(self, url, recursive=False, maxdepth=None):
+        path = urlparse(url).path
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
mlrun/datastore/inmem.py CHANGED
@@ -72,7 +72,7 @@ class InMemoryStore(DataStore):
             if columns:
                 kwargs["usecols"] = columns
             reader = df_module.read_csv
-        elif url.endswith(".parquet") or url.endswith(".pq") or format == "parquet":
+        elif mlrun.utils.helpers.is_parquet_file(url, format):
             if columns:
                 kwargs["columns"] = columns
             reader = df_module.read_parquet
@@ -80,8 +80,11 @@
             reader = df_module.read_json
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"file type unhandled {url}")
-        # InMemoryStore store do not filter on time
-        for field in ["time_column", "start_time", "end_time"]:
+        # InMemoryStore store don't pass filters
+        for field in ["time_column", "start_time", "end_time", "additional_filters"]:
            kwargs.pop(field, None)

        return reader(item, **kwargs)
+
+    def rm(self, path, recursive=False, maxdepth=None):
+        self._items.pop(path, None)
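`is_parquet_file` is a new helper in mlrun/utils/helpers.py, which is outside this excerpt; judging by the condition it replaces here, it is presumably equivalent to:

    # Assumption: inferred from the replaced condition, not copied from helpers.py.
    def is_parquet_file(url, format=None):
        return (url or "").endswith((".parquet", ".pq")) or format == "parquet"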
mlrun/datastore/redis.py CHANGED
@@ -31,7 +31,7 @@ class RedisStore(DataStore):
     """

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
-        REDIS_DEFAULT_PORT = "6379"
+        redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None

@@ -49,7 +49,7 @@
         user = self._get_secret_or_env("REDIS_USER", "", credentials_prefix)
         password = self._get_secret_or_env("REDIS_PASSWORD", "", credentials_prefix)
         host = parsed_endpoint.hostname
-        port = parsed_endpoint.port if parsed_endpoint.port else REDIS_DEFAULT_PORT
+        port = parsed_endpoint.port if parsed_endpoint.port else redis_default_port
         schema = parsed_endpoint.scheme
         if user or password:
             endpoint = f"{schema}://{user}:{password}@{host}:{port}"
@@ -126,6 +126,7 @@

     def put(self, key, data, append=False):
         key = RedisStore.build_redis_key(key)
+        data, _ = self._prepare_put_data(data, append)
         if append:
             self.redis.append(key, data)
         else:
@@ -163,3 +164,7 @@
             self.redis.delete(k)
         else:
             self.redis.delete(key)
+
+    @property
+    def spark_url(self):
+        return ""