mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (305) hide show
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +40 -122
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +47 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +79 -47
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +74 -1
  13. mlrun/common/db/sql_session.py +5 -5
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +45 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +33 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +12 -3
  23. mlrun/common/model_monitoring/helpers.py +9 -5
  24. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  25. mlrun/common/schemas/__init__.py +31 -5
  26. mlrun/common/schemas/alert.py +202 -0
  27. mlrun/common/schemas/api_gateway.py +196 -0
  28. mlrun/common/schemas/artifact.py +25 -4
  29. mlrun/common/schemas/auth.py +16 -5
  30. mlrun/common/schemas/background_task.py +1 -1
  31. mlrun/common/schemas/client_spec.py +4 -2
  32. mlrun/common/schemas/common.py +7 -4
  33. mlrun/common/schemas/constants.py +3 -0
  34. mlrun/common/schemas/feature_store.py +74 -44
  35. mlrun/common/schemas/frontend_spec.py +15 -7
  36. mlrun/common/schemas/function.py +12 -1
  37. mlrun/common/schemas/hub.py +11 -18
  38. mlrun/common/schemas/memory_reports.py +2 -2
  39. mlrun/common/schemas/model_monitoring/__init__.py +20 -4
  40. mlrun/common/schemas/model_monitoring/constants.py +123 -42
  41. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  42. mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
  43. mlrun/common/schemas/notification.py +71 -14
  44. mlrun/common/schemas/object.py +2 -2
  45. mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
  46. mlrun/common/schemas/pipeline.py +8 -1
  47. mlrun/common/schemas/project.py +69 -18
  48. mlrun/common/schemas/runs.py +7 -1
  49. mlrun/common/schemas/runtime_resource.py +8 -12
  50. mlrun/common/schemas/schedule.py +4 -4
  51. mlrun/common/schemas/tag.py +1 -2
  52. mlrun/common/schemas/workflow.py +12 -4
  53. mlrun/common/types.py +14 -1
  54. mlrun/config.py +154 -69
  55. mlrun/data_types/data_types.py +6 -1
  56. mlrun/data_types/spark.py +2 -2
  57. mlrun/data_types/to_pandas.py +67 -37
  58. mlrun/datastore/__init__.py +6 -8
  59. mlrun/datastore/alibaba_oss.py +131 -0
  60. mlrun/datastore/azure_blob.py +143 -42
  61. mlrun/datastore/base.py +102 -58
  62. mlrun/datastore/datastore.py +34 -13
  63. mlrun/datastore/datastore_profile.py +146 -20
  64. mlrun/datastore/dbfs_store.py +3 -7
  65. mlrun/datastore/filestore.py +1 -4
  66. mlrun/datastore/google_cloud_storage.py +97 -33
  67. mlrun/datastore/hdfs.py +56 -0
  68. mlrun/datastore/inmem.py +6 -3
  69. mlrun/datastore/redis.py +7 -2
  70. mlrun/datastore/s3.py +34 -12
  71. mlrun/datastore/snowflake_utils.py +45 -0
  72. mlrun/datastore/sources.py +303 -111
  73. mlrun/datastore/spark_utils.py +31 -2
  74. mlrun/datastore/store_resources.py +9 -7
  75. mlrun/datastore/storeytargets.py +151 -0
  76. mlrun/datastore/targets.py +453 -176
  77. mlrun/datastore/utils.py +72 -58
  78. mlrun/datastore/v3io.py +6 -1
  79. mlrun/db/base.py +274 -41
  80. mlrun/db/factory.py +1 -1
  81. mlrun/db/httpdb.py +893 -225
  82. mlrun/db/nopdb.py +291 -33
  83. mlrun/errors.py +36 -6
  84. mlrun/execution.py +115 -42
  85. mlrun/feature_store/__init__.py +0 -2
  86. mlrun/feature_store/api.py +65 -73
  87. mlrun/feature_store/common.py +7 -12
  88. mlrun/feature_store/feature_set.py +76 -55
  89. mlrun/feature_store/feature_vector.py +39 -31
  90. mlrun/feature_store/ingestion.py +7 -6
  91. mlrun/feature_store/retrieval/base.py +16 -11
  92. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  93. mlrun/feature_store/retrieval/job.py +13 -4
  94. mlrun/feature_store/retrieval/local_merger.py +2 -0
  95. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  96. mlrun/feature_store/steps.py +45 -34
  97. mlrun/features.py +11 -21
  98. mlrun/frameworks/_common/artifacts_library.py +9 -9
  99. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  100. mlrun/frameworks/_common/model_handler.py +48 -48
  101. mlrun/frameworks/_common/plan.py +5 -6
  102. mlrun/frameworks/_common/producer.py +3 -4
  103. mlrun/frameworks/_common/utils.py +5 -5
  104. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  105. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  106. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  107. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  108. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  109. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  110. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  111. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  112. mlrun/frameworks/_ml_common/plan.py +2 -2
  113. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  114. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  115. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  116. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  117. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  118. mlrun/frameworks/_ml_common/utils.py +4 -4
  119. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  120. mlrun/frameworks/huggingface/model_server.py +4 -4
  121. mlrun/frameworks/lgbm/__init__.py +33 -33
  122. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  123. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  124. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  125. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  126. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  127. mlrun/frameworks/lgbm/model_handler.py +10 -10
  128. mlrun/frameworks/lgbm/model_server.py +6 -6
  129. mlrun/frameworks/lgbm/utils.py +5 -5
  130. mlrun/frameworks/onnx/dataset.py +8 -8
  131. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  132. mlrun/frameworks/onnx/model_handler.py +6 -6
  133. mlrun/frameworks/onnx/model_server.py +7 -7
  134. mlrun/frameworks/parallel_coordinates.py +6 -6
  135. mlrun/frameworks/pytorch/__init__.py +18 -18
  136. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  137. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  138. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  139. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  140. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  141. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  142. mlrun/frameworks/pytorch/model_handler.py +17 -17
  143. mlrun/frameworks/pytorch/model_server.py +7 -7
  144. mlrun/frameworks/sklearn/__init__.py +13 -13
  145. mlrun/frameworks/sklearn/estimator.py +4 -4
  146. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  147. mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
  148. mlrun/frameworks/sklearn/model_handler.py +2 -2
  149. mlrun/frameworks/tf_keras/__init__.py +10 -7
  150. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  151. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  152. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  153. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  154. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  155. mlrun/frameworks/tf_keras/model_server.py +6 -6
  156. mlrun/frameworks/xgboost/__init__.py +13 -13
  157. mlrun/frameworks/xgboost/model_handler.py +6 -6
  158. mlrun/k8s_utils.py +61 -17
  159. mlrun/launcher/__init__.py +1 -1
  160. mlrun/launcher/base.py +16 -15
  161. mlrun/launcher/client.py +13 -11
  162. mlrun/launcher/factory.py +1 -1
  163. mlrun/launcher/local.py +23 -13
  164. mlrun/launcher/remote.py +17 -10
  165. mlrun/lists.py +7 -6
  166. mlrun/model.py +478 -103
  167. mlrun/model_monitoring/__init__.py +1 -1
  168. mlrun/model_monitoring/api.py +163 -371
  169. mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
  170. mlrun/model_monitoring/applications/_application_steps.py +188 -0
  171. mlrun/model_monitoring/applications/base.py +108 -0
  172. mlrun/model_monitoring/applications/context.py +341 -0
  173. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  174. mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
  175. mlrun/model_monitoring/applications/results.py +99 -0
  176. mlrun/model_monitoring/controller.py +131 -278
  177. mlrun/model_monitoring/db/__init__.py +18 -0
  178. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  179. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  180. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  181. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  182. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  183. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  184. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  185. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  186. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  187. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  188. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  189. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  190. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  191. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  192. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  193. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
  194. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  195. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
  196. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  197. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  198. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  199. mlrun/model_monitoring/features_drift_table.py +134 -106
  200. mlrun/model_monitoring/helpers.py +199 -55
  201. mlrun/model_monitoring/metrics/__init__.py +13 -0
  202. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  203. mlrun/model_monitoring/model_endpoint.py +3 -2
  204. mlrun/model_monitoring/stream_processing.py +134 -398
  205. mlrun/model_monitoring/tracking_policy.py +9 -2
  206. mlrun/model_monitoring/writer.py +161 -125
  207. mlrun/package/__init__.py +6 -6
  208. mlrun/package/context_handler.py +5 -5
  209. mlrun/package/packager.py +7 -7
  210. mlrun/package/packagers/default_packager.py +8 -8
  211. mlrun/package/packagers/numpy_packagers.py +15 -15
  212. mlrun/package/packagers/pandas_packagers.py +5 -5
  213. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  214. mlrun/package/packagers_manager.py +19 -23
  215. mlrun/package/utils/_formatter.py +6 -6
  216. mlrun/package/utils/_pickler.py +2 -2
  217. mlrun/package/utils/_supported_format.py +4 -4
  218. mlrun/package/utils/log_hint_utils.py +2 -2
  219. mlrun/package/utils/type_hint_utils.py +4 -9
  220. mlrun/platforms/__init__.py +11 -10
  221. mlrun/platforms/iguazio.py +24 -203
  222. mlrun/projects/operations.py +52 -25
  223. mlrun/projects/pipelines.py +191 -197
  224. mlrun/projects/project.py +1227 -400
  225. mlrun/render.py +16 -19
  226. mlrun/run.py +209 -184
  227. mlrun/runtimes/__init__.py +83 -15
  228. mlrun/runtimes/base.py +51 -35
  229. mlrun/runtimes/daskjob.py +17 -10
  230. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  231. mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
  232. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  233. mlrun/runtimes/funcdoc.py +1 -29
  234. mlrun/runtimes/function_reference.py +1 -1
  235. mlrun/runtimes/kubejob.py +34 -128
  236. mlrun/runtimes/local.py +40 -11
  237. mlrun/runtimes/mpijob/__init__.py +0 -20
  238. mlrun/runtimes/mpijob/abstract.py +9 -10
  239. mlrun/runtimes/mpijob/v1.py +1 -1
  240. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  241. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  242. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  243. mlrun/runtimes/nuclio/application/application.py +758 -0
  244. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  245. mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
  246. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  247. mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
  248. mlrun/runtimes/pod.py +281 -101
  249. mlrun/runtimes/remotesparkjob.py +12 -9
  250. mlrun/runtimes/sparkjob/spark3job.py +67 -51
  251. mlrun/runtimes/utils.py +41 -75
  252. mlrun/secrets.py +9 -5
  253. mlrun/serving/__init__.py +8 -1
  254. mlrun/serving/remote.py +2 -7
  255. mlrun/serving/routers.py +85 -69
  256. mlrun/serving/server.py +69 -44
  257. mlrun/serving/states.py +209 -36
  258. mlrun/serving/utils.py +22 -14
  259. mlrun/serving/v1_serving.py +6 -7
  260. mlrun/serving/v2_serving.py +133 -54
  261. mlrun/track/tracker.py +2 -1
  262. mlrun/track/tracker_manager.py +3 -3
  263. mlrun/track/trackers/mlflow_tracker.py +6 -2
  264. mlrun/utils/async_http.py +6 -8
  265. mlrun/utils/azure_vault.py +1 -1
  266. mlrun/utils/clones.py +1 -2
  267. mlrun/utils/condition_evaluator.py +3 -3
  268. mlrun/utils/db.py +21 -3
  269. mlrun/utils/helpers.py +405 -225
  270. mlrun/utils/http.py +3 -6
  271. mlrun/utils/logger.py +112 -16
  272. mlrun/utils/notifications/notification/__init__.py +17 -13
  273. mlrun/utils/notifications/notification/base.py +50 -2
  274. mlrun/utils/notifications/notification/console.py +2 -0
  275. mlrun/utils/notifications/notification/git.py +24 -1
  276. mlrun/utils/notifications/notification/ipython.py +3 -1
  277. mlrun/utils/notifications/notification/slack.py +96 -21
  278. mlrun/utils/notifications/notification/webhook.py +59 -2
  279. mlrun/utils/notifications/notification_pusher.py +149 -30
  280. mlrun/utils/regex.py +9 -0
  281. mlrun/utils/retryer.py +208 -0
  282. mlrun/utils/singleton.py +1 -1
  283. mlrun/utils/v3io_clients.py +4 -6
  284. mlrun/utils/version/version.json +2 -2
  285. mlrun/utils/version/version.py +2 -6
  286. mlrun-1.7.0.dist-info/METADATA +378 -0
  287. mlrun-1.7.0.dist-info/RECORD +351 -0
  288. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
  289. mlrun/feature_store/retrieval/conversion.py +0 -273
  290. mlrun/kfpops.py +0 -868
  291. mlrun/model_monitoring/application.py +0 -310
  292. mlrun/model_monitoring/batch.py +0 -1095
  293. mlrun/model_monitoring/prometheus.py +0 -219
  294. mlrun/model_monitoring/stores/__init__.py +0 -111
  295. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
  296. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  297. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  298. mlrun/model_monitoring/stores/models/base.py +0 -84
  299. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  300. mlrun/platforms/other.py +0 -306
  301. mlrun-1.6.4rc8.dist-info/METADATA +0 -272
  302. mlrun-1.6.4rc8.dist-info/RECORD +0 -314
  303. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
  304. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
  305. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/utils.py CHANGED
@@ -12,10 +12,12 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  #
15
+ import math
15
16
  import tarfile
16
17
  import tempfile
17
18
  import typing
18
- from urllib.parse import parse_qs, urlparse, urlunparse
19
+ import warnings
20
+ from urllib.parse import parse_qs, urlparse
19
21
 
20
22
  import pandas as pd
21
23
  import semver
@@ -23,73 +25,29 @@ import semver
23
25
  import mlrun.datastore
24
26
 
25
27
 
26
- def store_path_to_spark(path, spark_options=None):
27
- schemas = ["redis://", "rediss://", "ds://"]
28
- if any(path.startswith(schema) for schema in schemas):
29
- url = urlparse(path)
30
- if url.path:
31
- path = url.path
32
- elif path.startswith("gcs://"):
33
- path = "gs:" + path[len("gcs:") :]
34
- elif path.startswith("v3io:///"):
35
- path = "v3io:" + path[len("v3io:/") :]
36
- elif path.startswith("az://"):
37
- account_key = None
38
- path = "wasbs:" + path[len("az:") :]
39
- prefix = "spark.hadoop.fs.azure.account.key."
40
- if spark_options:
41
- for key in spark_options:
42
- if key.startswith(prefix):
43
- account_key = key[len(prefix) :]
44
- break
45
- if account_key:
46
- # transfer "wasb://basket/some/path" to wasb://basket@account_key.blob.core.windows.net/some/path
47
- parsed_url = urlparse(path)
48
- new_netloc = f"{parsed_url.hostname}@{account_key}"
49
- path = urlunparse(
50
- (
51
- parsed_url.scheme,
52
- new_netloc,
53
- parsed_url.path,
54
- parsed_url.params,
55
- parsed_url.query,
56
- parsed_url.fragment,
57
- )
58
- )
59
- elif path.startswith("s3://"):
60
- if path.startswith("s3:///"):
61
- # 's3:///' not supported since mlrun 0.9.0 should use s3:// instead
62
- from mlrun.errors import MLRunInvalidArgumentError
63
-
64
- valid_path = "s3:" + path[len("s3:/") :]
65
- raise MLRunInvalidArgumentError(
66
- f"'s3:///' is not supported, try using 's3://' instead.\nE.g: '{valid_path}'"
67
- )
68
- else:
69
- path = "s3a:" + path[len("s3:") :]
70
- return path
71
-
72
-
73
28
  def parse_kafka_url(
74
- url: str, bootstrap_servers: typing.List = None
75
- ) -> typing.Tuple[str, typing.List]:
29
+ url: str, brokers: typing.Union[list, str] = None
30
+ ) -> tuple[str, list]:
76
31
  """Generating Kafka topic and adjusting a list of bootstrap servers.
77
32
 
78
33
  :param url: URL path to parse using urllib.parse.urlparse.
79
- :param bootstrap_servers: List of bootstrap servers for the kafka brokers.
34
+ :param brokers: List of kafka brokers.
80
35
 
81
36
  :return: A tuple of:
82
37
  [0] = Kafka topic value
83
38
  [1] = List of bootstrap servers
84
39
  """
85
- bootstrap_servers = bootstrap_servers or []
40
+ brokers = brokers or []
41
+
42
+ if isinstance(brokers, str):
43
+ brokers = brokers.split(",")
86
44
 
87
45
  # Parse the provided URL into six components according to the general structure of a URL
88
46
  url = urlparse(url)
89
47
 
90
48
  # Add the network location to the bootstrap servers list
91
49
  if url.netloc:
92
- bootstrap_servers = [url.netloc] + bootstrap_servers
50
+ brokers = [url.netloc] + brokers
93
51
 
94
52
  # Get the topic value from the parsed url
95
53
  query_dict = parse_qs(url.query)
@@ -98,7 +56,7 @@ def parse_kafka_url(
98
56
  else:
99
57
  topic = url.path
100
58
  topic = topic.lstrip("/")
101
- return topic, bootstrap_servers
59
+ return topic, brokers
102
60
 
103
61
 
104
62
  def upload_tarball(source_dir, target, secrets=None):
@@ -107,7 +65,7 @@ def upload_tarball(source_dir, target, secrets=None):
107
65
  with tarfile.open(mode="w:gz", fileobj=temp_fh) as tar:
108
66
  tar.add(source_dir, arcname="")
109
67
  stores = mlrun.datastore.store_manager.set(secrets)
110
- datastore, subpath = stores.get_or_create_store(target)
68
+ datastore, subpath, url = stores.get_or_create_store(target)
111
69
  datastore.upload(subpath, temp_fh.name)
112
70
 
113
71
 
@@ -157,7 +115,7 @@ def _execute_time_filter(
157
115
 
158
116
  def select_columns_from_df(
159
117
  df: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
160
- columns: typing.List[str],
118
+ columns: list[str],
161
119
  ) -> typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]]:
162
120
  if not columns:
163
121
  return df
@@ -169,7 +127,7 @@ def select_columns_from_df(
169
127
 
170
128
  def select_columns_generator(
171
129
  dfs: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
172
- columns: typing.List[str],
130
+ columns: list[str],
173
131
  ) -> typing.Iterator[pd.DataFrame]:
174
132
  for df in dfs:
175
133
  yield df[columns]
@@ -179,7 +137,7 @@ def _generate_sql_query_with_time_filter(
179
137
  table_name: str,
180
138
  engine: "sqlalchemy.engine.Engine", # noqa: F821,
181
139
  time_column: str,
182
- parse_dates: typing.List[str],
140
+ parse_dates: list[str],
183
141
  start_time: pd.Timestamp,
184
142
  end_time: pd.Timestamp,
185
143
  ):
@@ -208,3 +166,59 @@ def _generate_sql_query_with_time_filter(
208
166
  query = query.filter(getattr(table.c, time_column) <= end_time)
209
167
 
210
168
  return query, parse_dates
169
+
170
+
171
+ def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str]:
172
+ get_or_pop = options.pop if pop else options.get
173
+ kafka_brokers = get_or_pop("kafka_brokers", None)
174
+ if kafka_brokers:
175
+ return kafka_brokers
176
+ kafka_bootstrap_servers = get_or_pop("kafka_bootstrap_servers", None)
177
+ if kafka_bootstrap_servers:
178
+ warnings.warn(
179
+ "The 'kafka_bootstrap_servers' parameter is deprecated and will be removed in "
180
+ "1.9.0. Please pass the 'kafka_brokers' parameter instead.",
181
+ FutureWarning,
182
+ )
183
+ return kafka_bootstrap_servers
184
+
185
+
186
+ def transform_list_filters_to_tuple(additional_filters):
187
+ tuple_filters = []
188
+ if not additional_filters:
189
+ return tuple_filters
190
+ validate_additional_filters(additional_filters)
191
+ for additional_filter in additional_filters:
192
+ tuple_filters.append(tuple(additional_filter))
193
+ return tuple_filters
194
+
195
+
196
+ def validate_additional_filters(additional_filters):
197
+ nan_error_message = "using NaN in additional_filters is not supported"
198
+ if additional_filters in [None, [], ()]:
199
+ return
200
+ for filter_tuple in additional_filters:
201
+ if filter_tuple == () or filter_tuple == []:
202
+ continue
203
+ if not isinstance(filter_tuple, (list, tuple)):
204
+ raise mlrun.errors.MLRunInvalidArgumentError(
205
+ f"mlrun supports additional_filters only as a list of tuples."
206
+ f" Current additional_filters: {additional_filters}"
207
+ )
208
+ if isinstance(filter_tuple[0], (list, tuple)):
209
+ raise mlrun.errors.MLRunInvalidArgumentError(
210
+ f"additional_filters does not support nested list inside filter tuples except in -in- logic."
211
+ f" Current filter_tuple: {filter_tuple}."
212
+ )
213
+ if len(filter_tuple) != 3:
214
+ raise mlrun.errors.MLRunInvalidArgumentError(
215
+ f"illegal filter tuple length, {filter_tuple} in additional filters:"
216
+ f" {additional_filters}"
217
+ )
218
+ col_name, op, value = filter_tuple
219
+ if isinstance(value, float) and math.isnan(value):
220
+ raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
221
+ elif isinstance(value, (list, tuple)):
222
+ for sub_value in value:
223
+ if isinstance(sub_value, float) and math.isnan(sub_value):
224
+ raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
mlrun/datastore/v3io.py CHANGED
@@ -29,7 +29,7 @@ from .base import (
29
29
  )
30
30
 
31
31
  V3IO_LOCAL_ROOT = "v3io"
32
- V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 100
32
+ V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 10
33
33
 
34
34
 
35
35
  class V3ioStore(DataStore):
@@ -77,6 +77,10 @@ class V3ioStore(DataStore):
77
77
  schema = "https" if self.secure else "http"
78
78
  return f"{schema}://{self.endpoint}"
79
79
 
80
+ @property
81
+ def spark_url(self):
82
+ return "v3io:/"
83
+
80
84
  @property
81
85
  def filesystem(self):
82
86
  """return fsspec file system object, if supported"""
@@ -136,6 +140,7 @@ class V3ioStore(DataStore):
136
140
  max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
137
141
  ):
138
142
  """helper function for put method, allows for controlling max_chunk_size in testing"""
143
+ data, _ = self._prepare_put_data(data, append)
139
144
  container, path = split_path(self._join(key))
140
145
  buffer_size = len(data) # in bytes
141
146
  buffer_offset = 0