mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (275)
  1. mlrun/__init__.py +26 -22
  2. mlrun/__main__.py +15 -16
  3. mlrun/alerts/alert.py +150 -15
  4. mlrun/api/schemas/__init__.py +1 -9
  5. mlrun/artifacts/__init__.py +2 -3
  6. mlrun/artifacts/base.py +62 -19
  7. mlrun/artifacts/dataset.py +17 -17
  8. mlrun/artifacts/document.py +454 -0
  9. mlrun/artifacts/manager.py +28 -18
  10. mlrun/artifacts/model.py +91 -59
  11. mlrun/artifacts/plots.py +2 -2
  12. mlrun/common/constants.py +8 -0
  13. mlrun/common/formatters/__init__.py +1 -0
  14. mlrun/common/formatters/artifact.py +1 -1
  15. mlrun/common/formatters/feature_set.py +2 -0
  16. mlrun/common/formatters/function.py +1 -0
  17. mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
  18. mlrun/common/formatters/pipeline.py +1 -2
  19. mlrun/common/formatters/project.py +9 -0
  20. mlrun/common/model_monitoring/__init__.py +0 -5
  21. mlrun/common/model_monitoring/helpers.py +12 -62
  22. mlrun/common/runtimes/constants.py +25 -4
  23. mlrun/common/schemas/__init__.py +9 -5
  24. mlrun/common/schemas/alert.py +114 -19
  25. mlrun/common/schemas/api_gateway.py +3 -3
  26. mlrun/common/schemas/artifact.py +22 -9
  27. mlrun/common/schemas/auth.py +8 -4
  28. mlrun/common/schemas/background_task.py +7 -7
  29. mlrun/common/schemas/client_spec.py +4 -4
  30. mlrun/common/schemas/clusterization_spec.py +2 -2
  31. mlrun/common/schemas/common.py +53 -3
  32. mlrun/common/schemas/constants.py +15 -0
  33. mlrun/common/schemas/datastore_profile.py +1 -1
  34. mlrun/common/schemas/feature_store.py +9 -9
  35. mlrun/common/schemas/frontend_spec.py +4 -4
  36. mlrun/common/schemas/function.py +10 -10
  37. mlrun/common/schemas/hub.py +1 -1
  38. mlrun/common/schemas/k8s.py +3 -3
  39. mlrun/common/schemas/memory_reports.py +3 -3
  40. mlrun/common/schemas/model_monitoring/__init__.py +4 -8
  41. mlrun/common/schemas/model_monitoring/constants.py +127 -46
  42. mlrun/common/schemas/model_monitoring/grafana.py +18 -12
  43. mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
  44. mlrun/common/schemas/notification.py +24 -3
  45. mlrun/common/schemas/object.py +1 -1
  46. mlrun/common/schemas/pagination.py +4 -4
  47. mlrun/common/schemas/partition.py +142 -0
  48. mlrun/common/schemas/pipeline.py +3 -3
  49. mlrun/common/schemas/project.py +26 -18
  50. mlrun/common/schemas/runs.py +3 -3
  51. mlrun/common/schemas/runtime_resource.py +5 -5
  52. mlrun/common/schemas/schedule.py +1 -1
  53. mlrun/common/schemas/secret.py +1 -1
  54. mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
  55. mlrun/common/schemas/tag.py +3 -3
  56. mlrun/common/schemas/workflow.py +6 -5
  57. mlrun/common/types.py +1 -0
  58. mlrun/config.py +157 -89
  59. mlrun/data_types/__init__.py +5 -3
  60. mlrun/data_types/infer.py +13 -3
  61. mlrun/data_types/spark.py +2 -1
  62. mlrun/datastore/__init__.py +59 -18
  63. mlrun/datastore/alibaba_oss.py +4 -1
  64. mlrun/datastore/azure_blob.py +4 -1
  65. mlrun/datastore/base.py +19 -24
  66. mlrun/datastore/datastore.py +10 -4
  67. mlrun/datastore/datastore_profile.py +178 -45
  68. mlrun/datastore/dbfs_store.py +4 -1
  69. mlrun/datastore/filestore.py +4 -1
  70. mlrun/datastore/google_cloud_storage.py +4 -1
  71. mlrun/datastore/hdfs.py +4 -1
  72. mlrun/datastore/inmem.py +4 -1
  73. mlrun/datastore/redis.py +4 -1
  74. mlrun/datastore/s3.py +14 -3
  75. mlrun/datastore/sources.py +89 -92
  76. mlrun/datastore/store_resources.py +7 -4
  77. mlrun/datastore/storeytargets.py +51 -16
  78. mlrun/datastore/targets.py +38 -31
  79. mlrun/datastore/utils.py +87 -4
  80. mlrun/datastore/v3io.py +4 -1
  81. mlrun/datastore/vectorstore.py +291 -0
  82. mlrun/datastore/wasbfs/fs.py +13 -12
  83. mlrun/db/base.py +286 -100
  84. mlrun/db/httpdb.py +1562 -490
  85. mlrun/db/nopdb.py +250 -83
  86. mlrun/errors.py +6 -2
  87. mlrun/execution.py +194 -50
  88. mlrun/feature_store/__init__.py +2 -10
  89. mlrun/feature_store/api.py +20 -458
  90. mlrun/feature_store/common.py +9 -9
  91. mlrun/feature_store/feature_set.py +20 -18
  92. mlrun/feature_store/feature_vector.py +105 -479
  93. mlrun/feature_store/feature_vector_utils.py +466 -0
  94. mlrun/feature_store/retrieval/base.py +15 -11
  95. mlrun/feature_store/retrieval/job.py +2 -1
  96. mlrun/feature_store/retrieval/storey_merger.py +1 -1
  97. mlrun/feature_store/steps.py +3 -3
  98. mlrun/features.py +30 -13
  99. mlrun/frameworks/__init__.py +1 -2
  100. mlrun/frameworks/_common/__init__.py +1 -2
  101. mlrun/frameworks/_common/artifacts_library.py +2 -2
  102. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  103. mlrun/frameworks/_common/model_handler.py +31 -31
  104. mlrun/frameworks/_common/producer.py +3 -1
  105. mlrun/frameworks/_dl_common/__init__.py +1 -2
  106. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  107. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  108. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  109. mlrun/frameworks/_ml_common/__init__.py +1 -2
  110. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  111. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  112. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  113. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  114. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  115. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  116. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  117. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  118. mlrun/frameworks/huggingface/__init__.py +1 -2
  119. mlrun/frameworks/huggingface/model_server.py +9 -9
  120. mlrun/frameworks/lgbm/__init__.py +47 -44
  121. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  122. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  123. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  124. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  125. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  126. mlrun/frameworks/lgbm/model_handler.py +15 -11
  127. mlrun/frameworks/lgbm/model_server.py +11 -7
  128. mlrun/frameworks/lgbm/utils.py +2 -2
  129. mlrun/frameworks/onnx/__init__.py +1 -2
  130. mlrun/frameworks/onnx/dataset.py +3 -3
  131. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  132. mlrun/frameworks/onnx/model_handler.py +7 -5
  133. mlrun/frameworks/onnx/model_server.py +8 -6
  134. mlrun/frameworks/parallel_coordinates.py +11 -11
  135. mlrun/frameworks/pytorch/__init__.py +22 -23
  136. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  137. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  138. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  139. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  140. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  141. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  142. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  143. mlrun/frameworks/pytorch/model_handler.py +21 -17
  144. mlrun/frameworks/pytorch/model_server.py +13 -9
  145. mlrun/frameworks/sklearn/__init__.py +19 -18
  146. mlrun/frameworks/sklearn/estimator.py +2 -2
  147. mlrun/frameworks/sklearn/metric.py +3 -3
  148. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  149. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  150. mlrun/frameworks/sklearn/model_handler.py +4 -3
  151. mlrun/frameworks/tf_keras/__init__.py +11 -12
  152. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  153. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  154. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  155. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  156. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  157. mlrun/frameworks/tf_keras/model_server.py +12 -8
  158. mlrun/frameworks/xgboost/__init__.py +19 -18
  159. mlrun/frameworks/xgboost/model_handler.py +13 -9
  160. mlrun/k8s_utils.py +2 -5
  161. mlrun/launcher/base.py +3 -4
  162. mlrun/launcher/client.py +2 -2
  163. mlrun/launcher/local.py +6 -2
  164. mlrun/launcher/remote.py +1 -1
  165. mlrun/lists.py +8 -4
  166. mlrun/model.py +132 -46
  167. mlrun/model_monitoring/__init__.py +3 -5
  168. mlrun/model_monitoring/api.py +113 -98
  169. mlrun/model_monitoring/applications/__init__.py +0 -5
  170. mlrun/model_monitoring/applications/_application_steps.py +81 -50
  171. mlrun/model_monitoring/applications/base.py +467 -14
  172. mlrun/model_monitoring/applications/context.py +212 -134
  173. mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
  174. mlrun/model_monitoring/applications/evidently/base.py +146 -0
  175. mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
  176. mlrun/model_monitoring/applications/results.py +67 -15
  177. mlrun/model_monitoring/controller.py +701 -315
  178. mlrun/model_monitoring/db/__init__.py +0 -2
  179. mlrun/model_monitoring/db/_schedules.py +242 -0
  180. mlrun/model_monitoring/db/_stats.py +189 -0
  181. mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
  182. mlrun/model_monitoring/db/tsdb/base.py +243 -49
  183. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
  184. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  185. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
  187. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  188. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
  189. mlrun/model_monitoring/helpers.py +356 -114
  190. mlrun/model_monitoring/stream_processing.py +190 -345
  191. mlrun/model_monitoring/tracking_policy.py +11 -4
  192. mlrun/model_monitoring/writer.py +49 -90
  193. mlrun/package/__init__.py +3 -6
  194. mlrun/package/context_handler.py +2 -2
  195. mlrun/package/packager.py +12 -9
  196. mlrun/package/packagers/__init__.py +0 -2
  197. mlrun/package/packagers/default_packager.py +14 -11
  198. mlrun/package/packagers/numpy_packagers.py +16 -7
  199. mlrun/package/packagers/pandas_packagers.py +18 -18
  200. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  201. mlrun/package/packagers_manager.py +35 -32
  202. mlrun/package/utils/__init__.py +0 -3
  203. mlrun/package/utils/_pickler.py +6 -6
  204. mlrun/platforms/__init__.py +47 -16
  205. mlrun/platforms/iguazio.py +4 -1
  206. mlrun/projects/operations.py +30 -30
  207. mlrun/projects/pipelines.py +116 -47
  208. mlrun/projects/project.py +1292 -329
  209. mlrun/render.py +5 -9
  210. mlrun/run.py +57 -14
  211. mlrun/runtimes/__init__.py +1 -3
  212. mlrun/runtimes/base.py +30 -22
  213. mlrun/runtimes/daskjob.py +9 -9
  214. mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
  215. mlrun/runtimes/function_reference.py +5 -2
  216. mlrun/runtimes/generators.py +3 -2
  217. mlrun/runtimes/kubejob.py +6 -7
  218. mlrun/runtimes/mounts.py +574 -0
  219. mlrun/runtimes/mpijob/__init__.py +0 -2
  220. mlrun/runtimes/mpijob/abstract.py +7 -6
  221. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  222. mlrun/runtimes/nuclio/application/application.py +11 -13
  223. mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
  224. mlrun/runtimes/nuclio/function.py +127 -70
  225. mlrun/runtimes/nuclio/serving.py +105 -37
  226. mlrun/runtimes/pod.py +159 -54
  227. mlrun/runtimes/remotesparkjob.py +3 -2
  228. mlrun/runtimes/sparkjob/__init__.py +0 -2
  229. mlrun/runtimes/sparkjob/spark3job.py +22 -12
  230. mlrun/runtimes/utils.py +7 -6
  231. mlrun/secrets.py +2 -2
  232. mlrun/serving/__init__.py +8 -0
  233. mlrun/serving/merger.py +7 -5
  234. mlrun/serving/remote.py +35 -22
  235. mlrun/serving/routers.py +186 -240
  236. mlrun/serving/server.py +41 -10
  237. mlrun/serving/states.py +432 -118
  238. mlrun/serving/utils.py +13 -2
  239. mlrun/serving/v1_serving.py +3 -2
  240. mlrun/serving/v2_serving.py +161 -203
  241. mlrun/track/__init__.py +1 -1
  242. mlrun/track/tracker.py +2 -2
  243. mlrun/track/trackers/mlflow_tracker.py +6 -5
  244. mlrun/utils/async_http.py +35 -22
  245. mlrun/utils/clones.py +7 -4
  246. mlrun/utils/helpers.py +511 -58
  247. mlrun/utils/logger.py +119 -13
  248. mlrun/utils/notifications/notification/__init__.py +22 -19
  249. mlrun/utils/notifications/notification/base.py +39 -15
  250. mlrun/utils/notifications/notification/console.py +6 -6
  251. mlrun/utils/notifications/notification/git.py +11 -11
  252. mlrun/utils/notifications/notification/ipython.py +10 -9
  253. mlrun/utils/notifications/notification/mail.py +176 -0
  254. mlrun/utils/notifications/notification/slack.py +16 -8
  255. mlrun/utils/notifications/notification/webhook.py +24 -8
  256. mlrun/utils/notifications/notification_pusher.py +191 -200
  257. mlrun/utils/regex.py +12 -2
  258. mlrun/utils/version/version.json +2 -2
  259. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
  260. mlrun-1.8.0.dist-info/RECORD +351 -0
  261. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
  262. mlrun/model_monitoring/applications/evidently_base.py +0 -137
  263. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  264. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  265. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  266. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  267. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  268. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  269. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  270. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  271. mlrun/model_monitoring/model_endpoint.py +0 -118
  272. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  273. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
  274. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
  275. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/alibaba_oss.py CHANGED
@@ -15,6 +15,7 @@
 import time
 from datetime import datetime
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse

 import oss2
@@ -28,7 +29,9 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 class OSSStore(DataStore):
     using_bucket = True

-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec

mlrun/datastore/azure_blob.py CHANGED
@@ -14,6 +14,7 @@

 import time
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse

 from azure.storage.blob import BlobServiceClient
@@ -36,7 +37,9 @@ class AzureBlobStore(DataStore):
         1024 * 1024 * 8
     )  # for service_client property only, does not affect filesystem

-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._service_client = None
         self._storage_options = None
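
Note: a recurring mechanical change across the datastore constructors in this release is that the implicit-Optional annotation `secrets: dict = None` becomes an explicit `Optional[dict]`, in line with PEP 484's deprecation of implicit Optional. A minimal editorial sketch of the pattern (not itself part of the diff):

    from typing import Optional

    # before: secrets: dict = None   (implicit Optional, deprecated by PEP 484)
    # after:  secrets: Optional[dict] = None   (same runtime behavior)
    def connect(secrets: Optional[dict] = None) -> dict:
        return secrets or {}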
mlrun/datastore/base.py CHANGED
@@ -14,6 +14,7 @@
 import tempfile
 import urllib.parse
 from base64 import b64encode
+from copy import copy
 from os import path, remove
 from typing import Optional, Union
 from urllib.parse import urlparse
@@ -24,7 +25,6 @@ import pandas as pd
 import pyarrow
 import pytz
 import requests
-from deprecated import deprecated

 import mlrun.config
 import mlrun.errors
@@ -48,7 +48,7 @@ class FileStats:
 class DataStore:
     using_bucket = False

-    def __init__(self, parent, name, kind, endpoint="", secrets: dict = None):
+    def __init__(self, parent, name, kind, endpoint="", secrets: Optional[dict] = None):
         self._parent = parent
         self.kind = kind
         self.name = name
@@ -95,16 +95,6 @@ class DataStore:
     def uri_to_ipython(endpoint, subpath):
         return ""

-    # TODO: remove in 1.8.0
-    @deprecated(
-        version="1.8.0",
-        reason="'get_filesystem()' will be removed in 1.8.0, use "
-        "'filesystem' property instead",
-        category=FutureWarning,
-    )
-    def get_filesystem(self):
-        return self.filesystem
-
     @property
     def filesystem(self) -> Optional[fsspec.AbstractFileSystem]:
         """return fsspec file system object, if supported"""
@@ -500,12 +490,18 @@ class DataItem:
         """DataItem url e.g. /dir/path, s3://bucket/path"""
         return self._url

-    def get(self, size=None, offset=0, encoding=None):
+    def get(
+        self,
+        size: Optional[int] = None,
+        offset: int = 0,
+        encoding: Optional[str] = None,
+    ) -> Union[bytes, str]:
         """read all or a byte range and return the content

         :param size: number of bytes to get
         :param offset: fetch from offset (in bytes)
         :param encoding: encoding (e.g. "utf-8") for converting bytes to str
+        :return: the bytes/str content
         """
         body = self._store.get(self._path, size=size, offset=offset)
         if encoding and isinstance(body, bytes):
@@ -519,7 +515,7 @@
         """
         self._store.download(self._path, target_path)

-    def put(self, data, append=False):
+    def put(self, data: Union[bytes, str], append: bool = False) -> None:
         """write/upload the data, append is only supported by some datastores

         :param data: data (bytes/str) to write
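
For illustration, a minimal sketch of the now-typed DataItem read/write API (an editorial example, assuming a writable local path):

    import mlrun

    item = mlrun.get_dataitem("/tmp/example.txt")
    item.put("hello world")            # accepts str or bytes
    text = item.get(encoding="utf-8")  # returns str when an encoding is given
    head = item.get(size=5, offset=0)  # returns the first five bytes as bytes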
@@ -671,13 +667,6 @@ class DataItem:
671
667
  return f"'{self.url}'"
672
668
 
673
669
 
674
- def get_range(size, offset):
675
- byterange = f"bytes={offset}-"
676
- if size:
677
- byterange += str(offset + size)
678
- return byterange
679
-
680
-
681
670
  def basic_auth_header(user, password):
682
671
  username = user.encode("latin1")
683
672
  password = password.encode("latin1")
@@ -687,7 +676,9 @@ def basic_auth_header(user, password):
687
676
 
688
677
 
689
678
  class HttpStore(DataStore):
690
- def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
679
+ def __init__(
680
+ self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
681
+ ):
691
682
  super().__init__(parent, name, schema, endpoint, secrets)
692
683
  self._https_auth_token = None
693
684
  self._schema = schema
@@ -713,7 +704,11 @@ class HttpStore(DataStore):
713
704
  raise ValueError("unimplemented")
714
705
 
715
706
  def get(self, key, size=None, offset=0):
716
- data = self._http_get(self.url + self._join(key), self._headers, self.auth)
707
+ headers = self._headers
708
+ if urlparse(self.url).hostname == "api.github.com":
709
+ headers = copy(self._headers)
710
+ headers["Accept"] = headers.get("Accept", "application/vnd.github.raw")
711
+ data = self._http_get(self.url + self._join(key), headers, self.auth)
717
712
  if offset:
718
713
  data = data[offset:]
719
714
  if size:
@@ -724,7 +719,7 @@ class HttpStore(DataStore):
724
719
  token = self._get_secret_or_env("HTTPS_AUTH_TOKEN")
725
720
  if token:
726
721
  self._https_auth_token = token
727
- self._headers.setdefault("Authorization", f"token {token}")
722
+ self._headers.setdefault("Authorization", f"Bearer {token}")
728
723
 
729
724
  def _validate_https_token(self):
730
725
  if self._https_auth_token and self._schema in ["http"]:
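
A sketch of how the two HttpStore changes surface to users: an HTTPS_AUTH_TOKEN secret is now sent as a Bearer token (previously "token <token>"), and requests to api.github.com default to GitHub's raw media type. This is an editorial example; the token value is a placeholder:

    import os

    import mlrun

    os.environ["HTTPS_AUTH_TOKEN"] = "<github-token>"  # placeholder, supply your own
    item = mlrun.get_dataitem(
        "https://api.github.com/repos/mlrun/mlrun/contents/README.md"
    )
    print(item.get(encoding="utf-8")[:200])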
mlrun/datastore/datastore.py CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Optional
 from urllib.parse import urlparse

 from mergedeep import merge
@@ -110,7 +111,7 @@

 def uri_to_ipython(link):
     schema, endpoint, parsed_url = parse_url(link)
-    if schema in [DB_SCHEMA, "memory"]:
+    if schema in [DB_SCHEMA, "memory", "ds"]:
         return ""
     return schema_to_store(schema).uri_to_ipython(endpoint, parsed_url.path)

@@ -178,12 +179,17 @@
         # which accepts a feature vector uri and generate the offline vector (parquet) for it if it doesnt exist
         if not target and not allow_empty_resources:
             raise mlrun.errors.MLRunInvalidArgumentError(
-                f"resource {url} does not have a valid/persistent offline target"
+                f"Resource {url} does not have a valid/persistent offline target"
             )
         return resource, target or ""

     def object(
-        self, url, key="", project="", allow_empty_resources=None, secrets: dict = None
+        self,
+        url,
+        key="",
+        project="",
+        allow_empty_resources=None,
+        secrets: Optional[dict] = None,
     ) -> DataItem:
         meta = artifact_url = None
         if is_store_uri(url):
@@ -205,7 +211,7 @@
         )

     def get_or_create_store(
-        self, url, secrets: dict = None, project_name=""
+        self, url, secrets: Optional[dict] = None, project_name=""
     ) -> (DataStore, str, str):
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
mlrun/datastore/datastore_profile.py CHANGED
@@ -11,15 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+
 import ast
 import base64
 import json
 import typing
 import warnings
-from urllib.parse import ParseResult, urlparse, urlunparse
+from urllib.parse import ParseResult, urlparse

-import pydantic
+import pydantic.v1
 from mergedeep import merge

 import mlrun
@@ -28,15 +28,15 @@ import mlrun.errors
 from ..secrets import get_secret_or_env


-class DatastoreProfile(pydantic.BaseModel):
+class DatastoreProfile(pydantic.v1.BaseModel):
     type: str
     name: str
     _private_attributes: list = ()

     class Config:
-        extra = pydantic.Extra.forbid
+        extra = pydantic.v1.Extra.forbid

-    @pydantic.validator("name")
+    @pydantic.v1.validator("name")
     @classmethod
     def lower_case(cls, v):
         return v.lower()
@@ -75,14 +75,72 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton


 class DatastoreProfileBasic(DatastoreProfile):
-    type: str = pydantic.Field("basic")
+    type: str = pydantic.v1.Field("basic")
     _private_attributes = "private"
     public: str
     private: typing.Optional[str] = None


+class ConfigProfile(DatastoreProfile):
+    """
+    A profile class for managing configuration data with nested public and private attributes.
+    This class extends DatastoreProfile to handle configuration settings, separating them into
+    public and private dictionaries. Both dictionaries support nested structures, and the class
+    provides functionality to merge these attributes when needed.
+
+    Args:
+        public (Optional[dict]): Dictionary containing public configuration settings,
+            supporting nested structures
+        private (Optional[dict]): Dictionary containing private/sensitive configuration settings,
+            supporting nested structures
+
+    Example:
+        >>> public = {
+            "database": {
+                "host": "localhost",
+                "port": 5432
+            },
+            "api_version": "v1"
+        }
+        >>> private = {
+            "database": {
+                "password": "secret123",
+                "username": "admin"
+            },
+            "api_key": "xyz789"
+        }
+        >>> config = ConfigProfile("myconfig", public=public, private=private)
+
+        # When attributes() is called, it merges public and private:
+        # {
+        #     "database": {
+        #         "host": "localhost",
+        #         "port": 5432,
+        #         "password": "secret123",
+        #         "username": "admin"
+        #     },
+        #     "api_version": "v1",
+        #     "api_key": "xyz789"
+        # }
+
+    """
+
+    type = "config"
+    _private_attributes = "private"
+    public: typing.Optional[dict] = None
+    private: typing.Optional[dict] = None
+
+    def attributes(self):
+        res = {}
+        if self.public:
+            res = merge(res, self.public)
+        if self.private:
+            res = merge(res, self.private)
+        return res
+
+
 class DatastoreProfileKafkaTarget(DatastoreProfile):
-    type: str = pydantic.Field("kafka_target")
+    type: str = pydantic.v1.Field("kafka_target")
     _private_attributes = "kwargs_private"
     bootstrap_servers: typing.Optional[str] = None
     brokers: typing.Optional[str] = None
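
A usage sketch for the new ConfigProfile, following its docstring (an editorial example; note that as a pydantic model it is constructed with keyword arguments):

    from mlrun.datastore.datastore_profile import ConfigProfile

    profile = ConfigProfile(
        name="myconfig",
        public={"database": {"host": "localhost", "port": 5432}},
        private={"database": {"password": "secret123"}},
    )
    # attributes() deep-merges the private dict into the public one:
    # {"database": {"host": "localhost", "port": 5432, "password": "secret123"}}
    print(profile.attributes())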
@@ -107,12 +165,15 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
             self.brokers = self.bootstrap_servers
             self.bootstrap_servers = None
             warnings.warn(
-                "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
+                "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.10.0, "
                 "use 'brokers' instead.",
-                # TODO: Remove this in 1.9.0
+                # TODO: Remove this in 1.10.0
                 FutureWarning,
             )

+    def get_topic(self) -> typing.Optional[str]:
+        return self.topic
+
     def attributes(self):
         attributes = {"brokers": self.brokers or self.bootstrap_servers}
         if self.kwargs_public:
@@ -123,7 +184,7 @@


 class DatastoreProfileKafkaSource(DatastoreProfile):
-    type: str = pydantic.Field("kafka_source")
+    type: str = pydantic.v1.Field("kafka_source")
     _private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
     brokers: typing.Union[str, list[str]]
     topics: typing.Union[str, list[str]]
@@ -135,7 +196,11 @@
     kwargs_public: typing.Optional[dict]
     kwargs_private: typing.Optional[dict]

-    def attributes(self):
+    def get_topic(self) -> typing.Optional[str]:
+        topics = [self.topics] if isinstance(self.topics, str) else self.topics
+        return topics[0] if topics else None
+
+    def attributes(self) -> dict[str, typing.Any]:
         attributes = {}
         if self.kwargs_public:
             attributes = merge(attributes, self.kwargs_public)
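
A sketch of the new get_topic() helper on the Kafka profiles (an editorial example with illustrative broker and topic values):

    from mlrun.datastore.datastore_profile import DatastoreProfileKafkaSource

    source = DatastoreProfileKafkaSource(
        name="my-kafka", brokers="broker:9092", topics=["monitoring", "audit"]
    )
    print(source.get_topic())  # "monitoring" - the first configured topic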
@@ -151,18 +216,15 @@
             attributes["initial_offset"] = self.initial_offset
         if self.partitions is not None:
             attributes["partitions"] = self.partitions
-        sasl = attributes.pop("sasl", {})
-        if self.sasl_user and self.sasl_pass:
-            sasl["enabled"] = True
-            sasl["user"] = self.sasl_user
-            sasl["password"] = self.sasl_pass
-        if sasl:
+        if sasl := mlrun.datastore.utils.KafkaParameters(attributes).sasl(
+            usr=self.sasl_user, pwd=self.sasl_pass
+        ):
             attributes["sasl"] = sasl
         return attributes


 class DatastoreProfileV3io(DatastoreProfile):
-    type: str = pydantic.Field("v3io")
+    type: str = pydantic.v1.Field("v3io")
     v3io_access_key: typing.Optional[str] = None
     _private_attributes = "v3io_access_key"

@@ -178,7 +240,7 @@


 class DatastoreProfileS3(DatastoreProfile):
-    type: str = pydantic.Field("s3")
+    type: str = pydantic.v1.Field("s3")
     _private_attributes = ("access_key_id", "secret_key")
     endpoint_url: typing.Optional[str] = None
     force_non_anonymous: typing.Optional[str] = None
@@ -188,7 +250,7 @@ class DatastoreProfileS3(DatastoreProfile):
     secret_key: typing.Optional[str] = None
     bucket: typing.Optional[str] = None

-    @pydantic.validator("bucket")
+    @pydantic.v1.validator("bucket")
     @classmethod
     def check_bucket(cls, v):
         if not v:
@@ -226,7 +288,7 @@


 class DatastoreProfileRedis(DatastoreProfile):
-    type: str = pydantic.Field("redis")
+    type: str = pydantic.v1.Field("redis")
     _private_attributes = ("username", "password")
     endpoint_url: str
     username: typing.Optional[str] = None
@@ -254,7 +316,7 @@
             query=parsed_url.query,
             fragment=parsed_url.fragment,
         )
-        return urlunparse(new_parsed_url)
+        return new_parsed_url.geturl()

     def secrets(self) -> dict:
         res = {}
@@ -269,7 +331,7 @@


 class DatastoreProfileDBFS(DatastoreProfile):
-    type: str = pydantic.Field("dbfs")
+    type: str = pydantic.v1.Field("dbfs")
     _private_attributes = ("token",)
     endpoint_url: typing.Optional[str] = None  # host
     token: typing.Optional[str] = None
@@ -287,13 +349,13 @@


 class DatastoreProfileGCS(DatastoreProfile):
-    type: str = pydantic.Field("gcs")
+    type: str = pydantic.v1.Field("gcs")
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
     gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
     bucket: typing.Optional[str] = None

-    @pydantic.validator("bucket")
+    @pydantic.v1.validator("bucket")
     @classmethod
     def check_bucket(cls, v):
         if not v:
@@ -304,7 +366,7 @@ class DatastoreProfileGCS(DatastoreProfile):
             )
         return v

-    @pydantic.validator("gcp_credentials", pre=True, always=True)
+    @pydantic.v1.validator("gcp_credentials", pre=True, always=True)
     @classmethod
     def convert_dict_to_json(cls, v):
         if isinstance(v, dict):
@@ -332,7 +394,7 @@


 class DatastoreProfileAzureBlob(DatastoreProfile):
-    type: str = pydantic.Field("az")
+    type: str = pydantic.v1.Field("az")
     _private_attributes = (
         "connection_string",
         "account_key",
@@ -350,7 +412,7 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
     credential: typing.Optional[str] = None
     container: typing.Optional[str] = None

-    @pydantic.validator("container")
+    @pydantic.v1.validator("container")
     @classmethod
     def check_container(cls, v):
         if not v:
@@ -392,7 +454,7 @@


 class DatastoreProfileHdfs(DatastoreProfile):
-    type: str = pydantic.Field("hdfs")
+    type: str = pydantic.v1.Field("hdfs")
     _private_attributes = "token"
     host: typing.Optional[str] = None
     port: typing.Optional[int] = None
@@ -415,7 +477,60 @@
         return f"webhdfs://{self.host}:{self.http_port}{subpath}"


-class DatastoreProfile2Json(pydantic.BaseModel):
+class DatastoreProfileTDEngine(DatastoreProfile):
+    """
+    A profile that holds the required parameters for a TDEngine database, with the websocket scheme.
+    https://docs.tdengine.com/developer-guide/connecting-to-tdengine/#websocket-connection
+    """
+
+    type: str = pydantic.v1.Field("taosws")
+    _private_attributes = ["password"]
+    user: str
+    # The password cannot be empty in real world scenarios. It's here just because of the profiles completion design.
+    password: typing.Optional[str]
+    host: str
+    port: int
+
+    def dsn(self) -> str:
+        """Get the Data Source Name of the configured TDEngine profile."""
+        return f"{self.type}://{self.user}:{self.password}@{self.host}:{self.port}"
+
+    @classmethod
+    def from_dsn(cls, dsn: str, profile_name: str) -> "DatastoreProfileTDEngine":
+        """
+        Construct a TDEngine profile from DSN (connection string) and a name for the profile.
+
+        :param dsn: The DSN (Data Source Name) of the TDEngine database, e.g.: ``"taosws://root:taosdata@localhost:6041"``.
+        :param profile_name: The new profile's name.
+        :return: The TDEngine profile.
+        """
+        parsed_url = urlparse(dsn)
+        return cls(
+            name=profile_name,
+            user=parsed_url.username,
+            password=parsed_url.password,
+            host=parsed_url.hostname,
+            port=parsed_url.port,
+        )
+
+
+_DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
+    "v3io": DatastoreProfileV3io,
+    "s3": DatastoreProfileS3,
+    "redis": DatastoreProfileRedis,
+    "basic": DatastoreProfileBasic,
+    "kafka_target": DatastoreProfileKafkaTarget,
+    "kafka_source": DatastoreProfileKafkaSource,
+    "dbfs": DatastoreProfileDBFS,
+    "gcs": DatastoreProfileGCS,
+    "az": DatastoreProfileAzureBlob,
+    "hdfs": DatastoreProfileHdfs,
+    "taosws": DatastoreProfileTDEngine,
+    "config": ConfigProfile,
+}
+
+
+class DatastoreProfile2Json(pydantic.v1.BaseModel):
     @staticmethod
     def _to_json(attributes):
         # First, base64 encode the values
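
A round-trip sketch for the new TDEngine profile, using the example DSN from its docstring (editorial example):

    from mlrun.datastore.datastore_profile import DatastoreProfileTDEngine

    profile = DatastoreProfileTDEngine.from_dsn(
        "taosws://root:taosdata@localhost:6041", profile_name="my-tdengine"
    )
    print(profile.host, profile.port)  # localhost 6041
    print(profile.dsn())               # taosws://root:taosdata@localhost:6041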
@@ -465,18 +580,7 @@

         decoded_dict = {k: safe_literal_eval(v) for k, v in decoded_dict.items()}
         datastore_type = decoded_dict.get("type")
-        ds_profile_factory = {
-            "v3io": DatastoreProfileV3io,
-            "s3": DatastoreProfileS3,
-            "redis": DatastoreProfileRedis,
-            "basic": DatastoreProfileBasic,
-            "kafka_target": DatastoreProfileKafkaTarget,
-            "kafka_source": DatastoreProfileKafkaSource,
-            "dbfs": DatastoreProfileDBFS,
-            "gcs": DatastoreProfileGCS,
-            "az": DatastoreProfileAzureBlob,
-            "hdfs": DatastoreProfileHdfs,
-        }
+        ds_profile_factory = _DATASTORE_TYPE_TO_PROFILE_CLASS
         if datastore_type in ds_profile_factory:
             return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
         else:
@@ -489,7 +593,36 @@
         )


-def datastore_profile_read(url, project_name="", secrets: dict = None):
+def datastore_profile_read(url, project_name="", secrets: typing.Optional[dict] = None):
+    """
+    Read and retrieve a datastore profile from a given URL.
+
+    This function retrieves a datastore profile either from temporary client storage,
+    or from the MLRun database. It handles both client-side and server-side profile formats
+    and performs necessary conversions.
+
+    Args:
+        url (str): A URL with 'ds' scheme pointing to the datastore profile
+            (e.g., 'ds://profile-name').
+        project_name (str, optional): The project name where the profile is stored.
+            Defaults to MLRun's default project.
+        secrets (dict, optional): Dictionary containing secrets needed for profile retrieval.
+
+    Returns:
+        DatastoreProfile: The retrieved datastore profile object.
+
+    Raises:
+        MLRunInvalidArgumentError: In the following cases:
+            - If the URL scheme is not 'ds'
+            - If the profile cannot be retrieved from either server or local environment
+
+    Note:
+        When running from a client environment (outside MLRun pods), private profile information
+        is not accessible. In this case, use register_temporary_client_datastore_profile() to
+        register the profile with credentials for your local session. When running inside MLRun
+        pods, the private information is automatically available and no temporary registration is needed.
+    """
+
     parsed_url = urlparse(url)
     if parsed_url.scheme.lower() != "ds":
         raise mlrun.errors.MLRunInvalidArgumentError(
@@ -521,7 +654,7 @@
         )
     private_body = get_secret_or_env(project_ds_name_private, secret_provider=secrets)
     if not public_profile or not private_body:
-        raise mlrun.errors.MLRunNotFoundError(
+        raise mlrun.errors.MLRunNotFoundError(
             f"Unable to retrieve the datastore profile '{url}' from either the server or local environment. "
             "Make sure the profile is registered correctly, or if running in a local environment, "
             "use register_temporary_client_datastore_profile() to provide credentials locally."
mlrun/datastore/dbfs_store.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.

 import pathlib
+from typing import Optional

 from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem
 from fsspec.registry import get_filesystem_class
@@ -81,7 +82,9 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):

 # dbfs objects will be represented with the following URL: dbfs://<path>
 class DBFSStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)

     @property
mlrun/datastore/filestore.py CHANGED
@@ -14,6 +14,7 @@
 import time
 from os import listdir, makedirs, path, stat
 from shutil import copyfile
+from typing import Optional

 import fsspec

@@ -23,7 +24,9 @@ from .base import DataStore, FileStats


 class FileStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, "file", endpoint, secrets=secrets)

         self._item_path, self._real_path = None, None
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -14,6 +14,7 @@
 import json
 import os
 from pathlib import Path
+from typing import Optional

 from fsspec.registry import get_filesystem_class
 from google.auth.credentials import Credentials
@@ -33,7 +34,9 @@ class GoogleCloudStorageStore(DataStore):
     workers = 8
     chunk_size = 32 * 1024 * 1024

-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._storage_client = None
         self._storage_options = None
mlrun/datastore/hdfs.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from typing import Optional
 from urllib.parse import urlparse

 import fsspec
@@ -20,7 +21,9 @@ from mlrun.datastore.base import DataStore


 class HdfsStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)

         self.host = self._get_secret_or_env("HDFS_HOST")