mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of mlrun might be problematic.

Files changed (275)
  1. mlrun/__init__.py +26 -22
  2. mlrun/__main__.py +15 -16
  3. mlrun/alerts/alert.py +150 -15
  4. mlrun/api/schemas/__init__.py +1 -9
  5. mlrun/artifacts/__init__.py +2 -3
  6. mlrun/artifacts/base.py +62 -19
  7. mlrun/artifacts/dataset.py +17 -17
  8. mlrun/artifacts/document.py +454 -0
  9. mlrun/artifacts/manager.py +28 -18
  10. mlrun/artifacts/model.py +91 -59
  11. mlrun/artifacts/plots.py +2 -2
  12. mlrun/common/constants.py +8 -0
  13. mlrun/common/formatters/__init__.py +1 -0
  14. mlrun/common/formatters/artifact.py +1 -1
  15. mlrun/common/formatters/feature_set.py +2 -0
  16. mlrun/common/formatters/function.py +1 -0
  17. mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
  18. mlrun/common/formatters/pipeline.py +1 -2
  19. mlrun/common/formatters/project.py +9 -0
  20. mlrun/common/model_monitoring/__init__.py +0 -5
  21. mlrun/common/model_monitoring/helpers.py +12 -62
  22. mlrun/common/runtimes/constants.py +25 -4
  23. mlrun/common/schemas/__init__.py +9 -5
  24. mlrun/common/schemas/alert.py +114 -19
  25. mlrun/common/schemas/api_gateway.py +3 -3
  26. mlrun/common/schemas/artifact.py +22 -9
  27. mlrun/common/schemas/auth.py +8 -4
  28. mlrun/common/schemas/background_task.py +7 -7
  29. mlrun/common/schemas/client_spec.py +4 -4
  30. mlrun/common/schemas/clusterization_spec.py +2 -2
  31. mlrun/common/schemas/common.py +53 -3
  32. mlrun/common/schemas/constants.py +15 -0
  33. mlrun/common/schemas/datastore_profile.py +1 -1
  34. mlrun/common/schemas/feature_store.py +9 -9
  35. mlrun/common/schemas/frontend_spec.py +4 -4
  36. mlrun/common/schemas/function.py +10 -10
  37. mlrun/common/schemas/hub.py +1 -1
  38. mlrun/common/schemas/k8s.py +3 -3
  39. mlrun/common/schemas/memory_reports.py +3 -3
  40. mlrun/common/schemas/model_monitoring/__init__.py +4 -8
  41. mlrun/common/schemas/model_monitoring/constants.py +127 -46
  42. mlrun/common/schemas/model_monitoring/grafana.py +18 -12
  43. mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
  44. mlrun/common/schemas/notification.py +24 -3
  45. mlrun/common/schemas/object.py +1 -1
  46. mlrun/common/schemas/pagination.py +4 -4
  47. mlrun/common/schemas/partition.py +142 -0
  48. mlrun/common/schemas/pipeline.py +3 -3
  49. mlrun/common/schemas/project.py +26 -18
  50. mlrun/common/schemas/runs.py +3 -3
  51. mlrun/common/schemas/runtime_resource.py +5 -5
  52. mlrun/common/schemas/schedule.py +1 -1
  53. mlrun/common/schemas/secret.py +1 -1
  54. mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
  55. mlrun/common/schemas/tag.py +3 -3
  56. mlrun/common/schemas/workflow.py +6 -5
  57. mlrun/common/types.py +1 -0
  58. mlrun/config.py +157 -89
  59. mlrun/data_types/__init__.py +5 -3
  60. mlrun/data_types/infer.py +13 -3
  61. mlrun/data_types/spark.py +2 -1
  62. mlrun/datastore/__init__.py +59 -18
  63. mlrun/datastore/alibaba_oss.py +4 -1
  64. mlrun/datastore/azure_blob.py +4 -1
  65. mlrun/datastore/base.py +19 -24
  66. mlrun/datastore/datastore.py +10 -4
  67. mlrun/datastore/datastore_profile.py +178 -45
  68. mlrun/datastore/dbfs_store.py +4 -1
  69. mlrun/datastore/filestore.py +4 -1
  70. mlrun/datastore/google_cloud_storage.py +4 -1
  71. mlrun/datastore/hdfs.py +4 -1
  72. mlrun/datastore/inmem.py +4 -1
  73. mlrun/datastore/redis.py +4 -1
  74. mlrun/datastore/s3.py +14 -3
  75. mlrun/datastore/sources.py +89 -92
  76. mlrun/datastore/store_resources.py +7 -4
  77. mlrun/datastore/storeytargets.py +51 -16
  78. mlrun/datastore/targets.py +38 -31
  79. mlrun/datastore/utils.py +87 -4
  80. mlrun/datastore/v3io.py +4 -1
  81. mlrun/datastore/vectorstore.py +291 -0
  82. mlrun/datastore/wasbfs/fs.py +13 -12
  83. mlrun/db/base.py +286 -100
  84. mlrun/db/httpdb.py +1562 -490
  85. mlrun/db/nopdb.py +250 -83
  86. mlrun/errors.py +6 -2
  87. mlrun/execution.py +194 -50
  88. mlrun/feature_store/__init__.py +2 -10
  89. mlrun/feature_store/api.py +20 -458
  90. mlrun/feature_store/common.py +9 -9
  91. mlrun/feature_store/feature_set.py +20 -18
  92. mlrun/feature_store/feature_vector.py +105 -479
  93. mlrun/feature_store/feature_vector_utils.py +466 -0
  94. mlrun/feature_store/retrieval/base.py +15 -11
  95. mlrun/feature_store/retrieval/job.py +2 -1
  96. mlrun/feature_store/retrieval/storey_merger.py +1 -1
  97. mlrun/feature_store/steps.py +3 -3
  98. mlrun/features.py +30 -13
  99. mlrun/frameworks/__init__.py +1 -2
  100. mlrun/frameworks/_common/__init__.py +1 -2
  101. mlrun/frameworks/_common/artifacts_library.py +2 -2
  102. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  103. mlrun/frameworks/_common/model_handler.py +31 -31
  104. mlrun/frameworks/_common/producer.py +3 -1
  105. mlrun/frameworks/_dl_common/__init__.py +1 -2
  106. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  107. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  108. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  109. mlrun/frameworks/_ml_common/__init__.py +1 -2
  110. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  111. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  112. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  113. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  114. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  115. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  116. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  117. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  118. mlrun/frameworks/huggingface/__init__.py +1 -2
  119. mlrun/frameworks/huggingface/model_server.py +9 -9
  120. mlrun/frameworks/lgbm/__init__.py +47 -44
  121. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  122. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  123. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  124. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  125. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  126. mlrun/frameworks/lgbm/model_handler.py +15 -11
  127. mlrun/frameworks/lgbm/model_server.py +11 -7
  128. mlrun/frameworks/lgbm/utils.py +2 -2
  129. mlrun/frameworks/onnx/__init__.py +1 -2
  130. mlrun/frameworks/onnx/dataset.py +3 -3
  131. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  132. mlrun/frameworks/onnx/model_handler.py +7 -5
  133. mlrun/frameworks/onnx/model_server.py +8 -6
  134. mlrun/frameworks/parallel_coordinates.py +11 -11
  135. mlrun/frameworks/pytorch/__init__.py +22 -23
  136. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  137. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  138. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  139. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  140. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  141. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  142. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  143. mlrun/frameworks/pytorch/model_handler.py +21 -17
  144. mlrun/frameworks/pytorch/model_server.py +13 -9
  145. mlrun/frameworks/sklearn/__init__.py +19 -18
  146. mlrun/frameworks/sklearn/estimator.py +2 -2
  147. mlrun/frameworks/sklearn/metric.py +3 -3
  148. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  149. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  150. mlrun/frameworks/sklearn/model_handler.py +4 -3
  151. mlrun/frameworks/tf_keras/__init__.py +11 -12
  152. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  153. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  154. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  155. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  156. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  157. mlrun/frameworks/tf_keras/model_server.py +12 -8
  158. mlrun/frameworks/xgboost/__init__.py +19 -18
  159. mlrun/frameworks/xgboost/model_handler.py +13 -9
  160. mlrun/k8s_utils.py +2 -5
  161. mlrun/launcher/base.py +3 -4
  162. mlrun/launcher/client.py +2 -2
  163. mlrun/launcher/local.py +6 -2
  164. mlrun/launcher/remote.py +1 -1
  165. mlrun/lists.py +8 -4
  166. mlrun/model.py +132 -46
  167. mlrun/model_monitoring/__init__.py +3 -5
  168. mlrun/model_monitoring/api.py +113 -98
  169. mlrun/model_monitoring/applications/__init__.py +0 -5
  170. mlrun/model_monitoring/applications/_application_steps.py +81 -50
  171. mlrun/model_monitoring/applications/base.py +467 -14
  172. mlrun/model_monitoring/applications/context.py +212 -134
  173. mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
  174. mlrun/model_monitoring/applications/evidently/base.py +146 -0
  175. mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
  176. mlrun/model_monitoring/applications/results.py +67 -15
  177. mlrun/model_monitoring/controller.py +701 -315
  178. mlrun/model_monitoring/db/__init__.py +0 -2
  179. mlrun/model_monitoring/db/_schedules.py +242 -0
  180. mlrun/model_monitoring/db/_stats.py +189 -0
  181. mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
  182. mlrun/model_monitoring/db/tsdb/base.py +243 -49
  183. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
  184. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  185. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
  187. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  188. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
  189. mlrun/model_monitoring/helpers.py +356 -114
  190. mlrun/model_monitoring/stream_processing.py +190 -345
  191. mlrun/model_monitoring/tracking_policy.py +11 -4
  192. mlrun/model_monitoring/writer.py +49 -90
  193. mlrun/package/__init__.py +3 -6
  194. mlrun/package/context_handler.py +2 -2
  195. mlrun/package/packager.py +12 -9
  196. mlrun/package/packagers/__init__.py +0 -2
  197. mlrun/package/packagers/default_packager.py +14 -11
  198. mlrun/package/packagers/numpy_packagers.py +16 -7
  199. mlrun/package/packagers/pandas_packagers.py +18 -18
  200. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  201. mlrun/package/packagers_manager.py +35 -32
  202. mlrun/package/utils/__init__.py +0 -3
  203. mlrun/package/utils/_pickler.py +6 -6
  204. mlrun/platforms/__init__.py +47 -16
  205. mlrun/platforms/iguazio.py +4 -1
  206. mlrun/projects/operations.py +30 -30
  207. mlrun/projects/pipelines.py +116 -47
  208. mlrun/projects/project.py +1292 -329
  209. mlrun/render.py +5 -9
  210. mlrun/run.py +57 -14
  211. mlrun/runtimes/__init__.py +1 -3
  212. mlrun/runtimes/base.py +30 -22
  213. mlrun/runtimes/daskjob.py +9 -9
  214. mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
  215. mlrun/runtimes/function_reference.py +5 -2
  216. mlrun/runtimes/generators.py +3 -2
  217. mlrun/runtimes/kubejob.py +6 -7
  218. mlrun/runtimes/mounts.py +574 -0
  219. mlrun/runtimes/mpijob/__init__.py +0 -2
  220. mlrun/runtimes/mpijob/abstract.py +7 -6
  221. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  222. mlrun/runtimes/nuclio/application/application.py +11 -13
  223. mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
  224. mlrun/runtimes/nuclio/function.py +127 -70
  225. mlrun/runtimes/nuclio/serving.py +105 -37
  226. mlrun/runtimes/pod.py +159 -54
  227. mlrun/runtimes/remotesparkjob.py +3 -2
  228. mlrun/runtimes/sparkjob/__init__.py +0 -2
  229. mlrun/runtimes/sparkjob/spark3job.py +22 -12
  230. mlrun/runtimes/utils.py +7 -6
  231. mlrun/secrets.py +2 -2
  232. mlrun/serving/__init__.py +8 -0
  233. mlrun/serving/merger.py +7 -5
  234. mlrun/serving/remote.py +35 -22
  235. mlrun/serving/routers.py +186 -240
  236. mlrun/serving/server.py +41 -10
  237. mlrun/serving/states.py +432 -118
  238. mlrun/serving/utils.py +13 -2
  239. mlrun/serving/v1_serving.py +3 -2
  240. mlrun/serving/v2_serving.py +161 -203
  241. mlrun/track/__init__.py +1 -1
  242. mlrun/track/tracker.py +2 -2
  243. mlrun/track/trackers/mlflow_tracker.py +6 -5
  244. mlrun/utils/async_http.py +35 -22
  245. mlrun/utils/clones.py +7 -4
  246. mlrun/utils/helpers.py +511 -58
  247. mlrun/utils/logger.py +119 -13
  248. mlrun/utils/notifications/notification/__init__.py +22 -19
  249. mlrun/utils/notifications/notification/base.py +39 -15
  250. mlrun/utils/notifications/notification/console.py +6 -6
  251. mlrun/utils/notifications/notification/git.py +11 -11
  252. mlrun/utils/notifications/notification/ipython.py +10 -9
  253. mlrun/utils/notifications/notification/mail.py +176 -0
  254. mlrun/utils/notifications/notification/slack.py +16 -8
  255. mlrun/utils/notifications/notification/webhook.py +24 -8
  256. mlrun/utils/notifications/notification_pusher.py +191 -200
  257. mlrun/utils/regex.py +12 -2
  258. mlrun/utils/version/version.json +2 -2
  259. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
  260. mlrun-1.8.0.dist-info/RECORD +351 -0
  261. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
  262. mlrun/model_monitoring/applications/evidently_base.py +0 -137
  263. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  264. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  265. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  266. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  267. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  268. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  269. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  270. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  271. mlrun/model_monitoring/model_endpoint.py +0 -118
  272. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  273. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
  274. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
  275. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/inmem.py CHANGED
@@ -17,6 +17,7 @@ from io import BytesIO, StringIO
 import pandas as pd
 
 import mlrun
+import mlrun.utils.helpers
 
 from .base import DataStore, FileStats
 
@@ -35,7 +36,9 @@ class InMemoryStore(DataStore):
 
     def _get_item(self, key):
         if key not in self._items:
-            raise ValueError(f"item {key} not found in memory store")
+            raise mlrun.errors.MLRunNotFoundError(
+                f"item {key} not found in memory store"
+            )
         return self._items[key]
 
     def get(self, key, size=None, offset=0):
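
Note on the change above: a missing key in the in-memory store now surfaces as MLRun's standard not-found error rather than a bare ValueError, so callers can treat "not found" uniformly across datastores. A minimal sketch of code relying on the new behavior (the helper and its return-None policy are illustrative, not part of mlrun):

    import mlrun.errors

    def read_item_or_none(store, key):
        # mlrun 1.8.0 raises MLRunNotFoundError for a missing key;
        # 1.7.x raised ValueError here, so update any except clauses.
        try:
            return store._get_item(key)
        except mlrun.errors.MLRunNotFoundError:
            return None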
mlrun/datastore/redis.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
 from urllib.parse import urlparse
 
 import redis
@@ -30,7 +31,9 @@ class RedisStore(DataStore):
     - key and value sizes are limited to 512MB
     """
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
mlrun/datastore/s3.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import time
+from typing import Optional
 
 import boto3
 from boto3.s3.transfer import TransferConfig
@@ -20,13 +21,15 @@ from fsspec.registry import get_filesystem_class
 
 import mlrun.errors
 
-from .base import DataStore, FileStats, get_range, make_datastore_schema_sanitizer
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 
 
 class S3Store(DataStore):
     using_bucket = True
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec
         self._temp_credentials = None
@@ -105,6 +108,13 @@
             "choose-signer.s3.*", disable_signing
         )
 
+    @staticmethod
+    def get_range(size, offset):
+        byterange = f"bytes={offset}-"
+        if size:
+            byterange += str(offset + size - 1)
+        return byterange
+
     def get_spark_options(self):
         res = {}
         st = self.get_storage_options()
@@ -155,6 +165,7 @@
             key=access_key_id,
             secret=secret,
             token=token,
+            use_listings_cache=False,
         )
 
         if endpoint_url:
@@ -182,7 +193,7 @@
         bucket, key = self.get_bucket_and_key(key)
         obj = self.s3.Object(bucket, key)
         if size or offset:
-            return obj.get(Range=get_range(size, offset))["Body"].read()
+            return obj.get(Range=S3Store.get_range(size, offset))["Body"].read()
         return obj.get()["Body"].read()
 
     def put(self, key, data, append=False):
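
The get_range helper that S3Store previously imported from .base is now a static method on the class. It builds the inclusive HTTP Range header used for partial object reads: offset is zero-based and the end byte is offset + size - 1, while a falsy size leaves the range open-ended. A quick standalone check of that arithmetic (the assertions are ours, not mlrun's):

    def get_range(size, offset):
        # Mirrors S3Store.get_range: inclusive byte range per the HTTP spec
        byterange = f"bytes={offset}-"
        if size:
            byterange += str(offset + size - 1)
        return byterange

    assert get_range(100, 50) == "bytes=50-149"  # 100 bytes starting at offset 50
    assert get_range(None, 50) == "bytes=50-"    # from offset 50 to end of object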
mlrun/datastore/sources.py CHANGED
@@ -18,7 +18,7 @@ import warnings
 from base64 import b64encode
 from copy import copy
 from datetime import datetime
-from typing import Optional, Union
+from typing import Any, Optional, Union
 
 import pandas as pd
 import semver
@@ -34,6 +34,7 @@ from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
 from mlrun.utils import logger
 
+from ..common.schemas.function import Function
 from ..model import DataSource
 from ..platforms.iguazio import parse_path
 from ..utils import get_class, is_explicit_ack_supported
@@ -181,10 +182,10 @@ class CSVSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         parse_dates: Union[None, int, str, list[int], list[str]] = None,
         **kwargs,
     ):
@@ -308,11 +309,11 @@ class ParquetSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
         additional_filters: Optional[list[Union[tuple, list]]] = None,
@@ -392,7 +393,9 @@
         )
 
     @classmethod
-    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+    def from_dict(
+        cls, struct=None, fields=None, deprecated_fields: Optional[dict] = None
+    ):
         new_obj = super().from_dict(
             struct=struct, fields=fields, deprecated_fields=deprecated_fields
         )
@@ -564,18 +567,18 @@ class BigQuerySource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        table: str = None,
-        max_results_for_table: int = None,
-        query: str = None,
-        materialization_dataset: str = None,
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        table: Optional[str] = None,
+        max_results_for_table: Optional[int] = None,
+        query: Optional[str] = None,
+        materialization_dataset: Optional[str] = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        gcp_project: str = None,
-        spark_options: dict = None,
+        gcp_project: Optional[str] = None,
+        spark_options: Optional[dict] = None,
         **kwargs,
     ):
         if query and table:
@@ -776,27 +779,27 @@ class SnowflakeSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        key_field: str = None,
-        attributes: dict[str, object] = None,
-        time_field: str = None,
-        schedule: str = None,
+        key_field: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        query: str = None,
-        url: str = None,
-        user: str = None,
-        database: str = None,
-        schema: str = None,
-        db_schema: str = None,
-        warehouse: str = None,
+        query: Optional[str] = None,
+        url: Optional[str] = None,
+        user: Optional[str] = None,
+        database: Optional[str] = None,
+        schema: Optional[str] = None,
+        db_schema: Optional[str] = None,
+        warehouse: Optional[str] = None,
         **kwargs,
     ):
-        # TODO: Remove in 1.9.0
+        # TODO: Remove in 1.10.0
         if schema:
             warnings.warn(
-                "schema is deprecated in 1.7.0, and will be removed in 1.9.0, please use db_schema"
+                "schema is deprecated in 1.7.0, and will be removed in 1.10.0, please use db_schema"
             )
-        db_schema = db_schema or schema  # TODO: Remove in 1.9.0
+        db_schema = db_schema or schema  # TODO: Remove in 1.10.0
 
         attributes = attributes or {}
         if url:
@@ -850,9 +853,9 @@ class CustomSource(BaseSourceDriver):
 
     def __init__(
         self,
-        class_name: str = None,
+        class_name: Optional[str] = None,
         name: str = "",
-        schedule: str = None,
+        schedule: Optional[str] = None,
         **attributes,
     ):
         attributes = attributes or {}
@@ -930,12 +933,12 @@ class OnlineSource(BaseSourceDriver):
 
     def __init__(
        self,
-        name: str = None,
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        workers: int = None,
+        name: Optional[str] = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        workers: Optional[int] = None,
     ):
         super().__init__(name, path, attributes, key_field, time_field)
         self.online = True
@@ -949,8 +952,7 @@
             is_explicit_ack_supported(context)
             and mlrun.mlconf.is_explicit_ack_enabled()
         )
-        # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
-        src_class = storey.SyncEmitSource(
+        src_class = storey.AsyncEmitSource(
            context=context,
            key_field=self.key_field or key_field,
            full_event=True,
@@ -965,6 +967,21 @@
             "This source type is not supported with ingestion service yet"
         )
 
+    @staticmethod
+    def set_explicit_ack_mode(function: Function, **extra_arguments) -> dict[str, Any]:
+        extra_arguments = extra_arguments or {}
+        engine = "sync"
+        if function.spec and hasattr(function.spec, "graph"):
+            engine = getattr(function.spec.graph, "engine", None) or engine
+        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
+            extra_arguments["explicit_ack_mode"] = extra_arguments.get(
+                "explicit_ack_mode", "explicitOnly"
+            )
+            extra_arguments["worker_allocation_mode"] = extra_arguments.get(
+                "worker_allocation_mode", "static"
+            )
+        return extra_arguments
+
 
 class HttpSource(OnlineSource):
     kind = "http"
@@ -986,7 +1003,7 @@ class StreamSource(OnlineSource):
         seek_to="earliest",
         shards=1,
         retention_in_hours=24,
-        extra_attributes: dict = None,
+        extra_attributes: Optional[dict] = None,
         **kwargs,
     ):
         """
@@ -1027,15 +1044,7 @@
             raise_for_status=v3io.dataplane.RaiseForStatus.never,
         )
         res.raise_for_status([409, 204])
-
-        kwargs = {}
-        engine = "async"
-        if hasattr(function.spec, "graph") and function.spec.graph.engine:
-            engine = function.spec.graph.engine
-
-        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
-            kwargs["explicit_ack_mode"] = "explicitOnly"
-            kwargs["worker_allocation_mode"] = "static"
+        kwargs = self.set_explicit_ack_mode(function=function)
 
         function.add_v3io_stream_trigger(
             url,
@@ -1086,12 +1095,9 @@ class KafkaSource(OnlineSource):
         attributes["initial_offset"] = initial_offset
         if partitions is not None:
             attributes["partitions"] = partitions
-        sasl = attributes.pop("sasl", {})
-        if sasl_user and sasl_pass:
-            sasl["enabled"] = True
-            sasl["user"] = sasl_user
-            sasl["password"] = sasl_pass
-        if sasl:
+        if sasl := mlrun.datastore.utils.KafkaParameters(attributes).sasl(
+            usr=sasl_user, pwd=sasl_pass
+        ):
             attributes["sasl"] = sasl
         super().__init__(attributes=attributes, **kwargs)
 
@@ -1116,18 +1122,15 @@
         else:
             extra_attributes = copy(self.attributes)
         partitions = extra_attributes.pop("partitions", None)
-        explicit_ack_mode = None
-        engine = "async"
-        if hasattr(function.spec, "graph") and function.spec.graph.engine:
-            engine = function.spec.graph.engine
 
-        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
-            explicit_ack_mode = "explicitOnly"
-            extra_attributes["workerAllocationMode"] = extra_attributes.get(
-                "worker_allocation_mode", "static"
-            )
+        extra_attributes = self.set_explicit_ack_mode(function, **extra_attributes)
+        explicit_ack_mode = extra_attributes.get("explicit_ack_mode")
+        extra_attributes["workerAllocationMode"] = extra_attributes.get(
+            "worker_allocation_mode", "pool"
+        )
 
         trigger_kwargs = {}
+
         if "max_workers" in extra_attributes:
             trigger_kwargs = {"max_workers": extra_attributes.pop("max_workers")}
 
@@ -1168,7 +1171,7 @@
         self,
         num_partitions: int = 4,
         replication_factor: int = 1,
-        topics: list[str] = None,
+        topics: Optional[list[str]] = None,
     ):
         """
         Create Kafka topics with the specified number of partitions and replication factor.
@@ -1193,19 +1196,13 @@
         new_topics = [
             NewTopic(topic, num_partitions, replication_factor) for topic in topics
         ]
-        kafka_admin = KafkaAdminClient(
-            bootstrap_servers=brokers,
-            sasl_mechanism=self.attributes.get("sasl", {}).get("sasl_mechanism"),
-            sasl_plain_username=self.attributes.get("sasl", {}).get("username"),
-            sasl_plain_password=self.attributes.get("sasl", {}).get("password"),
-            sasl_kerberos_service_name=self.attributes.get("sasl", {}).get(
-                "sasl_kerberos_service_name", "kafka"
-            ),
-            sasl_kerberos_domain_name=self.attributes.get("sasl", {}).get(
-                "sasl_kerberos_domain_name"
-            ),
-            sasl_oauth_token_provider=self.attributes.get("sasl", {}).get("mechanism"),
-        )
+
+        kafka_admin_kwargs = {}
+        kafka_admin_kwargs = mlrun.datastore.utils.KafkaParameters(
+            self.attributes
+        ).admin()
+
+        kafka_admin = KafkaAdminClient(bootstrap_servers=brokers, **kafka_admin_kwargs)
         try:
             kafka_admin.create_topics(new_topics)
         finally:
@@ -1226,16 +1223,16 @@ class SQLSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        db_url: str = None,
-        table_name: str = None,
-        spark_options: dict = None,
-        parse_dates: list[str] = None,
+        db_url: Optional[str] = None,
+        table_name: Optional[str] = None,
+        spark_options: Optional[dict] = None,
+        parse_dates: Optional[list[str]] = None,
        **kwargs,
     ):
         """
mlrun/datastore/store_resources.py CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config
@@ -165,11 +163,16 @@
         return db.get_feature_vector(name, project, tag, uid)
 
     elif StorePrefix.is_artifact(kind):
-        project, key, iteration, tag, tree = parse_artifact_uri(
+        project, key, iteration, tag, tree, uid = parse_artifact_uri(
             uri, project or config.default_project
         )
         resource = db.read_artifact(
-            key, project=project, tag=tag, iter=iteration, tree=tree
+            key,
+            project=project,
+            tag=tag,
+            iter=iteration,
+            tree=tree,
+            uid=uid,
         )
         if resource.get("kind", "") == "link":
             # todo: support other link types (not just iter, move this to the db/api layer
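
parse_artifact_uri now returns a sixth element, the artifact uid, which is passed through to db.read_artifact so a store URI can resolve to one exact artifact version rather than whatever a tag currently points at. Callers that destructure the five-element tuple from 1.7.x must be updated; a minimal sketch of an updated caller (the import path is our assumption, based on this file's usage):

    def resolve_artifact(db, uri: str, default_project: str):
        from mlrun.utils.helpers import parse_artifact_uri  # assumed location

        # 1.8.0 returns six values; 1.7.x returned five (no uid)
        project, key, iteration, tag, tree, uid = parse_artifact_uri(uri, default_project)
        return db.read_artifact(
            key, project=project, tag=tag, iter=iteration, tree=tree, uid=uid
        )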
mlrun/datastore/storeytargets.py CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from urllib.parse import urlparse
+
 import storey
 from mergedeep import merge
 from storey import V3ioDriver
@@ -18,6 +20,12 @@ from storey import V3ioDriver
 import mlrun
 import mlrun.model_monitoring.helpers
 from mlrun.datastore.base import DataStore
+from mlrun.datastore.datastore_profile import (
+    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaTarget,
+    DatastoreProfileTDEngine,
+    datastore_profile_read,
+)
 
 from ..platforms.iguazio import parse_path
 from .utils import (
@@ -42,9 +50,16 @@ def get_url_and_storage_options(path, external_storage_options=None):
 
 
 class TDEngineStoreyTarget(storey.TDEngineTarget):
-    def __init__(self, *args, **kwargs):
-        kwargs["url"] = mlrun.model_monitoring.helpers.get_tsdb_connection_string()
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, url: str, **kwargs):
+        if url.startswith("ds://"):
+            datastore_profile = datastore_profile_read(url)
+            if not isinstance(datastore_profile, DatastoreProfileTDEngine):
+                raise ValueError(
+                    f"Unexpected datastore profile type:{datastore_profile.type}."
+                    "Only DatastoreProfileTDEngine is supported"
+                )
+            url = datastore_profile.dsn()
+        super().__init__(*args, url=url, **kwargs)
 
 
 class StoreyTargetUtils:
@@ -69,7 +84,12 @@
 
 class ParquetStoreyTarget(storey.ParquetTarget):
     def __init__(self, *args, **kwargs):
+        alt_key_name = kwargs.pop("alternative_v3io_access_key", None)
         args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        storage_options = kwargs.get("storage_options", {})
+        if storage_options and storage_options.get("v3io_access_key") and alt_key_name:
+            if alt_key := mlrun.get_secret_or_env(alt_key_name):
+                storage_options["v3io_access_key"] = alt_key
         super().__init__(*args, **kwargs)
 
 
@@ -89,17 +109,20 @@ class StreamStoreyTarget(storey.StreamTarget):
            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
 
        _, storage_options = get_url_and_storage_options(uri)
-       endpoint, path = parse_path(uri)
+       _, path = parse_path(uri)
 
        access_key = storage_options.get("v3io_access_key")
-       storage = V3ioDriver(
-           webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
-       )
+
+       if alt_key_name := kwargs.pop("alternative_v3io_access_key", None):
+           if alt_key := mlrun.get_secret_or_env(alt_key_name):
+               access_key = alt_key
+
+       storage = V3ioDriver(access_key=access_key)
 
        if storage_options:
            kwargs["storage"] = storage
        if args:
-           args[0] = endpoint
+           args[0] = path
        if "stream_path" in kwargs:
            kwargs["stream_path"] = path
 
@@ -108,17 +131,26 @@
 
 class KafkaStoreyTarget(storey.KafkaTarget):
     def __init__(self, *args, **kwargs):
+        kwargs.pop("alternative_v3io_access_key", None)
         path = kwargs.pop("path")
-        attributes = kwargs.pop("attributes", None)
+        attributes = kwargs.pop("attributes", {})
         if path and path.startswith("ds://"):
-            datastore_profile = (
-                mlrun.datastore.datastore_profile.datastore_profile_read(path)
-            )
+            datastore_profile = datastore_profile_read(path)
+            if not isinstance(
+                datastore_profile,
+                (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+            ):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Unsupported datastore profile type: {type(datastore_profile)}"
+                )
+
             attributes = merge(attributes, datastore_profile.attributes())
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
+            brokers = attributes.pop("brokers", None)
+            # Override the topic with the one in the url (if any)
+            parsed = urlparse(path)
+            topic = (
+                parsed.path.strip("/") if parsed.path else datastore_profile.get_topic()
             )
-            topic = datastore_profile.topic
         else:
            brokers = attributes.pop(
                "brokers", attributes.pop("bootstrap_servers", None)
            )
@@ -129,7 +161,10 @@
            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
        kwargs["brokers"] = brokers
        kwargs["topic"] = topic
-       super().__init__(*args, **kwargs, **attributes)
+
+       attributes = mlrun.datastore.utils.KafkaParameters(attributes).producer()
+
+       super().__init__(*args, **kwargs, producer_options=attributes)
 
 
 class NoSqlStoreyTarget(storey.NoSqlTarget):
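
Both this file and sources.py now delegate their Kafka configuration to mlrun.datastore.utils.KafkaParameters (part of the +87 lines in mlrun/datastore/utils.py), which exposes sasl(), admin(), and producer() views over a single attributes dict instead of each call site hand-rolling the mapping. The class body is not shown in this diff, so the following stand-in only illustrates the pattern, with internals reconstructed from the code the diffs above remove:

    class KafkaParametersSketch:
        # Illustrative stand-in for mlrun.datastore.utils.KafkaParameters;
        # the real class in 1.8.0 may differ.

        def __init__(self, attributes: dict):
            self._attrs = attributes

        def sasl(self, usr=None, pwd=None) -> dict:
            # Overlay credentials on any existing sasl block (mirrors the
            # removed KafkaSource.__init__ logic above).
            sasl = dict(self._attrs.get("sasl", {}))
            if usr and pwd:
                sasl.update({"enabled": True, "user": usr, "password": pwd})
            return sasl

        def admin(self) -> dict:
            # Map the sasl block onto KafkaAdminClient kwargs (the removed
            # create_topics code above also covered kerberos/oauth settings).
            sasl = self._attrs.get("sasl", {})
            return {
                "sasl_mechanism": sasl.get("sasl_mechanism"),
                "sasl_plain_username": sasl.get("username"),
                "sasl_plain_password": sasl.get("password"),
            }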