mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (257)
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +1 -2
  5. mlrun/artifacts/base.py +46 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +334 -0
  8. mlrun/artifacts/manager.py +15 -13
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +7 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +66 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +67 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +2 -1
  57. mlrun/datastore/__init__.py +0 -2
  58. mlrun/datastore/alibaba_oss.py +4 -1
  59. mlrun/datastore/azure_blob.py +4 -1
  60. mlrun/datastore/base.py +12 -4
  61. mlrun/datastore/datastore.py +9 -3
  62. mlrun/datastore/datastore_profile.py +79 -20
  63. mlrun/datastore/dbfs_store.py +4 -1
  64. mlrun/datastore/filestore.py +4 -1
  65. mlrun/datastore/google_cloud_storage.py +4 -1
  66. mlrun/datastore/hdfs.py +4 -1
  67. mlrun/datastore/inmem.py +4 -1
  68. mlrun/datastore/redis.py +4 -1
  69. mlrun/datastore/s3.py +4 -1
  70. mlrun/datastore/sources.py +52 -51
  71. mlrun/datastore/store_resources.py +0 -2
  72. mlrun/datastore/targets.py +21 -21
  73. mlrun/datastore/utils.py +2 -2
  74. mlrun/datastore/v3io.py +4 -1
  75. mlrun/datastore/vectorstore.py +194 -0
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +208 -82
  78. mlrun/db/factory.py +0 -3
  79. mlrun/db/httpdb.py +1237 -386
  80. mlrun/db/nopdb.py +201 -74
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +136 -50
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +41 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +117 -46
  158. mlrun/model_monitoring/__init__.py +4 -4
  159. mlrun/model_monitoring/api.py +61 -59
  160. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  161. mlrun/model_monitoring/applications/base.py +165 -6
  162. mlrun/model_monitoring/applications/context.py +88 -37
  163. mlrun/model_monitoring/applications/evidently_base.py +0 -1
  164. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  165. mlrun/model_monitoring/applications/results.py +55 -3
  166. mlrun/model_monitoring/controller.py +207 -239
  167. mlrun/model_monitoring/db/__init__.py +0 -2
  168. mlrun/model_monitoring/db/_schedules.py +156 -0
  169. mlrun/model_monitoring/db/_stats.py +189 -0
  170. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  171. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
  172. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  173. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
  174. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  175. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  176. mlrun/model_monitoring/helpers.py +152 -49
  177. mlrun/model_monitoring/stream_processing.py +99 -283
  178. mlrun/model_monitoring/tracking_policy.py +10 -3
  179. mlrun/model_monitoring/writer.py +48 -36
  180. mlrun/package/__init__.py +3 -6
  181. mlrun/package/context_handler.py +1 -1
  182. mlrun/package/packager.py +12 -9
  183. mlrun/package/packagers/__init__.py +0 -2
  184. mlrun/package/packagers/default_packager.py +14 -11
  185. mlrun/package/packagers/numpy_packagers.py +16 -7
  186. mlrun/package/packagers/pandas_packagers.py +18 -18
  187. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  188. mlrun/package/packagers_manager.py +31 -14
  189. mlrun/package/utils/__init__.py +0 -3
  190. mlrun/package/utils/_pickler.py +6 -6
  191. mlrun/platforms/__init__.py +47 -16
  192. mlrun/platforms/iguazio.py +4 -1
  193. mlrun/projects/operations.py +27 -27
  194. mlrun/projects/pipelines.py +71 -36
  195. mlrun/projects/project.py +865 -206
  196. mlrun/run.py +53 -10
  197. mlrun/runtimes/__init__.py +1 -3
  198. mlrun/runtimes/base.py +15 -11
  199. mlrun/runtimes/daskjob.py +9 -9
  200. mlrun/runtimes/generators.py +2 -1
  201. mlrun/runtimes/kubejob.py +4 -5
  202. mlrun/runtimes/mounts.py +572 -0
  203. mlrun/runtimes/mpijob/__init__.py +0 -2
  204. mlrun/runtimes/mpijob/abstract.py +7 -6
  205. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  206. mlrun/runtimes/nuclio/application/application.py +11 -11
  207. mlrun/runtimes/nuclio/function.py +19 -17
  208. mlrun/runtimes/nuclio/serving.py +18 -11
  209. mlrun/runtimes/pod.py +154 -45
  210. mlrun/runtimes/remotesparkjob.py +3 -2
  211. mlrun/runtimes/sparkjob/__init__.py +0 -2
  212. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  213. mlrun/runtimes/utils.py +6 -5
  214. mlrun/serving/merger.py +6 -4
  215. mlrun/serving/remote.py +18 -17
  216. mlrun/serving/routers.py +185 -172
  217. mlrun/serving/server.py +7 -1
  218. mlrun/serving/states.py +97 -78
  219. mlrun/serving/utils.py +13 -2
  220. mlrun/serving/v1_serving.py +3 -2
  221. mlrun/serving/v2_serving.py +74 -65
  222. mlrun/track/__init__.py +1 -1
  223. mlrun/track/tracker.py +2 -2
  224. mlrun/track/trackers/mlflow_tracker.py +6 -5
  225. mlrun/utils/async_http.py +1 -1
  226. mlrun/utils/clones.py +1 -1
  227. mlrun/utils/helpers.py +54 -16
  228. mlrun/utils/logger.py +106 -4
  229. mlrun/utils/notifications/notification/__init__.py +22 -19
  230. mlrun/utils/notifications/notification/base.py +33 -14
  231. mlrun/utils/notifications/notification/console.py +6 -6
  232. mlrun/utils/notifications/notification/git.py +11 -11
  233. mlrun/utils/notifications/notification/ipython.py +10 -9
  234. mlrun/utils/notifications/notification/mail.py +176 -0
  235. mlrun/utils/notifications/notification/slack.py +6 -6
  236. mlrun/utils/notifications/notification/webhook.py +6 -6
  237. mlrun/utils/notifications/notification_pusher.py +86 -44
  238. mlrun/utils/regex.py +3 -1
  239. mlrun/utils/version/version.json +2 -2
  240. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +21 -16
  241. mlrun-1.8.0rc8.dist-info/RECORD +347 -0
  242. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  243. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  244. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  245. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  246. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  247. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  248. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  249. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  250. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  251. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  252. mlrun/model_monitoring/model_endpoint.py +0 -118
  253. mlrun-1.7.1rc10.dist-info/RECORD +0 -351
  254. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
  255. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +0 -0
  256. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
  257. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
mlrun/datastore/datastore_profile.py CHANGED
@@ -19,7 +19,7 @@ import typing
 import warnings
 from urllib.parse import ParseResult, urlparse, urlunparse
 
-import pydantic
+import pydantic.v1
 from mergedeep import merge
 
 import mlrun
@@ -28,15 +28,15 @@ import mlrun.errors
 from ..secrets import get_secret_or_env
 
 
-class DatastoreProfile(pydantic.BaseModel):
+class DatastoreProfile(pydantic.v1.BaseModel):
     type: str
     name: str
     _private_attributes: list = ()
 
     class Config:
-        extra = pydantic.Extra.forbid
+        extra = pydantic.v1.Extra.forbid
 
-    @pydantic.validator("name")
+    @pydantic.v1.validator("name")
     @classmethod
     def lower_case(cls, v):
         return v.lower()
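
The module-wide move from `pydantic` to `pydantic.v1` keeps these models on the 1.x API while allowing pydantic 2 to be installed alongside. A minimal sketch of the pattern, assuming pydantic>=2 (which ships the `pydantic.v1` compatibility shim):

```python
import pydantic.v1  # available when pydantic>=2 is installed


class Profile(pydantic.v1.BaseModel):
    name: str

    class Config:
        extra = pydantic.v1.Extra.forbid  # reject unknown fields, as before

    @pydantic.v1.validator("name")
    @classmethod
    def lower_case(cls, v):
        return v.lower()


print(Profile(name="MyProfile").name)  # -> "myprofile"
```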
@@ -75,14 +75,72 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton
 
 
 class DatastoreProfileBasic(DatastoreProfile):
-    type: str = pydantic.Field("basic")
+    type: str = pydantic.v1.Field("basic")
     _private_attributes = "private"
     public: str
     private: typing.Optional[str] = None
 
 
+class ConfigProfile(DatastoreProfile):
+    """
+    A profile class for managing configuration data with nested public and private attributes.
+    This class extends DatastoreProfile to handle configuration settings, separating them into
+    public and private dictionaries. Both dictionaries support nested structures, and the class
+    provides functionality to merge these attributes when needed.
+
+    Args:
+        public (Optional[dict]): Dictionary containing public configuration settings,
+            supporting nested structures
+        private (Optional[dict]): Dictionary containing private/sensitive configuration settings,
+            supporting nested structures
+
+    Example:
+        >>> public = {
+                "database": {
+                    "host": "localhost",
+                    "port": 5432
+                },
+                "api_version": "v1"
+            }
+        >>> private = {
+                "database": {
+                    "password": "secret123",
+                    "username": "admin"
+                },
+                "api_key": "xyz789"
+            }
+        >>> config = ConfigProfile("myconfig", public=public, private=private)
+
+        # When attributes() is called, it merges public and private:
+        # {
+        #     "database": {
+        #         "host": "localhost",
+        #         "port": 5432,
+        #         "password": "secret123",
+        #         "username": "admin"
+        #     },
+        #     "api_version": "v1",
+        #     "api_key": "xyz789"
+        # }
+    """
+
+    type = "config"
+    _private_attributes = "private"
+    public: typing.Optional[dict] = None
+    private: typing.Optional[dict] = None
+
+    def attributes(self):
+        res = {}
+        if self.public:
+            res = merge(res, self.public)
+        if self.private:
+            res = merge(res, self.private)
+        return res
+
+
 class DatastoreProfileKafkaTarget(DatastoreProfile):
-    type: str = pydantic.Field("kafka_target")
+    type: str = pydantic.v1.Field("kafka_target")
     _private_attributes = "kwargs_private"
     bootstrap_servers: typing.Optional[str] = None
     brokers: typing.Optional[str] = None
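
To make the new `ConfigProfile` concrete, here is a hedged usage sketch; `register_temporary_client_datastore_profile` is assumed from mlrun's client-side profile helpers (the `TemporaryClientDatastoreProfiles` singleton referenced in the hunk header above), and `attributes()` deep-merges via the `mergedeep` import shown earlier:

```python
from mlrun.datastore.datastore_profile import (
    ConfigProfile,
    register_temporary_client_datastore_profile,
)

profile = ConfigProfile(
    name="myconfig",
    public={"database": {"host": "localhost", "port": 5432}},
    private={"database": {"password": "secret123"}},
)
register_temporary_client_datastore_profile(profile)

# attributes() layers private values over public ones (deep merge):
merged = profile.attributes()
assert merged["database"] == {
    "host": "localhost",
    "port": 5432,
    "password": "secret123",
}
```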
@@ -123,7 +181,7 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
 
 
 class DatastoreProfileKafkaSource(DatastoreProfile):
-    type: str = pydantic.Field("kafka_source")
+    type: str = pydantic.v1.Field("kafka_source")
     _private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
     brokers: typing.Union[str, list[str]]
     topics: typing.Union[str, list[str]]
@@ -162,7 +220,7 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
 
 
 class DatastoreProfileV3io(DatastoreProfile):
-    type: str = pydantic.Field("v3io")
+    type: str = pydantic.v1.Field("v3io")
     v3io_access_key: typing.Optional[str] = None
     _private_attributes = "v3io_access_key"
 
@@ -178,7 +236,7 @@ class DatastoreProfileV3io(DatastoreProfile):
 
 
 class DatastoreProfileS3(DatastoreProfile):
-    type: str = pydantic.Field("s3")
+    type: str = pydantic.v1.Field("s3")
     _private_attributes = ("access_key_id", "secret_key")
     endpoint_url: typing.Optional[str] = None
     force_non_anonymous: typing.Optional[str] = None
@@ -188,7 +246,7 @@ class DatastoreProfileS3(DatastoreProfile):
     secret_key: typing.Optional[str] = None
     bucket: typing.Optional[str] = None
 
-    @pydantic.validator("bucket")
+    @pydantic.v1.validator("bucket")
     @classmethod
     def check_bucket(cls, v):
         if not v:
@@ -226,7 +284,7 @@ class DatastoreProfileS3(DatastoreProfile):
 
 
 class DatastoreProfileRedis(DatastoreProfile):
-    type: str = pydantic.Field("redis")
+    type: str = pydantic.v1.Field("redis")
     _private_attributes = ("username", "password")
     endpoint_url: str
     username: typing.Optional[str] = None
@@ -269,7 +327,7 @@ class DatastoreProfileRedis(DatastoreProfile):
 
 
 class DatastoreProfileDBFS(DatastoreProfile):
-    type: str = pydantic.Field("dbfs")
+    type: str = pydantic.v1.Field("dbfs")
     _private_attributes = ("token",)
     endpoint_url: typing.Optional[str] = None  # host
     token: typing.Optional[str] = None
@@ -287,13 +345,13 @@ class DatastoreProfileDBFS(DatastoreProfile):
 
 
 class DatastoreProfileGCS(DatastoreProfile):
-    type: str = pydantic.Field("gcs")
+    type: str = pydantic.v1.Field("gcs")
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
     gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
     bucket: typing.Optional[str] = None
 
-    @pydantic.validator("bucket")
+    @pydantic.v1.validator("bucket")
     @classmethod
     def check_bucket(cls, v):
         if not v:
@@ -304,7 +362,7 @@ class DatastoreProfileGCS(DatastoreProfile):
             )
         return v
 
-    @pydantic.validator("gcp_credentials", pre=True, always=True)
+    @pydantic.v1.validator("gcp_credentials", pre=True, always=True)
     @classmethod
     def convert_dict_to_json(cls, v):
         if isinstance(v, dict):
@@ -332,7 +390,7 @@ class DatastoreProfileGCS(DatastoreProfile):
 
 
 class DatastoreProfileAzureBlob(DatastoreProfile):
-    type: str = pydantic.Field("az")
+    type: str = pydantic.v1.Field("az")
     _private_attributes = (
         "connection_string",
         "account_key",
@@ -350,7 +408,7 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
     credential: typing.Optional[str] = None
     container: typing.Optional[str] = None
 
-    @pydantic.validator("container")
+    @pydantic.v1.validator("container")
     @classmethod
     def check_container(cls, v):
         if not v:
@@ -392,7 +450,7 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
 
 
 class DatastoreProfileHdfs(DatastoreProfile):
-    type: str = pydantic.Field("hdfs")
+    type: str = pydantic.v1.Field("hdfs")
    _private_attributes = "token"
     host: typing.Optional[str] = None
     port: typing.Optional[int] = None
@@ -415,7 +473,7 @@ class DatastoreProfileHdfs(DatastoreProfile):
         return f"webhdfs://{self.host}:{self.http_port}{subpath}"
 
 
-class DatastoreProfile2Json(pydantic.BaseModel):
+class DatastoreProfile2Json(pydantic.v1.BaseModel):
     @staticmethod
     def _to_json(attributes):
         # First, base64 encode the values
@@ -476,6 +534,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
             "gcs": DatastoreProfileGCS,
             "az": DatastoreProfileAzureBlob,
             "hdfs": DatastoreProfileHdfs,
+            "config": ConfigProfile,
         }
         if datastore_type in ds_profile_factory:
             return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
@@ -489,7 +548,7 @@
         )
 
 
-def datastore_profile_read(url, project_name="", secrets: dict = None):
+def datastore_profile_read(url, project_name="", secrets: typing.Optional[dict] = None):
     parsed_url = urlparse(url)
     if parsed_url.scheme.lower() != "ds":
         raise mlrun.errors.MLRunInvalidArgumentError(
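
A hedged usage sketch of the reader: profiles are addressed by `ds://` URLs (any other scheme raises `MLRunInvalidArgumentError`, per the check above); the profile name here assumes the registration example shown earlier:

```python
from mlrun.datastore.datastore_profile import datastore_profile_read

profile = datastore_profile_read("ds://myconfig")
print(profile.attributes())

datastore_profile_read("s3://not-a-profile")  # raises MLRunInvalidArgumentError
```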
mlrun/datastore/dbfs_store.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import pathlib
+from typing import Optional
 
 from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem
 from fsspec.registry import get_filesystem_class
@@ -81,7 +82,9 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):
 
 # dbfs objects will be represented with the following URL: dbfs://<path>
 class DBFSStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
 
     @property
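
The recurring signature change across these datastore modules, `secrets: dict = None` to `secrets: Optional[dict] = None`, follows PEP 484's rule against implicit Optional; strict type checkers (mypy enables `no_implicit_optional` by default in recent versions) reject the first form. A minimal illustration:

```python
from typing import Optional


def connect(secrets: dict = None):  # rejected by strict checkers:
    ...                             # None is not a valid dict


def connect_explicit(secrets: Optional[dict] = None):  # PEP 484-compliant
    ...
```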
mlrun/datastore/filestore.py CHANGED
@@ -14,6 +14,7 @@
 import time
 from os import listdir, makedirs, path, stat
 from shutil import copyfile
+from typing import Optional
 
 import fsspec
 
@@ -23,7 +24,9 @@ from .base import DataStore, FileStats
 
 
 class FileStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, "file", endpoint, secrets=secrets)
 
         self._item_path, self._real_path = None, None
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -14,6 +14,7 @@
 import json
 import os
 from pathlib import Path
+from typing import Optional
 
 from fsspec.registry import get_filesystem_class
 from google.auth.credentials import Credentials
@@ -33,7 +34,9 @@ class GoogleCloudStorageStore(DataStore):
     workers = 8
     chunk_size = 32 * 1024 * 1024
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._storage_client = None
         self._storage_options = None
mlrun/datastore/hdfs.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from typing import Optional
 from urllib.parse import urlparse
 
 import fsspec
@@ -20,7 +21,9 @@ from mlrun.datastore.base import DataStore
 
 
 class HdfsStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
 
         self.host = self._get_secret_or_env("HDFS_HOST")
mlrun/datastore/inmem.py CHANGED
@@ -17,6 +17,7 @@ from io import BytesIO, StringIO
 import pandas as pd
 
 import mlrun
+import mlrun.utils.helpers
 
 from .base import DataStore, FileStats
 
@@ -35,7 +36,9 @@ class InMemoryStore(DataStore):
 
     def _get_item(self, key):
         if key not in self._items:
-            raise ValueError(f"item {key} not found in memory store")
+            raise mlrun.errors.MLRunNotFoundError(
+                f"item {key} not found in memory store"
+            )
         return self._items[key]
 
     def get(self, key, size=None, offset=0):
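
Raising `MLRunNotFoundError` instead of a bare `ValueError` lets callers handle datastore misses through mlrun's error hierarchy. A self-contained sketch of the new behavior (a plain dict stands in for `InMemoryStore._items`):

```python
import mlrun.errors

items = {}  # stand-in for InMemoryStore._items


def get_item(key):
    # Mirrors the change above: misses now surface as MLRunNotFoundError.
    if key not in items:
        raise mlrun.errors.MLRunNotFoundError(f"item {key} not found in memory store")
    return items[key]


try:
    get_item("missing")
except mlrun.errors.MLRunNotFoundError as exc:
    print(f"not found: {exc}")
```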
mlrun/datastore/redis.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
 from urllib.parse import urlparse
 
 import redis
@@ -30,7 +31,9 @@ class RedisStore(DataStore):
     - key and value sizes are limited to 512MB
     """
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
mlrun/datastore/s3.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import time
+from typing import Optional
 
 import boto3
 from boto3.s3.transfer import TransferConfig
@@ -26,7 +27,9 @@ from .base import DataStore, FileStats, get_range, make_datastore_schema_sanitiz
 class S3Store(DataStore):
     using_bucket = True
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec
         self._temp_credentials = None
mlrun/datastore/sources.py CHANGED
@@ -181,10 +181,10 @@ class CSVSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         parse_dates: Union[None, int, str, list[int], list[str]] = None,
         **kwargs,
     ):
@@ -308,11 +308,11 @@ class ParquetSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
         additional_filters: Optional[list[Union[tuple, list]]] = None,
@@ -392,7 +392,9 @@ class ParquetSource(BaseSourceDriver):
         )
 
     @classmethod
-    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+    def from_dict(
+        cls, struct=None, fields=None, deprecated_fields: Optional[dict] = None
+    ):
         new_obj = super().from_dict(
             struct=struct, fields=fields, deprecated_fields=deprecated_fields
         )
@@ -564,18 +566,18 @@ class BigQuerySource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        table: str = None,
-        max_results_for_table: int = None,
-        query: str = None,
-        materialization_dataset: str = None,
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        table: Optional[str] = None,
+        max_results_for_table: Optional[int] = None,
+        query: Optional[str] = None,
+        materialization_dataset: Optional[str] = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        gcp_project: str = None,
-        spark_options: dict = None,
+        gcp_project: Optional[str] = None,
+        spark_options: Optional[dict] = None,
         **kwargs,
     ):
         if query and table:
@@ -776,19 +778,19 @@ class SnowflakeSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        key_field: str = None,
-        attributes: dict[str, object] = None,
-        time_field: str = None,
-        schedule: str = None,
+        key_field: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        query: str = None,
-        url: str = None,
-        user: str = None,
-        database: str = None,
-        schema: str = None,
-        db_schema: str = None,
-        warehouse: str = None,
+        query: Optional[str] = None,
+        url: Optional[str] = None,
+        user: Optional[str] = None,
+        database: Optional[str] = None,
+        schema: Optional[str] = None,
+        db_schema: Optional[str] = None,
+        warehouse: Optional[str] = None,
         **kwargs,
     ):
         # TODO: Remove in 1.9.0
@@ -850,9 +852,9 @@ class CustomSource(BaseSourceDriver):
 
     def __init__(
         self,
-        class_name: str = None,
+        class_name: Optional[str] = None,
         name: str = "",
-        schedule: str = None,
+        schedule: Optional[str] = None,
         **attributes,
     ):
         attributes = attributes or {}
@@ -930,12 +932,12 @@ class OnlineSource(BaseSourceDriver):
 
     def __init__(
         self,
-        name: str = None,
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        workers: int = None,
+        name: Optional[str] = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        workers: Optional[int] = None,
     ):
         super().__init__(name, path, attributes, key_field, time_field)
         self.online = True
@@ -949,8 +951,7 @@ class OnlineSource(BaseSourceDriver):
             is_explicit_ack_supported(context)
             and mlrun.mlconf.is_explicit_ack_enabled()
         )
-        # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
-        src_class = storey.SyncEmitSource(
+        src_class = storey.AsyncEmitSource(
             context=context,
             key_field=self.key_field or key_field,
             full_event=True,
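
The switch from `storey.SyncEmitSource` to `storey.AsyncEmitSource` (enabled by dropping nuclio<1.12.10 support, per the removed TODO) moves event emission onto the async path. A hedged, self-contained sketch of storey's async flow API as documented upstream, not of mlrun's serving internals:

```python
import asyncio

from storey import AsyncEmitSource, Map, Reduce, build_flow


async def main():
    controller = build_flow(
        [
            AsyncEmitSource(),
            Map(lambda x: x * 2),
            Reduce(0, lambda acc, x: acc + x),
        ]
    ).run()
    for i in range(3):
        await controller.emit(i)  # awaited, unlike SyncEmitSource's blocking emit()
    await controller.terminate()
    print(await controller.await_termination())  # -> 6


asyncio.run(main())
```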
@@ -986,7 +987,7 @@ class StreamSource(OnlineSource):
         seek_to="earliest",
         shards=1,
         retention_in_hours=24,
-        extra_attributes: dict = None,
+        extra_attributes: Optional[dict] = None,
         **kwargs,
     ):
         """
@@ -1168,7 +1169,7 @@ class KafkaSource(OnlineSource):
         self,
         num_partitions: int = 4,
         replication_factor: int = 1,
-        topics: list[str] = None,
+        topics: Optional[list[str]] = None,
     ):
         """
         Create Kafka topics with the specified number of partitions and replication factor.
@@ -1226,16 +1227,16 @@ class SQLSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        db_url: str = None,
-        table_name: str = None,
-        spark_options: dict = None,
-        parse_dates: list[str] = None,
+        db_url: Optional[str] = None,
+        table_name: Optional[str] = None,
+        spark_options: Optional[dict] = None,
+        parse_dates: Optional[list[str]] = None,
         **kwargs,
     ):
         """
mlrun/datastore/store_resources.py CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config
mlrun/datastore/targets.py CHANGED
@@ -396,7 +396,7 @@ class BaseStoreTarget(DataTargetBase):
         self,
         name: str = "",
         path=None,
-        attributes: dict[str, str] = None,
+        attributes: Optional[dict[str, str]] = None,
         after_step=None,
         columns=None,
         partitioned: bool = False,
@@ -405,8 +405,8 @@ class BaseStoreTarget(DataTargetBase):
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = None,
         flush_after_seconds: Optional[int] = None,
-        storage_options: dict[str, str] = None,
-        schema: dict[str, Any] = None,
+        storage_options: Optional[dict[str, str]] = None,
+        schema: Optional[dict[str, Any]] = None,
         credentials_prefix=None,
     ):
         super().__init__(
@@ -834,16 +834,16 @@ class ParquetTarget(BaseStoreTarget):
         self,
         name: str = "",
         path=None,
-        attributes: dict[str, str] = None,
+        attributes: Optional[dict[str, str]] = None,
         after_step=None,
         columns=None,
-        partitioned: bool = None,
+        partitioned: Optional[bool] = None,
         key_bucketing_number: Optional[int] = None,
         partition_cols: Optional[list[str]] = None,
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = 10000,
         flush_after_seconds: Optional[int] = 900,
-        storage_options: dict[str, str] = None,
+        storage_options: Optional[dict[str, str]] = None,
     ):
         self.path = path
         if partitioned is None:
@@ -1199,7 +1199,7 @@ class SnowflakeTarget(BaseStoreTarget):
         self,
         name: str = "",
         path=None,
-        attributes: dict[str, str] = None,
+        attributes: Optional[dict[str, str]] = None,
         after_step=None,
         columns=None,
         partitioned: bool = False,
@@ -1208,15 +1208,15 @@ class SnowflakeTarget(BaseStoreTarget):
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = None,
         flush_after_seconds: Optional[int] = None,
-        storage_options: dict[str, str] = None,
-        schema: dict[str, Any] = None,
+        storage_options: Optional[dict[str, str]] = None,
+        schema: Optional[dict[str, Any]] = None,
         credentials_prefix=None,
-        url: str = None,
-        user: str = None,
-        db_schema: str = None,
-        database: str = None,
-        warehouse: str = None,
-        table_name: str = None,
+        url: Optional[str] = None,
+        user: Optional[str] = None,
+        db_schema: Optional[str] = None,
+        database: Optional[str] = None,
+        warehouse: Optional[str] = None,
+        table_name: Optional[str] = None,
     ):
         attributes = attributes or {}
         if url:
@@ -1903,7 +1903,7 @@ class SQLTarget(BaseStoreTarget):
         self,
         name: str = "",
         path=None,
-        attributes: dict[str, str] = None,
+        attributes: Optional[dict[str, str]] = None,
         after_step=None,
         partitioned: bool = False,
         key_bucketing_number: Optional[int] = None,
@@ -1911,16 +1911,16 @@ class SQLTarget(BaseStoreTarget):
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = None,
         flush_after_seconds: Optional[int] = None,
-        storage_options: dict[str, str] = None,
-        db_url: str = None,
-        table_name: str = None,
-        schema: dict[str, Any] = None,
+        storage_options: Optional[dict[str, str]] = None,
+        db_url: Optional[str] = None,
+        table_name: Optional[str] = None,
+        schema: Optional[dict[str, Any]] = None,
         primary_key_column: str = "",
         if_exists: str = "append",
         create_table: bool = False,
         # create_according_to_data: bool = False,
         varchar_len: int = 50,
-        parse_dates: list[str] = None,
+        parse_dates: Optional[list[str]] = None,
     ):
         """
         Write to SqlDB as output target for a flow.
mlrun/datastore/utils.py CHANGED
@@ -26,7 +26,7 @@ import mlrun.datastore
 
 
 def parse_kafka_url(
-    url: str, brokers: typing.Union[list, str] = None
+    url: str, brokers: typing.Optional[typing.Union[list, str]] = None
) -> tuple[str, list]:
     """Generating Kafka topic and adjusting a list of bootstrap servers.
 
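
An illustrative call shape only, inferred from the annotated signature (the return type `tuple[str, list]` suggests the topic and the resolved bootstrap servers; `brokers` accepts a list or string per the `Union` annotation):

```python
from mlrun.datastore.utils import parse_kafka_url

topic, brokers = parse_kafka_url("kafka://localhost:9092/my-topic")
topic, brokers = parse_kafka_url("kafka:///my-topic", brokers=["b1:9092", "b2:9092"])
```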
@@ -71,7 +71,7 @@ def upload_tarball(source_dir, target, secrets=None):
 
 def filter_df_start_end_time(
     df: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
-    time_column: str = None,
+    time_column: typing.Optional[str] = None,
     start_time: pd.Timestamp = None,
     end_time: pd.Timestamp = None,
 ) -> typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]]:
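
A hedged usage sketch, assuming `time_column` names a datetime column and the function keeps the rows that fall inside the given window:

```python
import pandas as pd

from mlrun.datastore.utils import filter_df_start_end_time

df = pd.DataFrame(
    {
        "ts": pd.date_range("2024-01-01", periods=4, freq="D"),
        "value": [1, 2, 3, 4],
    }
)

# Assumed semantics: keep only rows whose "ts" is within [start_time, end_time].
filtered = filter_df_start_end_time(
    df,
    time_column="ts",
    start_time=pd.Timestamp("2024-01-02"),
    end_time=pd.Timestamp("2024-01-03"),
)
print(filtered)
```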