mlrun 1.7.2rc3-py3-none-any.whl → 1.8.0rc2-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (250)
  1. mlrun/__init__.py +18 -18
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +19 -12
  4. mlrun/artifacts/__init__.py +0 -2
  5. mlrun/artifacts/base.py +34 -11
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/manager.py +13 -13
  8. mlrun/artifacts/model.py +66 -53
  9. mlrun/common/constants.py +6 -0
  10. mlrun/common/formatters/__init__.py +1 -0
  11. mlrun/common/formatters/feature_set.py +1 -0
  12. mlrun/common/formatters/function.py +1 -0
  13. mlrun/common/formatters/model_endpoint.py +30 -0
  14. mlrun/common/formatters/pipeline.py +1 -2
  15. mlrun/common/formatters/project.py +9 -0
  16. mlrun/common/model_monitoring/__init__.py +0 -3
  17. mlrun/common/model_monitoring/helpers.py +1 -1
  18. mlrun/common/runtimes/constants.py +1 -2
  19. mlrun/common/schemas/__init__.py +7 -2
  20. mlrun/common/schemas/alert.py +31 -18
  21. mlrun/common/schemas/api_gateway.py +3 -3
  22. mlrun/common/schemas/artifact.py +7 -13
  23. mlrun/common/schemas/auth.py +6 -4
  24. mlrun/common/schemas/background_task.py +7 -7
  25. mlrun/common/schemas/client_spec.py +2 -2
  26. mlrun/common/schemas/clusterization_spec.py +2 -2
  27. mlrun/common/schemas/common.py +53 -3
  28. mlrun/common/schemas/datastore_profile.py +1 -1
  29. mlrun/common/schemas/feature_store.py +9 -9
  30. mlrun/common/schemas/frontend_spec.py +4 -4
  31. mlrun/common/schemas/function.py +10 -10
  32. mlrun/common/schemas/hub.py +1 -1
  33. mlrun/common/schemas/k8s.py +3 -3
  34. mlrun/common/schemas/memory_reports.py +3 -3
  35. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  36. mlrun/common/schemas/model_monitoring/constants.py +62 -12
  37. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  38. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +22 -6
  40. mlrun/common/schemas/notification.py +18 -3
  41. mlrun/common/schemas/object.py +1 -1
  42. mlrun/common/schemas/pagination.py +4 -4
  43. mlrun/common/schemas/partition.py +137 -0
  44. mlrun/common/schemas/pipeline.py +2 -2
  45. mlrun/common/schemas/project.py +22 -17
  46. mlrun/common/schemas/runs.py +2 -2
  47. mlrun/common/schemas/runtime_resource.py +5 -5
  48. mlrun/common/schemas/schedule.py +1 -1
  49. mlrun/common/schemas/secret.py +1 -1
  50. mlrun/common/schemas/tag.py +3 -3
  51. mlrun/common/schemas/workflow.py +5 -5
  52. mlrun/config.py +65 -15
  53. mlrun/data_types/__init__.py +0 -2
  54. mlrun/data_types/data_types.py +0 -1
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +4 -4
  57. mlrun/data_types/to_pandas.py +2 -11
  58. mlrun/datastore/__init__.py +0 -2
  59. mlrun/datastore/alibaba_oss.py +4 -1
  60. mlrun/datastore/azure_blob.py +4 -1
  61. mlrun/datastore/base.py +12 -4
  62. mlrun/datastore/datastore.py +9 -3
  63. mlrun/datastore/datastore_profile.py +20 -20
  64. mlrun/datastore/dbfs_store.py +4 -1
  65. mlrun/datastore/filestore.py +4 -1
  66. mlrun/datastore/google_cloud_storage.py +4 -1
  67. mlrun/datastore/hdfs.py +4 -1
  68. mlrun/datastore/inmem.py +4 -1
  69. mlrun/datastore/redis.py +4 -1
  70. mlrun/datastore/s3.py +4 -1
  71. mlrun/datastore/sources.py +51 -49
  72. mlrun/datastore/store_resources.py +0 -2
  73. mlrun/datastore/targets.py +22 -23
  74. mlrun/datastore/utils.py +2 -2
  75. mlrun/datastore/v3io.py +4 -1
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +170 -64
  78. mlrun/db/factory.py +3 -0
  79. mlrun/db/httpdb.py +986 -238
  80. mlrun/db/nopdb.py +155 -57
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +55 -29
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +40 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +110 -46
  158. mlrun/model_monitoring/__init__.py +1 -2
  159. mlrun/model_monitoring/api.py +6 -6
  160. mlrun/model_monitoring/applications/_application_steps.py +13 -15
  161. mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
  162. mlrun/model_monitoring/applications/results.py +55 -3
  163. mlrun/model_monitoring/controller.py +185 -223
  164. mlrun/model_monitoring/db/_schedules.py +156 -0
  165. mlrun/model_monitoring/db/_stats.py +189 -0
  166. mlrun/model_monitoring/db/stores/__init__.py +1 -1
  167. mlrun/model_monitoring/db/stores/base/store.py +6 -65
  168. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
  169. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
  170. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
  171. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
  172. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
  173. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
  174. mlrun/model_monitoring/db/tsdb/base.py +76 -24
  175. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
  176. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  177. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +253 -28
  178. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  179. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
  180. mlrun/model_monitoring/helpers.py +91 -1
  181. mlrun/model_monitoring/model_endpoint.py +4 -2
  182. mlrun/model_monitoring/stream_processing.py +16 -13
  183. mlrun/model_monitoring/tracking_policy.py +10 -3
  184. mlrun/model_monitoring/writer.py +47 -26
  185. mlrun/package/__init__.py +3 -6
  186. mlrun/package/context_handler.py +1 -1
  187. mlrun/package/packager.py +12 -9
  188. mlrun/package/packagers/__init__.py +0 -2
  189. mlrun/package/packagers/default_packager.py +14 -11
  190. mlrun/package/packagers/numpy_packagers.py +16 -7
  191. mlrun/package/packagers/pandas_packagers.py +18 -18
  192. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  193. mlrun/package/packagers_manager.py +31 -14
  194. mlrun/package/utils/__init__.py +0 -3
  195. mlrun/package/utils/_pickler.py +6 -6
  196. mlrun/platforms/__init__.py +3 -16
  197. mlrun/platforms/iguazio.py +4 -1
  198. mlrun/projects/operations.py +27 -27
  199. mlrun/projects/pipelines.py +34 -35
  200. mlrun/projects/project.py +535 -182
  201. mlrun/run.py +13 -10
  202. mlrun/runtimes/__init__.py +1 -3
  203. mlrun/runtimes/base.py +15 -11
  204. mlrun/runtimes/daskjob.py +9 -9
  205. mlrun/runtimes/generators.py +2 -1
  206. mlrun/runtimes/kubejob.py +4 -5
  207. mlrun/runtimes/mounts.py +572 -0
  208. mlrun/runtimes/mpijob/__init__.py +0 -2
  209. mlrun/runtimes/mpijob/abstract.py +7 -6
  210. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  211. mlrun/runtimes/nuclio/application/application.py +11 -11
  212. mlrun/runtimes/nuclio/function.py +13 -13
  213. mlrun/runtimes/nuclio/serving.py +9 -9
  214. mlrun/runtimes/pod.py +154 -45
  215. mlrun/runtimes/remotesparkjob.py +3 -2
  216. mlrun/runtimes/sparkjob/__init__.py +0 -2
  217. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  218. mlrun/runtimes/utils.py +6 -5
  219. mlrun/serving/merger.py +6 -4
  220. mlrun/serving/remote.py +18 -17
  221. mlrun/serving/routers.py +27 -27
  222. mlrun/serving/server.py +1 -1
  223. mlrun/serving/states.py +76 -71
  224. mlrun/serving/utils.py +13 -2
  225. mlrun/serving/v1_serving.py +3 -2
  226. mlrun/serving/v2_serving.py +4 -4
  227. mlrun/track/__init__.py +1 -1
  228. mlrun/track/tracker.py +2 -2
  229. mlrun/track/trackers/mlflow_tracker.py +6 -5
  230. mlrun/utils/async_http.py +1 -1
  231. mlrun/utils/helpers.py +70 -16
  232. mlrun/utils/logger.py +106 -4
  233. mlrun/utils/notifications/notification/__init__.py +22 -19
  234. mlrun/utils/notifications/notification/base.py +33 -14
  235. mlrun/utils/notifications/notification/console.py +6 -6
  236. mlrun/utils/notifications/notification/git.py +11 -11
  237. mlrun/utils/notifications/notification/ipython.py +10 -9
  238. mlrun/utils/notifications/notification/mail.py +149 -0
  239. mlrun/utils/notifications/notification/slack.py +6 -6
  240. mlrun/utils/notifications/notification/webhook.py +18 -22
  241. mlrun/utils/notifications/notification_pusher.py +43 -31
  242. mlrun/utils/regex.py +3 -1
  243. mlrun/utils/version/version.json +2 -2
  244. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/METADATA +18 -14
  245. mlrun-1.8.0rc2.dist-info/RECORD +358 -0
  246. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/WHEEL +1 -1
  247. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  248. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/LICENSE +0 -0
  249. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/entry_points.txt +0 -0
  250. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/top_level.txt +0 -0
mlrun/datastore/alibaba_oss.py CHANGED
@@ -15,6 +15,7 @@
 import time
 from datetime import datetime
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse
 
 import oss2
@@ -28,7 +29,9 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 class OSSStore(DataStore):
     using_bucket = True
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec
 
mlrun/datastore/azure_blob.py CHANGED
@@ -14,6 +14,7 @@
 
 import time
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse
 
 from azure.storage.blob import BlobServiceClient
@@ -36,7 +37,9 @@ class AzureBlobStore(DataStore):
         1024 * 1024 * 8
     )  # for service_client property only, does not affect filesystem
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._service_client = None
         self._storage_options = None
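
Note: the `secrets: dict = None` → `secrets: Optional[dict] = None` edit above recurs across nearly every datastore class in this release. PEP 484 deprecates implicit `Optional` defaults, so the annotations are made explicit while runtime behavior stays the same. A minimal sketch of the pattern (the `connect` function here is hypothetical, for illustration only):

```python
from typing import Optional


def connect(endpoint: str = "", secrets: Optional[dict] = None) -> dict:
    # A None default now carries an explicit Optional[...] annotation;
    # callers and runtime behavior are unchanged.
    return secrets or {}


print(connect())                    # {}
print(connect(secrets={"k": "v"}))  # {'k': 'v'}
```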
mlrun/datastore/base.py CHANGED
@@ -48,7 +48,7 @@ class FileStats:
 class DataStore:
     using_bucket = False
 
-    def __init__(self, parent, name, kind, endpoint="", secrets: dict = None):
+    def __init__(self, parent, name, kind, endpoint="", secrets: Optional[dict] = None):
         self._parent = parent
         self.kind = kind
         self.name = name
@@ -500,12 +500,18 @@ class DataItem:
         """DataItem url e.g. /dir/path, s3://bucket/path"""
         return self._url
 
-    def get(self, size=None, offset=0, encoding=None):
+    def get(
+        self,
+        size: Optional[int] = None,
+        offset: int = 0,
+        encoding: Optional[str] = None,
+    ) -> Union[bytes, str]:
         """read all or a byte range and return the content
 
         :param size:     number of bytes to get
         :param offset:   fetch from offset (in bytes)
         :param encoding: encoding (e.g. "utf-8") for converting bytes to str
+        :return:         the bytes/str content
         """
         body = self._store.get(self._path, size=size, offset=offset)
         if encoding and isinstance(body, bytes):
@@ -519,7 +525,7 @@
         """
         self._store.download(self._path, target_path)
 
-    def put(self, data, append=False):
+    def put(self, data: Union[bytes, str], append: bool = False) -> None:
         """write/upload the data, append is only supported by some datastores
 
         :param data:   data (bytes/str) to write
@@ -687,7 +693,9 @@ def basic_auth_header(user, password):
 
 
 class HttpStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         self._https_auth_token = None
         self._schema = schema
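
Note: `DataItem.get` and `DataItem.put` only gained type annotations and a `:return:` doc line; behavior is unchanged. A hedged usage sketch of the annotated API (the local path is hypothetical):

```python
import mlrun

item = mlrun.get_dataitem("/tmp/example.txt")  # hypothetical local target
item.put("hello world")            # put(data: Union[bytes, str]) -> None
head = item.get(size=5)            # bytes: the first 5 bytes
text = item.get(encoding="utf-8")  # str: decoded content
print(head, text)
```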
mlrun/datastore/datastore.py CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Optional
 from urllib.parse import urlparse
 
 from mergedeep import merge
@@ -178,12 +179,17 @@ class StoreManager:
         # which accepts a feature vector uri and generate the offline vector (parquet) for it if it doesnt exist
         if not target and not allow_empty_resources:
             raise mlrun.errors.MLRunInvalidArgumentError(
-                f"resource {url} does not have a valid/persistent offline target"
+                f"Resource {url} does not have a valid/persistent offline target"
             )
         return resource, target or ""
 
     def object(
-        self, url, key="", project="", allow_empty_resources=None, secrets: dict = None
+        self,
+        url,
+        key="",
+        project="",
+        allow_empty_resources=None,
+        secrets: Optional[dict] = None,
     ) -> DataItem:
         meta = artifact_url = None
         if is_store_uri(url):
@@ -205,7 +211,7 @@
         )
 
     def get_or_create_store(
-        self, url, secrets: dict = None, project_name=""
+        self, url, secrets: Optional[dict] = None, project_name=""
     ) -> (DataStore, str, str):
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
mlrun/datastore/datastore_profile.py CHANGED
@@ -19,7 +19,7 @@ import typing
 import warnings
 from urllib.parse import ParseResult, urlparse, urlunparse
 
-import pydantic
+import pydantic.v1
 from mergedeep import merge
 
 import mlrun
@@ -28,15 +28,15 @@ import mlrun.errors
 from ..secrets import get_secret_or_env
 
 
-class DatastoreProfile(pydantic.BaseModel):
+class DatastoreProfile(pydantic.v1.BaseModel):
     type: str
     name: str
     _private_attributes: list = ()
 
     class Config:
-        extra = pydantic.Extra.forbid
+        extra = pydantic.v1.Extra.forbid
 
-    @pydantic.validator("name")
+    @pydantic.v1.validator("name")
     @classmethod
     def lower_case(cls, v):
         return v.lower()
@@ -75,14 +75,14 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton
 
 
 class DatastoreProfileBasic(DatastoreProfile):
-    type: str = pydantic.Field("basic")
+    type: str = pydantic.v1.Field("basic")
     _private_attributes = "private"
     public: str
     private: typing.Optional[str] = None
 
 
 class DatastoreProfileKafkaTarget(DatastoreProfile):
-    type: str = pydantic.Field("kafka_target")
+    type: str = pydantic.v1.Field("kafka_target")
     _private_attributes = "kwargs_private"
     bootstrap_servers: typing.Optional[str] = None
     brokers: typing.Optional[str] = None
@@ -123,7 +123,7 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
 
 
 class DatastoreProfileKafkaSource(DatastoreProfile):
-    type: str = pydantic.Field("kafka_source")
+    type: str = pydantic.v1.Field("kafka_source")
     _private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
     brokers: typing.Union[str, list[str]]
     topics: typing.Union[str, list[str]]
@@ -162,7 +162,7 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
 
 
 class DatastoreProfileV3io(DatastoreProfile):
-    type: str = pydantic.Field("v3io")
+    type: str = pydantic.v1.Field("v3io")
     v3io_access_key: typing.Optional[str] = None
     _private_attributes = "v3io_access_key"
 
@@ -178,7 +178,7 @@
 
 
 class DatastoreProfileS3(DatastoreProfile):
-    type: str = pydantic.Field("s3")
+    type: str = pydantic.v1.Field("s3")
     _private_attributes = ("access_key_id", "secret_key")
     endpoint_url: typing.Optional[str] = None
     force_non_anonymous: typing.Optional[str] = None
@@ -188,7 +188,7 @@
     secret_key: typing.Optional[str] = None
     bucket: typing.Optional[str] = None
 
-    @pydantic.validator("bucket")
+    @pydantic.v1.validator("bucket")
     @classmethod
     def check_bucket(cls, v):
         if not v:
@@ -226,7 +226,7 @@
 
 
 class DatastoreProfileRedis(DatastoreProfile):
-    type: str = pydantic.Field("redis")
+    type: str = pydantic.v1.Field("redis")
     _private_attributes = ("username", "password")
     endpoint_url: str
     username: typing.Optional[str] = None
@@ -269,7 +269,7 @@
 
 
 class DatastoreProfileDBFS(DatastoreProfile):
-    type: str = pydantic.Field("dbfs")
+    type: str = pydantic.v1.Field("dbfs")
     _private_attributes = ("token",)
     endpoint_url: typing.Optional[str] = None  # host
     token: typing.Optional[str] = None
@@ -287,13 +287,13 @@
 
 
 class DatastoreProfileGCS(DatastoreProfile):
-    type: str = pydantic.Field("gcs")
+    type: str = pydantic.v1.Field("gcs")
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
     gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
     bucket: typing.Optional[str] = None
 
-    @pydantic.validator("bucket")
+    @pydantic.v1.validator("bucket")
     @classmethod
     def check_bucket(cls, v):
         if not v:
@@ -304,7 +304,7 @@
             )
         return v
 
-    @pydantic.validator("gcp_credentials", pre=True, always=True)
+    @pydantic.v1.validator("gcp_credentials", pre=True, always=True)
     @classmethod
     def convert_dict_to_json(cls, v):
         if isinstance(v, dict):
@@ -332,7 +332,7 @@
 
 
 class DatastoreProfileAzureBlob(DatastoreProfile):
-    type: str = pydantic.Field("az")
+    type: str = pydantic.v1.Field("az")
     _private_attributes = (
         "connection_string",
         "account_key",
@@ -350,7 +350,7 @@
     credential: typing.Optional[str] = None
     container: typing.Optional[str] = None
 
-    @pydantic.validator("container")
+    @pydantic.v1.validator("container")
     @classmethod
     def check_container(cls, v):
         if not v:
@@ -392,7 +392,7 @@
 
 
 class DatastoreProfileHdfs(DatastoreProfile):
-    type: str = pydantic.Field("hdfs")
+    type: str = pydantic.v1.Field("hdfs")
     _private_attributes = "token"
     host: typing.Optional[str] = None
     port: typing.Optional[int] = None
@@ -415,7 +415,7 @@
         return f"webhdfs://{self.host}:{self.http_port}{subpath}"
 
 
-class DatastoreProfile2Json(pydantic.BaseModel):
+class DatastoreProfile2Json(pydantic.v1.BaseModel):
     @staticmethod
     def _to_json(attributes):
         # First, base64 encode the values
@@ -489,7 +489,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
             )
 
 
-def datastore_profile_read(url, project_name="", secrets: dict = None):
+def datastore_profile_read(url, project_name="", secrets: typing.Optional[dict] = None):
    parsed_url = urlparse(url)
    if parsed_url.scheme.lower() != "ds":
        raise mlrun.errors.MLRunInvalidArgumentError(
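
Note: the module-wide `pydantic` → `pydantic.v1` swap above is the standard compatibility path when upgrading to pydantic 2.x, which bundles the legacy 1.x API under the `pydantic.v1` namespace. A minimal sketch of the idiom, assuming pydantic >= 2 is installed (the `Profile` model is hypothetical):

```python
import pydantic.v1  # requires pydantic 2.x, which ships the 1.x API here


class Profile(pydantic.v1.BaseModel):  # hypothetical example model
    type: str = pydantic.v1.Field("basic")
    name: str

    class Config:
        extra = pydantic.v1.Extra.forbid

    @pydantic.v1.validator("name")
    @classmethod
    def lower_case(cls, v):
        return v.lower()


print(Profile(name="MyProfile").name)  # -> "myprofile"
```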
mlrun/datastore/dbfs_store.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import pathlib
+from typing import Optional
 
 from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem
 from fsspec.registry import get_filesystem_class
@@ -81,7 +82,9 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):
 
 # dbfs objects will be represented with the following URL: dbfs://<path>
 class DBFSStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
 
     @property
mlrun/datastore/filestore.py CHANGED
@@ -14,6 +14,7 @@
 import time
 from os import listdir, makedirs, path, stat
 from shutil import copyfile
+from typing import Optional
 
 import fsspec
 
@@ -23,7 +24,9 @@ from .base import DataStore, FileStats
 
 
 class FileStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, "file", endpoint, secrets=secrets)
 
         self._item_path, self._real_path = None, None
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -14,6 +14,7 @@
 import json
 import os
 from pathlib import Path
+from typing import Optional
 
 from fsspec.registry import get_filesystem_class
 from google.auth.credentials import Credentials
@@ -33,7 +34,9 @@ class GoogleCloudStorageStore(DataStore):
     workers = 8
     chunk_size = 32 * 1024 * 1024
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._storage_client = None
         self._storage_options = None
mlrun/datastore/hdfs.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from typing import Optional
 from urllib.parse import urlparse
 
 import fsspec
@@ -20,7 +21,9 @@ from mlrun.datastore.base import DataStore
 
 
 class HdfsStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
 
         self.host = self._get_secret_or_env("HDFS_HOST")
mlrun/datastore/inmem.py CHANGED
@@ -17,6 +17,7 @@ from io import BytesIO, StringIO
 import pandas as pd
 
 import mlrun
+import mlrun.utils.helpers
 
 from .base import DataStore, FileStats
 
@@ -35,7 +36,9 @@ class InMemoryStore(DataStore):
 
     def _get_item(self, key):
         if key not in self._items:
-            raise ValueError(f"item {key} not found in memory store")
+            raise mlrun.errors.MLRunNotFoundError(
+                f"item {key} not found in memory store"
+            )
         return self._items[key]
 
     def get(self, key, size=None, offset=0):
mlrun/datastore/redis.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
 from urllib.parse import urlparse
 
 import redis
@@ -30,7 +31,9 @@ class RedisStore(DataStore):
     - key and value sizes are limited to 512MB
     """
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
mlrun/datastore/s3.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import time
+from typing import Optional
 
 import boto3
 from boto3.s3.transfer import TransferConfig
@@ -26,7 +27,9 @@ from .base import DataStore, FileStats, get_range, make_datastore_schema_sanitiz
 class S3Store(DataStore):
     using_bucket = True
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec
         self._temp_credentials = None
mlrun/datastore/sources.py CHANGED
@@ -181,10 +181,10 @@ class CSVSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         parse_dates: Union[None, int, str, list[int], list[str]] = None,
         **kwargs,
     ):
@@ -308,11 +308,11 @@ class ParquetSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
         additional_filters: Optional[list[Union[tuple, list]]] = None,
@@ -392,7 +392,9 @@
         )
 
     @classmethod
-    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+    def from_dict(
+        cls, struct=None, fields=None, deprecated_fields: Optional[dict] = None
+    ):
         new_obj = super().from_dict(
             struct=struct, fields=fields, deprecated_fields=deprecated_fields
         )
@@ -564,18 +566,18 @@ class BigQuerySource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        table: str = None,
-        max_results_for_table: int = None,
-        query: str = None,
-        materialization_dataset: str = None,
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        table: Optional[str] = None,
+        max_results_for_table: Optional[int] = None,
+        query: Optional[str] = None,
+        materialization_dataset: Optional[str] = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        gcp_project: str = None,
-        spark_options: dict = None,
+        gcp_project: Optional[str] = None,
+        spark_options: Optional[dict] = None,
         **kwargs,
     ):
         if query and table:
@@ -776,19 +778,19 @@ class SnowflakeSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        key_field: str = None,
-        attributes: dict[str, object] = None,
-        time_field: str = None,
-        schedule: str = None,
+        key_field: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        query: str = None,
-        url: str = None,
-        user: str = None,
-        database: str = None,
-        schema: str = None,
-        db_schema: str = None,
-        warehouse: str = None,
+        query: Optional[str] = None,
+        url: Optional[str] = None,
+        user: Optional[str] = None,
+        database: Optional[str] = None,
+        schema: Optional[str] = None,
+        db_schema: Optional[str] = None,
+        warehouse: Optional[str] = None,
         **kwargs,
     ):
         # TODO: Remove in 1.9.0
@@ -850,9 +852,9 @@ class CustomSource(BaseSourceDriver):
 
     def __init__(
         self,
-        class_name: str = None,
+        class_name: Optional[str] = None,
         name: str = "",
-        schedule: str = None,
+        schedule: Optional[str] = None,
         **attributes,
     ):
         attributes = attributes or {}
@@ -930,12 +932,12 @@ class OnlineSource(BaseSourceDriver):
 
     def __init__(
         self,
-        name: str = None,
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        workers: int = None,
+        name: Optional[str] = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        workers: Optional[int] = None,
     ):
         super().__init__(name, path, attributes, key_field, time_field)
         self.online = True
@@ -986,7 +988,7 @@ class StreamSource(OnlineSource):
         seek_to="earliest",
         shards=1,
         retention_in_hours=24,
-        extra_attributes: dict = None,
+        extra_attributes: Optional[dict] = None,
         **kwargs,
     ):
         """
@@ -1168,7 +1170,7 @@ class KafkaSource(OnlineSource):
         self,
         num_partitions: int = 4,
         replication_factor: int = 1,
-        topics: list[str] = None,
+        topics: Optional[list[str]] = None,
     ):
         """
         Create Kafka topics with the specified number of partitions and replication factor.
@@ -1226,16 +1228,16 @@ class SQLSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        db_url: str = None,
-        table_name: str = None,
-        spark_options: dict = None,
-        parse_dates: list[str] = None,
+        db_url: Optional[str] = None,
+        table_name: Optional[str] = None,
+        spark_options: Optional[dict] = None,
+        parse_dates: Optional[list[str]] = None,
        **kwargs,
    ):
        """
mlrun/datastore/store_resources.py CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config