mlrun 1.7.2__py3-none-any.whl → 1.8.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (222)
  1. mlrun/__init__.py +14 -12
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +19 -12
  4. mlrun/artifacts/__init__.py +0 -2
  5. mlrun/artifacts/base.py +34 -11
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/manager.py +13 -13
  8. mlrun/artifacts/model.py +66 -53
  9. mlrun/common/constants.py +6 -0
  10. mlrun/common/formatters/__init__.py +1 -0
  11. mlrun/common/formatters/feature_set.py +1 -0
  12. mlrun/common/formatters/function.py +1 -0
  13. mlrun/common/formatters/model_endpoint.py +30 -0
  14. mlrun/common/formatters/pipeline.py +1 -2
  15. mlrun/common/model_monitoring/__init__.py +0 -3
  16. mlrun/common/model_monitoring/helpers.py +1 -1
  17. mlrun/common/runtimes/constants.py +1 -2
  18. mlrun/common/schemas/__init__.py +4 -2
  19. mlrun/common/schemas/artifact.py +0 -6
  20. mlrun/common/schemas/common.py +50 -0
  21. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  22. mlrun/common/schemas/model_monitoring/constants.py +62 -12
  23. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
  24. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -5
  25. mlrun/common/schemas/partition.py +122 -0
  26. mlrun/config.py +43 -15
  27. mlrun/data_types/__init__.py +0 -2
  28. mlrun/data_types/data_types.py +0 -1
  29. mlrun/data_types/infer.py +3 -1
  30. mlrun/data_types/spark.py +4 -4
  31. mlrun/data_types/to_pandas.py +2 -11
  32. mlrun/datastore/__init__.py +0 -2
  33. mlrun/datastore/alibaba_oss.py +4 -1
  34. mlrun/datastore/azure_blob.py +4 -1
  35. mlrun/datastore/base.py +12 -4
  36. mlrun/datastore/datastore.py +9 -3
  37. mlrun/datastore/datastore_profile.py +1 -1
  38. mlrun/datastore/dbfs_store.py +4 -1
  39. mlrun/datastore/filestore.py +4 -1
  40. mlrun/datastore/google_cloud_storage.py +4 -1
  41. mlrun/datastore/hdfs.py +4 -1
  42. mlrun/datastore/inmem.py +4 -1
  43. mlrun/datastore/redis.py +4 -1
  44. mlrun/datastore/s3.py +4 -1
  45. mlrun/datastore/sources.py +51 -49
  46. mlrun/datastore/store_resources.py +0 -2
  47. mlrun/datastore/targets.py +22 -23
  48. mlrun/datastore/utils.py +2 -2
  49. mlrun/datastore/v3io.py +4 -1
  50. mlrun/datastore/wasbfs/fs.py +13 -12
  51. mlrun/db/base.py +126 -62
  52. mlrun/db/factory.py +3 -0
  53. mlrun/db/httpdb.py +767 -231
  54. mlrun/db/nopdb.py +126 -57
  55. mlrun/errors.py +2 -2
  56. mlrun/execution.py +55 -29
  57. mlrun/feature_store/__init__.py +0 -2
  58. mlrun/feature_store/api.py +40 -40
  59. mlrun/feature_store/common.py +9 -9
  60. mlrun/feature_store/feature_set.py +20 -18
  61. mlrun/feature_store/feature_vector.py +27 -24
  62. mlrun/feature_store/retrieval/base.py +14 -9
  63. mlrun/feature_store/retrieval/job.py +2 -1
  64. mlrun/feature_store/steps.py +2 -2
  65. mlrun/features.py +30 -13
  66. mlrun/frameworks/__init__.py +1 -2
  67. mlrun/frameworks/_common/__init__.py +1 -2
  68. mlrun/frameworks/_common/artifacts_library.py +2 -2
  69. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  70. mlrun/frameworks/_common/model_handler.py +29 -27
  71. mlrun/frameworks/_common/producer.py +3 -1
  72. mlrun/frameworks/_dl_common/__init__.py +1 -2
  73. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  74. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  75. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  76. mlrun/frameworks/_ml_common/__init__.py +1 -2
  77. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  78. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  79. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  80. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  81. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  82. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  83. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  84. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  85. mlrun/frameworks/huggingface/__init__.py +1 -2
  86. mlrun/frameworks/huggingface/model_server.py +9 -9
  87. mlrun/frameworks/lgbm/__init__.py +47 -44
  88. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  89. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  90. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  91. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  92. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  93. mlrun/frameworks/lgbm/model_handler.py +15 -11
  94. mlrun/frameworks/lgbm/model_server.py +11 -7
  95. mlrun/frameworks/lgbm/utils.py +2 -2
  96. mlrun/frameworks/onnx/__init__.py +1 -2
  97. mlrun/frameworks/onnx/dataset.py +3 -3
  98. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  99. mlrun/frameworks/onnx/model_handler.py +7 -5
  100. mlrun/frameworks/onnx/model_server.py +8 -6
  101. mlrun/frameworks/parallel_coordinates.py +11 -11
  102. mlrun/frameworks/pytorch/__init__.py +22 -23
  103. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  104. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  105. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  106. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  107. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  108. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  109. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  110. mlrun/frameworks/pytorch/model_handler.py +21 -17
  111. mlrun/frameworks/pytorch/model_server.py +13 -9
  112. mlrun/frameworks/sklearn/__init__.py +19 -18
  113. mlrun/frameworks/sklearn/estimator.py +2 -2
  114. mlrun/frameworks/sklearn/metric.py +3 -3
  115. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  116. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  117. mlrun/frameworks/sklearn/model_handler.py +4 -3
  118. mlrun/frameworks/tf_keras/__init__.py +11 -12
  119. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  120. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  121. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  122. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  123. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  124. mlrun/frameworks/tf_keras/model_server.py +12 -8
  125. mlrun/frameworks/xgboost/__init__.py +19 -18
  126. mlrun/frameworks/xgboost/model_handler.py +13 -9
  127. mlrun/launcher/base.py +3 -4
  128. mlrun/launcher/local.py +1 -1
  129. mlrun/launcher/remote.py +1 -1
  130. mlrun/lists.py +4 -3
  131. mlrun/model.py +108 -44
  132. mlrun/model_monitoring/__init__.py +1 -2
  133. mlrun/model_monitoring/api.py +6 -6
  134. mlrun/model_monitoring/applications/_application_steps.py +13 -15
  135. mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
  136. mlrun/model_monitoring/applications/results.py +55 -3
  137. mlrun/model_monitoring/controller.py +185 -223
  138. mlrun/model_monitoring/db/_schedules.py +156 -0
  139. mlrun/model_monitoring/db/_stats.py +189 -0
  140. mlrun/model_monitoring/db/stores/__init__.py +1 -1
  141. mlrun/model_monitoring/db/stores/base/store.py +6 -65
  142. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
  143. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
  144. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
  145. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
  146. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
  147. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
  148. mlrun/model_monitoring/db/tsdb/base.py +74 -22
  149. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +66 -35
  150. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  151. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +284 -51
  152. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  153. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
  154. mlrun/model_monitoring/helpers.py +97 -1
  155. mlrun/model_monitoring/model_endpoint.py +4 -2
  156. mlrun/model_monitoring/stream_processing.py +2 -2
  157. mlrun/model_monitoring/tracking_policy.py +10 -3
  158. mlrun/model_monitoring/writer.py +47 -26
  159. mlrun/package/__init__.py +3 -6
  160. mlrun/package/context_handler.py +1 -1
  161. mlrun/package/packager.py +12 -9
  162. mlrun/package/packagers/__init__.py +0 -2
  163. mlrun/package/packagers/default_packager.py +14 -11
  164. mlrun/package/packagers/numpy_packagers.py +16 -7
  165. mlrun/package/packagers/pandas_packagers.py +18 -18
  166. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  167. mlrun/package/packagers_manager.py +31 -14
  168. mlrun/package/utils/__init__.py +0 -3
  169. mlrun/package/utils/_pickler.py +6 -6
  170. mlrun/platforms/__init__.py +3 -3
  171. mlrun/platforms/iguazio.py +4 -1
  172. mlrun/projects/__init__.py +1 -6
  173. mlrun/projects/operations.py +27 -27
  174. mlrun/projects/pipelines.py +85 -215
  175. mlrun/projects/project.py +444 -158
  176. mlrun/run.py +9 -9
  177. mlrun/runtimes/__init__.py +1 -3
  178. mlrun/runtimes/base.py +13 -10
  179. mlrun/runtimes/daskjob.py +9 -9
  180. mlrun/runtimes/generators.py +2 -1
  181. mlrun/runtimes/kubejob.py +4 -5
  182. mlrun/runtimes/mpijob/__init__.py +0 -2
  183. mlrun/runtimes/mpijob/abstract.py +7 -6
  184. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  185. mlrun/runtimes/nuclio/application/application.py +11 -11
  186. mlrun/runtimes/nuclio/function.py +14 -14
  187. mlrun/runtimes/nuclio/serving.py +9 -9
  188. mlrun/runtimes/pod.py +74 -29
  189. mlrun/runtimes/remotesparkjob.py +3 -2
  190. mlrun/runtimes/sparkjob/__init__.py +0 -2
  191. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  192. mlrun/runtimes/utils.py +6 -5
  193. mlrun/serving/merger.py +6 -4
  194. mlrun/serving/remote.py +18 -17
  195. mlrun/serving/routers.py +27 -27
  196. mlrun/serving/server.py +1 -1
  197. mlrun/serving/states.py +76 -71
  198. mlrun/serving/utils.py +13 -2
  199. mlrun/serving/v1_serving.py +3 -2
  200. mlrun/serving/v2_serving.py +4 -4
  201. mlrun/track/__init__.py +1 -1
  202. mlrun/track/tracker.py +2 -2
  203. mlrun/track/trackers/mlflow_tracker.py +6 -5
  204. mlrun/utils/async_http.py +1 -1
  205. mlrun/utils/helpers.py +72 -28
  206. mlrun/utils/logger.py +104 -2
  207. mlrun/utils/notifications/notification/base.py +23 -4
  208. mlrun/utils/notifications/notification/console.py +1 -1
  209. mlrun/utils/notifications/notification/git.py +6 -6
  210. mlrun/utils/notifications/notification/ipython.py +5 -4
  211. mlrun/utils/notifications/notification/slack.py +1 -1
  212. mlrun/utils/notifications/notification/webhook.py +13 -17
  213. mlrun/utils/notifications/notification_pusher.py +23 -19
  214. mlrun/utils/regex.py +1 -1
  215. mlrun/utils/version/version.json +2 -2
  216. {mlrun-1.7.2.dist-info → mlrun-1.8.0rc1.dist-info}/METADATA +187 -199
  217. mlrun-1.8.0rc1.dist-info/RECORD +356 -0
  218. {mlrun-1.7.2.dist-info → mlrun-1.8.0rc1.dist-info}/WHEEL +1 -1
  219. mlrun-1.7.2.dist-info/RECORD +0 -351
  220. {mlrun-1.7.2.dist-info → mlrun-1.8.0rc1.dist-info}/LICENSE +0 -0
  221. {mlrun-1.7.2.dist-info → mlrun-1.8.0rc1.dist-info}/entry_points.txt +0 -0
  222. {mlrun-1.7.2.dist-info → mlrun-1.8.0rc1.dist-info}/top_level.txt +0 -0
mlrun/datastore/sources.py CHANGED
@@ -181,10 +181,10 @@ class CSVSource(BaseSourceDriver):
181
181
  def __init__(
182
182
  self,
183
183
  name: str = "",
184
- path: str = None,
185
- attributes: dict[str, object] = None,
186
- key_field: str = None,
187
- schedule: str = None,
184
+ path: Optional[str] = None,
185
+ attributes: Optional[dict[str, object]] = None,
186
+ key_field: Optional[str] = None,
187
+ schedule: Optional[str] = None,
188
188
  parse_dates: Union[None, int, str, list[int], list[str]] = None,
189
189
  **kwargs,
190
190
  ):
@@ -308,11 +308,11 @@ class ParquetSource(BaseSourceDriver):
308
308
  def __init__(
309
309
  self,
310
310
  name: str = "",
311
- path: str = None,
312
- attributes: dict[str, object] = None,
313
- key_field: str = None,
314
- time_field: str = None,
315
- schedule: str = None,
311
+ path: Optional[str] = None,
312
+ attributes: Optional[dict[str, object]] = None,
313
+ key_field: Optional[str] = None,
314
+ time_field: Optional[str] = None,
315
+ schedule: Optional[str] = None,
316
316
  start_time: Optional[Union[datetime, str]] = None,
317
317
  end_time: Optional[Union[datetime, str]] = None,
318
318
  additional_filters: Optional[list[Union[tuple, list]]] = None,
@@ -392,7 +392,9 @@ class ParquetSource(BaseSourceDriver):
392
392
  )
393
393
 
394
394
  @classmethod
395
- def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
395
+ def from_dict(
396
+ cls, struct=None, fields=None, deprecated_fields: Optional[dict] = None
397
+ ):
396
398
  new_obj = super().from_dict(
397
399
  struct=struct, fields=fields, deprecated_fields=deprecated_fields
398
400
  )
@@ -564,18 +566,18 @@ class BigQuerySource(BaseSourceDriver):
564
566
  def __init__(
565
567
  self,
566
568
  name: str = "",
567
- table: str = None,
568
- max_results_for_table: int = None,
569
- query: str = None,
570
- materialization_dataset: str = None,
571
- chunksize: int = None,
572
- key_field: str = None,
573
- time_field: str = None,
574
- schedule: str = None,
569
+ table: Optional[str] = None,
570
+ max_results_for_table: Optional[int] = None,
571
+ query: Optional[str] = None,
572
+ materialization_dataset: Optional[str] = None,
573
+ chunksize: Optional[int] = None,
574
+ key_field: Optional[str] = None,
575
+ time_field: Optional[str] = None,
576
+ schedule: Optional[str] = None,
575
577
  start_time=None,
576
578
  end_time=None,
577
- gcp_project: str = None,
578
- spark_options: dict = None,
579
+ gcp_project: Optional[str] = None,
580
+ spark_options: Optional[dict] = None,
579
581
  **kwargs,
580
582
  ):
581
583
  if query and table:
@@ -776,19 +778,19 @@ class SnowflakeSource(BaseSourceDriver):
776
778
  def __init__(
777
779
  self,
778
780
  name: str = "",
779
- key_field: str = None,
780
- attributes: dict[str, object] = None,
781
- time_field: str = None,
782
- schedule: str = None,
781
+ key_field: Optional[str] = None,
782
+ attributes: Optional[dict[str, object]] = None,
783
+ time_field: Optional[str] = None,
784
+ schedule: Optional[str] = None,
783
785
  start_time=None,
784
786
  end_time=None,
785
- query: str = None,
786
- url: str = None,
787
- user: str = None,
788
- database: str = None,
789
- schema: str = None,
790
- db_schema: str = None,
791
- warehouse: str = None,
787
+ query: Optional[str] = None,
788
+ url: Optional[str] = None,
789
+ user: Optional[str] = None,
790
+ database: Optional[str] = None,
791
+ schema: Optional[str] = None,
792
+ db_schema: Optional[str] = None,
793
+ warehouse: Optional[str] = None,
792
794
  **kwargs,
793
795
  ):
794
796
  # TODO: Remove in 1.9.0
@@ -850,9 +852,9 @@ class CustomSource(BaseSourceDriver):
850
852
 
851
853
  def __init__(
852
854
  self,
853
- class_name: str = None,
855
+ class_name: Optional[str] = None,
854
856
  name: str = "",
855
- schedule: str = None,
857
+ schedule: Optional[str] = None,
856
858
  **attributes,
857
859
  ):
858
860
  attributes = attributes or {}
@@ -930,12 +932,12 @@ class OnlineSource(BaseSourceDriver):
930
932
 
931
933
  def __init__(
932
934
  self,
933
- name: str = None,
934
- path: str = None,
935
- attributes: dict[str, object] = None,
936
- key_field: str = None,
937
- time_field: str = None,
938
- workers: int = None,
935
+ name: Optional[str] = None,
936
+ path: Optional[str] = None,
937
+ attributes: Optional[dict[str, object]] = None,
938
+ key_field: Optional[str] = None,
939
+ time_field: Optional[str] = None,
940
+ workers: Optional[int] = None,
939
941
  ):
940
942
  super().__init__(name, path, attributes, key_field, time_field)
941
943
  self.online = True
@@ -986,7 +988,7 @@ class StreamSource(OnlineSource):
986
988
  seek_to="earliest",
987
989
  shards=1,
988
990
  retention_in_hours=24,
989
- extra_attributes: dict = None,
991
+ extra_attributes: Optional[dict] = None,
990
992
  **kwargs,
991
993
  ):
992
994
  """
@@ -1168,7 +1170,7 @@ class KafkaSource(OnlineSource):
1168
1170
  self,
1169
1171
  num_partitions: int = 4,
1170
1172
  replication_factor: int = 1,
1171
- topics: list[str] = None,
1173
+ topics: Optional[list[str]] = None,
1172
1174
  ):
1173
1175
  """
1174
1176
  Create Kafka topics with the specified number of partitions and replication factor.
@@ -1226,16 +1228,16 @@ class SQLSource(BaseSourceDriver):
1226
1228
  def __init__(
1227
1229
  self,
1228
1230
  name: str = "",
1229
- chunksize: int = None,
1230
- key_field: str = None,
1231
- time_field: str = None,
1232
- schedule: str = None,
1231
+ chunksize: Optional[int] = None,
1232
+ key_field: Optional[str] = None,
1233
+ time_field: Optional[str] = None,
1234
+ schedule: Optional[str] = None,
1233
1235
  start_time: Optional[Union[datetime, str]] = None,
1234
1236
  end_time: Optional[Union[datetime, str]] = None,
1235
- db_url: str = None,
1236
- table_name: str = None,
1237
- spark_options: dict = None,
1238
- parse_dates: list[str] = None,
1237
+ db_url: Optional[str] = None,
1238
+ table_name: Optional[str] = None,
1239
+ spark_options: Optional[dict] = None,
1240
+ parse_dates: Optional[list[str]] = None,
1239
1241
  **kwargs,
1240
1242
  ):
1241
1243
  """
mlrun/datastore/store_resources.py CHANGED
@@ -12,8 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
-
17
15
  import mlrun
18
16
  import mlrun.artifacts
19
17
  from mlrun.config import config
mlrun/datastore/targets.py CHANGED
@@ -396,7 +396,7 @@ class BaseStoreTarget(DataTargetBase):
396
396
  self,
397
397
  name: str = "",
398
398
  path=None,
399
- attributes: dict[str, str] = None,
399
+ attributes: Optional[dict[str, str]] = None,
400
400
  after_step=None,
401
401
  columns=None,
402
402
  partitioned: bool = False,
@@ -405,8 +405,8 @@ class BaseStoreTarget(DataTargetBase):
405
405
  time_partitioning_granularity: Optional[str] = None,
406
406
  max_events: Optional[int] = None,
407
407
  flush_after_seconds: Optional[int] = None,
408
- storage_options: dict[str, str] = None,
409
- schema: dict[str, Any] = None,
408
+ storage_options: Optional[dict[str, str]] = None,
409
+ schema: Optional[dict[str, Any]] = None,
410
410
  credentials_prefix=None,
411
411
  ):
412
412
  super().__init__(
@@ -834,16 +834,16 @@ class ParquetTarget(BaseStoreTarget):
834
834
  self,
835
835
  name: str = "",
836
836
  path=None,
837
- attributes: dict[str, str] = None,
837
+ attributes: Optional[dict[str, str]] = None,
838
838
  after_step=None,
839
839
  columns=None,
840
- partitioned: bool = None,
840
+ partitioned: Optional[bool] = None,
841
841
  key_bucketing_number: Optional[int] = None,
842
842
  partition_cols: Optional[list[str]] = None,
843
843
  time_partitioning_granularity: Optional[str] = None,
844
844
  max_events: Optional[int] = 10000,
845
845
  flush_after_seconds: Optional[int] = 900,
846
- storage_options: dict[str, str] = None,
846
+ storage_options: Optional[dict[str, str]] = None,
847
847
  ):
848
848
  self.path = path
849
849
  if partitioned is None:
@@ -1136,8 +1136,7 @@ class CSVTarget(BaseStoreTarget):
1136
1136
  import pyspark.sql.functions as funcs
1137
1137
 
1138
1138
  for col_name, col_type in df.dtypes:
1139
- # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
1140
- if col_type.startswith("timestamp"):
1139
+ if col_type == "timestamp":
1141
1140
  # df.write.csv saves timestamps with millisecond precision, but we want microsecond precision
1142
1141
  # for compatibility with storey.
1143
1142
  df = df.withColumn(
@@ -1200,7 +1199,7 @@ class SnowflakeTarget(BaseStoreTarget):
1200
1199
  self,
1201
1200
  name: str = "",
1202
1201
  path=None,
1203
- attributes: dict[str, str] = None,
1202
+ attributes: Optional[dict[str, str]] = None,
1204
1203
  after_step=None,
1205
1204
  columns=None,
1206
1205
  partitioned: bool = False,
@@ -1209,15 +1208,15 @@ class SnowflakeTarget(BaseStoreTarget):
1209
1208
  time_partitioning_granularity: Optional[str] = None,
1210
1209
  max_events: Optional[int] = None,
1211
1210
  flush_after_seconds: Optional[int] = None,
1212
- storage_options: dict[str, str] = None,
1213
- schema: dict[str, Any] = None,
1211
+ storage_options: Optional[dict[str, str]] = None,
1212
+ schema: Optional[dict[str, Any]] = None,
1214
1213
  credentials_prefix=None,
1215
- url: str = None,
1216
- user: str = None,
1217
- db_schema: str = None,
1218
- database: str = None,
1219
- warehouse: str = None,
1220
- table_name: str = None,
1214
+ url: Optional[str] = None,
1215
+ user: Optional[str] = None,
1216
+ db_schema: Optional[str] = None,
1217
+ database: Optional[str] = None,
1218
+ warehouse: Optional[str] = None,
1219
+ table_name: Optional[str] = None,
1221
1220
  ):
1222
1221
  attributes = attributes or {}
1223
1222
  if url:
@@ -1904,7 +1903,7 @@ class SQLTarget(BaseStoreTarget):
1904
1903
  self,
1905
1904
  name: str = "",
1906
1905
  path=None,
1907
- attributes: dict[str, str] = None,
1906
+ attributes: Optional[dict[str, str]] = None,
1908
1907
  after_step=None,
1909
1908
  partitioned: bool = False,
1910
1909
  key_bucketing_number: Optional[int] = None,
@@ -1912,16 +1911,16 @@ class SQLTarget(BaseStoreTarget):
1912
1911
  time_partitioning_granularity: Optional[str] = None,
1913
1912
  max_events: Optional[int] = None,
1914
1913
  flush_after_seconds: Optional[int] = None,
1915
- storage_options: dict[str, str] = None,
1916
- db_url: str = None,
1917
- table_name: str = None,
1918
- schema: dict[str, Any] = None,
1914
+ storage_options: Optional[dict[str, str]] = None,
1915
+ db_url: Optional[str] = None,
1916
+ table_name: Optional[str] = None,
1917
+ schema: Optional[dict[str, Any]] = None,
1919
1918
  primary_key_column: str = "",
1920
1919
  if_exists: str = "append",
1921
1920
  create_table: bool = False,
1922
1921
  # create_according_to_data: bool = False,
1923
1922
  varchar_len: int = 50,
1924
- parse_dates: list[str] = None,
1923
+ parse_dates: Optional[list[str]] = None,
1925
1924
  ):
1926
1925
  """
1927
1926
  Write to SqlDB as output target for a flow.
mlrun/datastore/utils.py CHANGED
@@ -26,7 +26,7 @@ import mlrun.datastore
26
26
 
27
27
 
28
28
  def parse_kafka_url(
29
- url: str, brokers: typing.Union[list, str] = None
29
+ url: str, brokers: typing.Optional[typing.Union[list, str]] = None
30
30
  ) -> tuple[str, list]:
31
31
  """Generating Kafka topic and adjusting a list of bootstrap servers.
32
32
 
@@ -71,7 +71,7 @@ def upload_tarball(source_dir, target, secrets=None):
71
71
 
72
72
  def filter_df_start_end_time(
73
73
  df: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
74
- time_column: str = None,
74
+ time_column: typing.Optional[str] = None,
75
75
  start_time: pd.Timestamp = None,
76
76
  end_time: pd.Timestamp = None,
77
77
  ) -> typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]]:
mlrun/datastore/v3io.py CHANGED
@@ -14,6 +14,7 @@
14
14
 
15
15
  import time
16
16
  from datetime import datetime
17
+ from typing import Optional
17
18
 
18
19
  import fsspec
19
20
  import v3io
@@ -33,7 +34,9 @@ V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 10
33
34
 
34
35
 
35
36
  class V3ioStore(DataStore):
36
- def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
37
+ def __init__(
38
+ self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
39
+ ):
37
40
  super().__init__(parent, name, schema, endpoint, secrets=secrets)
38
41
  self.endpoint = self.endpoint or mlrun.mlconf.v3io_api
39
42
 
mlrun/datastore/wasbfs/fs.py CHANGED
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import Optional
15
16
  from urllib.parse import urlparse
16
17
 
17
18
  from fsspec import AbstractFileSystem
@@ -22,23 +23,23 @@ class WasbFS(AbstractFileSystem):
22
23
 
23
24
  def __init__(
24
25
  self,
25
- account_name: str = None,
26
- account_key: str = None,
27
- connection_string: str = None,
28
- credential: str = None,
29
- sas_token: str = None,
26
+ account_name: Optional[str] = None,
27
+ account_key: Optional[str] = None,
28
+ connection_string: Optional[str] = None,
29
+ credential: Optional[str] = None,
30
+ sas_token: Optional[str] = None,
30
31
  request_session=None,
31
- socket_timeout: int = None,
32
- blocksize: int = None,
33
- client_id: str = None,
34
- client_secret: str = None,
35
- tenant_id: str = None,
32
+ socket_timeout: Optional[int] = None,
33
+ blocksize: Optional[int] = None,
34
+ client_id: Optional[str] = None,
35
+ client_secret: Optional[str] = None,
36
+ tenant_id: Optional[str] = None,
36
37
  anon: bool = True,
37
- location_mode: str = None,
38
+ location_mode: Optional[str] = None,
38
39
  loop=None,
39
40
  asynchronous: bool = False,
40
41
  default_fill_cache: bool = True,
41
- default_cache_type: str = None,
42
+ default_cache_type: Optional[str] = None,
42
43
  **kwargs,
43
44
  ):
44
45
  from adlfs import AzureBlobFileSystem