mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +14 -12
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +19 -12
- mlrun/artifacts/__init__.py +0 -2
- mlrun/artifacts/base.py +34 -11
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/manager.py +13 -13
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +6 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/common/formatters/model_endpoint.py +30 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/model_monitoring/__init__.py +0 -3
- mlrun/common/model_monitoring/helpers.py +1 -1
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +4 -2
- mlrun/common/schemas/artifact.py +0 -6
- mlrun/common/schemas/common.py +50 -0
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +62 -12
- mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -5
- mlrun/common/schemas/partition.py +122 -0
- mlrun/config.py +43 -15
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/data_types.py +0 -1
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +4 -4
- mlrun/data_types/to_pandas.py +2 -11
- mlrun/datastore/__init__.py +0 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +51 -49
- mlrun/datastore/store_resources.py +0 -2
- mlrun/datastore/targets.py +22 -23
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +126 -62
- mlrun/db/factory.py +3 -0
- mlrun/db/httpdb.py +767 -231
- mlrun/db/nopdb.py +126 -57
- mlrun/errors.py +2 -2
- mlrun/execution.py +55 -29
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +108 -44
- mlrun/model_monitoring/__init__.py +1 -2
- mlrun/model_monitoring/api.py +6 -6
- mlrun/model_monitoring/applications/_application_steps.py +13 -15
- mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +185 -223
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/stores/__init__.py +1 -1
- mlrun/model_monitoring/db/stores/base/store.py +6 -65
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
- mlrun/model_monitoring/db/tsdb/base.py +74 -22
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +66 -35
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +284 -51
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
- mlrun/model_monitoring/helpers.py +97 -1
- mlrun/model_monitoring/model_endpoint.py +4 -2
- mlrun/model_monitoring/stream_processing.py +2 -2
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +47 -26
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +3 -3
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/__init__.py +1 -6
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +85 -215
- mlrun/projects/project.py +444 -158
- mlrun/run.py +9 -9
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +13 -10
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +14 -13
- mlrun/runtimes/nuclio/serving.py +9 -9
- mlrun/runtimes/pod.py +74 -29
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +27 -27
- mlrun/serving/server.py +1 -1
- mlrun/serving/states.py +76 -71
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +4 -4
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/helpers.py +72 -28
- mlrun/utils/logger.py +104 -2
- mlrun/utils/notifications/notification/base.py +23 -4
- mlrun/utils/notifications/notification/console.py +1 -1
- mlrun/utils/notifications/notification/git.py +6 -6
- mlrun/utils/notifications/notification/ipython.py +5 -4
- mlrun/utils/notifications/notification/slack.py +1 -1
- mlrun/utils/notifications/notification/webhook.py +13 -17
- mlrun/utils/notifications/notification_pusher.py +23 -19
- mlrun/utils/regex.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/METADATA +186 -186
- mlrun-1.8.0rc1.dist-info/RECORD +356 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/WHEEL +1 -1
- mlrun-1.7.2rc3.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/LICENSE +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/top_level.txt +0 -0
mlrun/datastore/sources.py
CHANGED
@@ -181,10 +181,10 @@ class CSVSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         parse_dates: Union[None, int, str, list[int], list[str]] = None,
         **kwargs,
     ):
@@ -308,11 +308,11 @@ class ParquetSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
         additional_filters: Optional[list[Union[tuple, list]]] = None,
@@ -392,7 +392,9 @@ class ParquetSource(BaseSourceDriver):
         )

     @classmethod
-    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+    def from_dict(
+        cls, struct=None, fields=None, deprecated_fields: Optional[dict] = None
+    ):
         new_obj = super().from_dict(
             struct=struct, fields=fields, deprecated_fields=deprecated_fields
         )
@@ -564,18 +566,18 @@ class BigQuerySource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        table: str = None,
-        max_results_for_table: int = None,
-        query: str = None,
-        materialization_dataset: str = None,
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        table: Optional[str] = None,
+        max_results_for_table: Optional[int] = None,
+        query: Optional[str] = None,
+        materialization_dataset: Optional[str] = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        gcp_project: str = None,
-        spark_options: dict = None,
+        gcp_project: Optional[str] = None,
+        spark_options: Optional[dict] = None,
         **kwargs,
     ):
         if query and table:
@@ -776,19 +778,19 @@ class SnowflakeSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        key_field: str = None,
-        attributes: dict[str, object] = None,
-        time_field: str = None,
-        schedule: str = None,
+        key_field: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        query: str = None,
-        url: str = None,
-        user: str = None,
-        database: str = None,
-        schema: str = None,
-        db_schema: str = None,
-        warehouse: str = None,
+        query: Optional[str] = None,
+        url: Optional[str] = None,
+        user: Optional[str] = None,
+        database: Optional[str] = None,
+        schema: Optional[str] = None,
+        db_schema: Optional[str] = None,
+        warehouse: Optional[str] = None,
         **kwargs,
     ):
         # TODO: Remove in 1.9.0
@@ -850,9 +852,9 @@ class CustomSource(BaseSourceDriver):

     def __init__(
         self,
-        class_name: str = None,
+        class_name: Optional[str] = None,
         name: str = "",
-        schedule: str = None,
+        schedule: Optional[str] = None,
         **attributes,
     ):
         attributes = attributes or {}
@@ -930,12 +932,12 @@ class OnlineSource(BaseSourceDriver):

     def __init__(
         self,
-        name: str = None,
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        workers: int = None,
+        name: Optional[str] = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        workers: Optional[int] = None,
     ):
         super().__init__(name, path, attributes, key_field, time_field)
         self.online = True
@@ -986,7 +988,7 @@ class StreamSource(OnlineSource):
         seek_to="earliest",
         shards=1,
         retention_in_hours=24,
-        extra_attributes: dict = None,
+        extra_attributes: Optional[dict] = None,
         **kwargs,
     ):
         """
@@ -1168,7 +1170,7 @@ class KafkaSource(OnlineSource):
         self,
         num_partitions: int = 4,
         replication_factor: int = 1,
-        topics: list[str] = None,
+        topics: Optional[list[str]] = None,
     ):
         """
         Create Kafka topics with the specified number of partitions and replication factor.
@@ -1226,16 +1228,16 @@ class SQLSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        db_url: str = None,
-        table_name: str = None,
-        spark_options: dict = None,
-        parse_dates: list[str] = None,
+        db_url: Optional[str] = None,
+        table_name: Optional[str] = None,
+        spark_options: Optional[dict] = None,
+        parse_dates: Optional[list[str]] = None,
         **kwargs,
     ):
         """
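Nearly every hunk above follows the same pattern: parameters that default to None gain an explicit Optional[...] annotation instead of relying on implicit Optional. PEP 484 deprecates the implicit form, and recent mypy releases reject it by default (no_implicit_optional). A minimal sketch of the before and after, using a hypothetical function rather than mlrun's own classes:

from typing import Optional


# Before: implicit Optional. The annotation promises str, but the default
# is None, and strict type checkers flag the mismatch.
def load_old(path: str = None):
    ...


# After: the annotation matches the default.
def load_new(path: Optional[str] = None):
    ...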
mlrun/datastore/store_resources.py
CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config
mlrun/datastore/targets.py
CHANGED
@@ -396,7 +396,7 @@ class BaseStoreTarget(DataTargetBase):
         self,
         name: str = "",
         path=None,
-        attributes: dict[str, str] = None,
+        attributes: Optional[dict[str, str]] = None,
         after_step=None,
         columns=None,
         partitioned: bool = False,
@@ -405,8 +405,8 @@ class BaseStoreTarget(DataTargetBase):
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = None,
         flush_after_seconds: Optional[int] = None,
-        storage_options: dict[str, str] = None,
-        schema: dict[str, Any] = None,
+        storage_options: Optional[dict[str, str]] = None,
+        schema: Optional[dict[str, Any]] = None,
         credentials_prefix=None,
     ):
         super().__init__(
@@ -834,16 +834,16 @@ class ParquetTarget(BaseStoreTarget):
         self,
         name: str = "",
         path=None,
-        attributes: dict[str, str] = None,
+        attributes: Optional[dict[str, str]] = None,
         after_step=None,
         columns=None,
-        partitioned: bool = None,
+        partitioned: Optional[bool] = None,
         key_bucketing_number: Optional[int] = None,
         partition_cols: Optional[list[str]] = None,
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = 10000,
         flush_after_seconds: Optional[int] = 900,
-        storage_options: dict[str, str] = None,
+        storage_options: Optional[dict[str, str]] = None,
     ):
         self.path = path
         if partitioned is None:
@@ -1136,8 +1136,7 @@ class CSVTarget(BaseStoreTarget):
         import pyspark.sql.functions as funcs

         for col_name, col_type in df.dtypes:
-
-            if col_type.startswith("timestamp"):
+            if col_type == "timestamp":
                 # df.write.csv saves timestamps with millisecond precision, but we want microsecond precision
                 # for compatibility with storey.
                 df = df.withColumn(
@@ -1200,7 +1199,7 @@ class SnowflakeTarget(BaseStoreTarget):
         self,
         name: str = "",
         path=None,
-        attributes: dict[str, str] = None,
+        attributes: Optional[dict[str, str]] = None,
         after_step=None,
         columns=None,
         partitioned: bool = False,
@@ -1209,15 +1208,15 @@ class SnowflakeTarget(BaseStoreTarget):
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = None,
         flush_after_seconds: Optional[int] = None,
-        storage_options: dict[str, str] = None,
-        schema: dict[str, Any] = None,
+        storage_options: Optional[dict[str, str]] = None,
+        schema: Optional[dict[str, Any]] = None,
         credentials_prefix=None,
-        url: str = None,
-        user: str = None,
-        db_schema: str = None,
-        database: str = None,
-        warehouse: str = None,
-        table_name: str = None,
+        url: Optional[str] = None,
+        user: Optional[str] = None,
+        db_schema: Optional[str] = None,
+        database: Optional[str] = None,
+        warehouse: Optional[str] = None,
+        table_name: Optional[str] = None,
     ):
         attributes = attributes or {}
         if url:
@@ -1904,7 +1903,7 @@ class SQLTarget(BaseStoreTarget):
         self,
         name: str = "",
         path=None,
-        attributes: dict[str, str] = None,
+        attributes: Optional[dict[str, str]] = None,
         after_step=None,
         partitioned: bool = False,
         key_bucketing_number: Optional[int] = None,
@@ -1912,16 +1911,16 @@ class SQLTarget(BaseStoreTarget):
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = None,
         flush_after_seconds: Optional[int] = None,
-        storage_options: dict[str, str] = None,
-        db_url: str = None,
-        table_name: str = None,
-        schema: dict[str, Any] = None,
+        storage_options: Optional[dict[str, str]] = None,
+        db_url: Optional[str] = None,
+        table_name: Optional[str] = None,
+        schema: Optional[dict[str, Any]] = None,
         primary_key_column: str = "",
         if_exists: str = "append",
         create_table: bool = False,
         # create_according_to_data: bool = False,
         varchar_len: int = 50,
-        parse_dates: list[str] = None,
+        parse_dates: Optional[list[str]] = None,
     ):
         """
         Write to SqlDB as output target for a flow.
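Among the annotation updates, CSVTarget carries one behavioral change: the Spark dtype check narrows from startswith("timestamp") to an exact "timestamp" match, so a prefix match no longer catches related dtypes (Spark 3.4 introduced a separate timestamp_ntz type, for example). The code after df.withColumn( is cut off in this diff; below is a hedged sketch of the kind of conversion the inline comment describes, assuming date_format is the mechanism and with df standing in for a Spark DataFrame:

import pyspark.sql.functions as funcs

# Sketch only: df.write.csv keeps millisecond precision for timestamps,
# so timestamp columns are rendered to strings with microseconds first.
for col_name, col_type in df.dtypes:
    if col_type == "timestamp":
        df = df.withColumn(
            col_name,
            funcs.date_format(funcs.col(col_name), "yyyy-MM-dd HH:mm:ss.SSSSSS"),
        )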
mlrun/datastore/utils.py
CHANGED
@@ -26,7 +26,7 @@ import mlrun.datastore


 def parse_kafka_url(
-    url: str, brokers: typing.Union[list, str] = None
+    url: str, brokers: typing.Optional[typing.Union[list, str]] = None
 ) -> tuple[str, list]:
     """Generating Kafka topic and adjusting a list of bootstrap servers.

@@ -71,7 +71,7 @@ def upload_tarball(source_dir, target, secrets=None):

 def filter_df_start_end_time(
     df: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
-    time_column: str = None,
+    time_column: typing.Optional[str] = None,
     start_time: pd.Timestamp = None,
     end_time: pd.Timestamp = None,
 ) -> typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]]:
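For context, parse_kafka_url splits a kafka:// URL into a topic plus a bootstrap-server list, per its docstring; the hunk only makes the already-optional brokers argument explicitly Optional. A hedged usage sketch (the return values are inferred from the signature and docstring, not verified against the implementation):

from mlrun.datastore.utils import parse_kafka_url

# The topic is expected to come from the URL path and the broker from
# its netloc, yielding something like ("my-topic", ["localhost:9092"]).
topic, brokers = parse_kafka_url("kafka://localhost:9092/my-topic")

# The Union annotation suggests brokers may also be given directly, as a
# list or a string; assumed here to take precedence over the URL's netloc.
topic, brokers = parse_kafka_url("kafka:///my-topic", brokers=["b1:9092"])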
mlrun/datastore/v3io.py
CHANGED
@@ -14,6 +14,7 @@

 import time
 from datetime import datetime
+from typing import Optional

 import fsspec
 import v3io
@@ -33,7 +34,9 @@ V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 10


 class V3ioStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.endpoint = self.endpoint or mlrun.mlconf.v3io_api

mlrun/datastore/wasbfs/fs.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Optional
 from urllib.parse import urlparse

 from fsspec import AbstractFileSystem
@@ -22,23 +23,23 @@ class WasbFS(AbstractFileSystem):

     def __init__(
         self,
-        account_name: str = None,
-        account_key: str = None,
-        connection_string: str = None,
-        credential: str = None,
-        sas_token: str = None,
+        account_name: Optional[str] = None,
+        account_key: Optional[str] = None,
+        connection_string: Optional[str] = None,
+        credential: Optional[str] = None,
+        sas_token: Optional[str] = None,
         request_session=None,
-        socket_timeout: int = None,
-        blocksize: int = None,
-        client_id: str = None,
-        client_secret: str = None,
-        tenant_id: str = None,
+        socket_timeout: Optional[int] = None,
+        blocksize: Optional[int] = None,
+        client_id: Optional[str] = None,
+        client_secret: Optional[str] = None,
+        tenant_id: Optional[str] = None,
         anon: bool = True,
-        location_mode: str = None,
+        location_mode: Optional[str] = None,
         loop=None,
         asynchronous: bool = False,
         default_fill_cache: bool = True,
-        default_cache_type: str = None,
+        default_cache_type: Optional[str] = None,
         **kwargs,
     ):
         from adlfs import AzureBlobFileSystem
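The adlfs import sitting inside __init__ is the lazy-import pattern fsspec wrappers commonly use: the backend package is only required when the filesystem is actually constructed. A minimal sketch of the same idea, assuming adlfs as the optional backend:

from fsspec import AbstractFileSystem


class LazyWrapperFS(AbstractFileSystem):
    """Thin wrapper that defers importing its backend until construction."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Importing here keeps adlfs an optional dependency; users who
        # never instantiate this filesystem never need it installed.
        from adlfs import AzureBlobFileSystem

        self._fs = AzureBlobFileSystem(**kwargs)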