mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3rc1.dist-info/RECORD +0 -381
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/sources.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -32,7 +32,12 @@ from ..config import config
|
|
|
32
32
|
from ..model import DataSource
|
|
33
33
|
from ..platforms.iguazio import parse_path
|
|
34
34
|
from ..utils import get_class
|
|
35
|
-
from .utils import
|
|
35
|
+
from .utils import (
|
|
36
|
+
_generate_sql_query_with_time_filter,
|
|
37
|
+
filter_df_start_end_time,
|
|
38
|
+
select_columns_from_df,
|
|
39
|
+
store_path_to_spark,
|
|
40
|
+
)
|
|
36
41
|
|
|
37
42
|
|
|
38
43
|
def get_source_from_dict(source):
|
|
@@ -62,38 +67,59 @@ class BaseSourceDriver(DataSource):
|
|
|
62
67
|
def to_step(self, key_field=None, time_field=None, context=None):
|
|
63
68
|
import storey
|
|
64
69
|
|
|
70
|
+
if not self.support_storey:
|
|
71
|
+
raise mlrun.errors.MLRunRuntimeError(
|
|
72
|
+
f"{type(self).__name__} does not support storey engine"
|
|
73
|
+
)
|
|
74
|
+
|
|
65
75
|
return storey.SyncEmitSource(context=context)
|
|
66
76
|
|
|
67
77
|
def get_table_object(self):
|
|
68
78
|
"""get storey Table object"""
|
|
69
79
|
return None
|
|
70
80
|
|
|
71
|
-
def to_dataframe(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
81
|
+
def to_dataframe(
|
|
82
|
+
self,
|
|
83
|
+
columns=None,
|
|
84
|
+
df_module=None,
|
|
85
|
+
entities=None,
|
|
86
|
+
start_time=None,
|
|
87
|
+
end_time=None,
|
|
88
|
+
time_field=None,
|
|
89
|
+
):
|
|
90
|
+
"""return the source data as dataframe"""
|
|
91
|
+
return mlrun.store_manager.object(url=self.path).as_df(
|
|
92
|
+
columns=columns,
|
|
93
|
+
df_module=df_module,
|
|
94
|
+
start_time=start_time or self.start_time,
|
|
95
|
+
end_time=end_time or self.end_time,
|
|
96
|
+
time_column=time_field or self.time_field,
|
|
97
|
+
)
|
|
88
98
|
|
|
89
|
-
def to_spark_df(self, session, named_view=False, time_field=None):
|
|
99
|
+
def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
|
|
90
100
|
if self.support_spark:
|
|
91
101
|
df = session.read.load(**self.get_spark_options())
|
|
92
102
|
if named_view:
|
|
93
103
|
df.createOrReplaceTempView(self.name)
|
|
94
|
-
return df
|
|
104
|
+
return self._filter_spark_df(df, time_field, columns)
|
|
95
105
|
raise NotImplementedError()
|
|
96
106
|
|
|
107
|
+
def _filter_spark_df(self, df, time_field=None, columns=None):
|
|
108
|
+
if not (columns or time_field):
|
|
109
|
+
return df
|
|
110
|
+
|
|
111
|
+
from pyspark.sql.functions import col
|
|
112
|
+
|
|
113
|
+
if time_field:
|
|
114
|
+
if self.start_time:
|
|
115
|
+
df = df.filter(col(time_field) > self.start_time)
|
|
116
|
+
if self.end_time:
|
|
117
|
+
df = df.filter(col(time_field) <= self.end_time)
|
|
118
|
+
|
|
119
|
+
if columns:
|
|
120
|
+
df = df.select([col(name) for name in columns])
|
|
121
|
+
return df
|
|
122
|
+
|
|
97
123
|
def get_spark_options(self):
|
|
98
124
|
# options used in spark.read.load(**options)
|
|
99
125
|
raise NotImplementedError()
|
|
@@ -166,7 +192,6 @@ class CSVSource(BaseSourceDriver):
|
|
|
166
192
|
|
|
167
193
|
return storey.CSVSource(
|
|
168
194
|
paths=self.path,
|
|
169
|
-
header=True,
|
|
170
195
|
build_dict=True,
|
|
171
196
|
key_field=self.key_field or key_field,
|
|
172
197
|
storage_options=self._get_store().get_storage_options(),
|
|
@@ -182,7 +207,7 @@ class CSVSource(BaseSourceDriver):
|
|
|
182
207
|
"inferSchema": "true",
|
|
183
208
|
}
|
|
184
209
|
|
|
185
|
-
def to_spark_df(self, session, named_view=False, time_field=None):
|
|
210
|
+
def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
|
|
186
211
|
import pyspark.sql.functions as funcs
|
|
187
212
|
|
|
188
213
|
df = session.read.load(**self.get_spark_options())
|
|
@@ -196,15 +221,28 @@ class CSVSource(BaseSourceDriver):
|
|
|
196
221
|
df = df.withColumn(col_name, funcs.col(col_name).cast("timestamp"))
|
|
197
222
|
if named_view:
|
|
198
223
|
df.createOrReplaceTempView(self.name)
|
|
199
|
-
return df
|
|
224
|
+
return self._filter_spark_df(df, time_field, columns)
|
|
200
225
|
|
|
201
|
-
def to_dataframe(
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
226
|
+
def to_dataframe(
|
|
227
|
+
self,
|
|
228
|
+
columns=None,
|
|
229
|
+
df_module=None,
|
|
230
|
+
entities=None,
|
|
231
|
+
start_time=None,
|
|
232
|
+
end_time=None,
|
|
233
|
+
time_field=None,
|
|
234
|
+
):
|
|
235
|
+
reader_args = self.attributes.get("reader_args", {})
|
|
206
236
|
return mlrun.store_manager.object(url=self.path).as_df(
|
|
207
|
-
|
|
237
|
+
columns=columns,
|
|
238
|
+
df_module=df_module,
|
|
239
|
+
format="csv",
|
|
240
|
+
start_time=start_time or self.start_time,
|
|
241
|
+
end_time=end_time or self.end_time,
|
|
242
|
+
time_column=time_field or self.time_field,
|
|
243
|
+
parse_dates=self._parse_dates,
|
|
244
|
+
chunksize=self.attributes.get("chunksize"),
|
|
245
|
+
**reader_args,
|
|
208
246
|
)
|
|
209
247
|
|
|
210
248
|
def is_iterator(self):
|
|
@@ -246,7 +284,6 @@ class ParquetSource(BaseSourceDriver):
|
|
|
246
284
|
start_time: Optional[Union[datetime, str]] = None,
|
|
247
285
|
end_time: Optional[Union[datetime, str]] = None,
|
|
248
286
|
):
|
|
249
|
-
|
|
250
287
|
super().__init__(
|
|
251
288
|
name,
|
|
252
289
|
path,
|
|
@@ -312,10 +349,24 @@ class ParquetSource(BaseSourceDriver):
|
|
|
312
349
|
"format": "parquet",
|
|
313
350
|
}
|
|
314
351
|
|
|
315
|
-
def to_dataframe(
|
|
316
|
-
|
|
352
|
+
def to_dataframe(
|
|
353
|
+
self,
|
|
354
|
+
columns=None,
|
|
355
|
+
df_module=None,
|
|
356
|
+
entities=None,
|
|
357
|
+
start_time=None,
|
|
358
|
+
end_time=None,
|
|
359
|
+
time_field=None,
|
|
360
|
+
):
|
|
361
|
+
reader_args = self.attributes.get("reader_args", {})
|
|
317
362
|
return mlrun.store_manager.object(url=self.path).as_df(
|
|
318
|
-
|
|
363
|
+
columns=columns,
|
|
364
|
+
df_module=df_module,
|
|
365
|
+
start_time=start_time or self.start_time,
|
|
366
|
+
end_time=end_time or self.end_time,
|
|
367
|
+
time_column=time_field or self.time_field,
|
|
368
|
+
format="parquet",
|
|
369
|
+
**reader_args,
|
|
319
370
|
)
|
|
320
371
|
|
|
321
372
|
|
|
@@ -323,8 +374,13 @@ class BigQuerySource(BaseSourceDriver):
|
|
|
323
374
|
"""
|
|
324
375
|
Reads Google BigQuery query results as input source for a flow.
|
|
325
376
|
|
|
377
|
+
For authentication, set the GCP_CREDENTIALS project secret to the credentials json string.
|
|
378
|
+
|
|
326
379
|
example::
|
|
327
380
|
|
|
381
|
+
# set the credentials
|
|
382
|
+
project.set_secrets({"GCP_CREDENTIALS": gcp_credentials_json})
|
|
383
|
+
|
|
328
384
|
# use sql query
|
|
329
385
|
query_string = "SELECT * FROM `the-psf.pypi.downloads20210328` LIMIT 5000"
|
|
330
386
|
source = BigQuerySource("bq1", query=query_string,
|
|
@@ -371,11 +427,21 @@ class BigQuerySource(BaseSourceDriver):
|
|
|
371
427
|
end_time=None,
|
|
372
428
|
gcp_project: str = None,
|
|
373
429
|
spark_options: dict = None,
|
|
430
|
+
**kwargs,
|
|
374
431
|
):
|
|
375
432
|
if query and table:
|
|
376
433
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
377
434
|
"cannot specify both table and query args"
|
|
378
435
|
)
|
|
436
|
+
# Otherwise, the client library does not fully respect the limit
|
|
437
|
+
if (
|
|
438
|
+
max_results_for_table
|
|
439
|
+
and chunksize
|
|
440
|
+
and max_results_for_table % chunksize != 0
|
|
441
|
+
):
|
|
442
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
443
|
+
"max_results_for_table must be a multiple of chunksize"
|
|
444
|
+
)
|
|
379
445
|
attrs = {
|
|
380
446
|
"query": query,
|
|
381
447
|
"table": table,
|
|
@@ -394,8 +460,8 @@ class BigQuerySource(BaseSourceDriver):
|
|
|
394
460
|
schedule=schedule,
|
|
395
461
|
start_time=start_time,
|
|
396
462
|
end_time=end_time,
|
|
463
|
+
**kwargs,
|
|
397
464
|
)
|
|
398
|
-
self._rows_iterator = None
|
|
399
465
|
|
|
400
466
|
def _get_credentials_string(self):
|
|
401
467
|
gcp_project = self.attributes.get("gcp_project", None)
|
|
@@ -417,7 +483,15 @@ class BigQuerySource(BaseSourceDriver):
|
|
|
417
483
|
return credentials, gcp_project or gcp_cred_dict["project_id"]
|
|
418
484
|
return None, gcp_project
|
|
419
485
|
|
|
420
|
-
def to_dataframe(
|
|
486
|
+
def to_dataframe(
|
|
487
|
+
self,
|
|
488
|
+
columns=None,
|
|
489
|
+
df_module=None,
|
|
490
|
+
entities=None,
|
|
491
|
+
start_time=None,
|
|
492
|
+
end_time=None,
|
|
493
|
+
time_field=None,
|
|
494
|
+
):
|
|
421
495
|
from google.cloud import bigquery
|
|
422
496
|
from google.cloud.bigquery_storage_v1 import BigQueryReadClient
|
|
423
497
|
|
|
@@ -438,39 +512,43 @@ class BigQuerySource(BaseSourceDriver):
|
|
|
438
512
|
if query:
|
|
439
513
|
query_job = bqclient.query(query)
|
|
440
514
|
|
|
441
|
-
|
|
442
|
-
dtypes = schema_to_dtypes(self._rows_iterator.schema)
|
|
443
|
-
if chunksize:
|
|
444
|
-
# passing bqstorage_client greatly improves performance
|
|
445
|
-
return self._rows_iterator.to_dataframe_iterable(
|
|
446
|
-
bqstorage_client=BigQueryReadClient(), dtypes=dtypes
|
|
447
|
-
)
|
|
448
|
-
else:
|
|
449
|
-
return self._rows_iterator.to_dataframe(dtypes=dtypes)
|
|
515
|
+
rows_iterator = query_job.result(page_size=chunksize)
|
|
450
516
|
elif table:
|
|
451
517
|
table = self.attributes.get("table")
|
|
452
518
|
max_results = self.attributes.get("max_results")
|
|
453
519
|
|
|
454
|
-
|
|
520
|
+
rows_iterator = bqclient.list_rows(
|
|
455
521
|
table, page_size=chunksize, max_results=max_results
|
|
456
522
|
)
|
|
457
|
-
dtypes = schema_to_dtypes(rows.schema)
|
|
458
|
-
if chunksize:
|
|
459
|
-
# passing bqstorage_client greatly improves performance
|
|
460
|
-
return rows.to_dataframe_iterable(
|
|
461
|
-
bqstorage_client=BigQueryReadClient(), dtypes=dtypes
|
|
462
|
-
)
|
|
463
|
-
else:
|
|
464
|
-
return rows.to_dataframe(dtypes=dtypes)
|
|
465
523
|
else:
|
|
466
524
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
467
525
|
"table or query args must be specified"
|
|
468
526
|
)
|
|
469
527
|
|
|
528
|
+
dtypes = schema_to_dtypes(rows_iterator.schema)
|
|
529
|
+
if chunksize:
|
|
530
|
+
# passing bqstorage_client greatly improves performance
|
|
531
|
+
df = rows_iterator.to_dataframe_iterable(
|
|
532
|
+
bqstorage_client=BigQueryReadClient(), dtypes=dtypes
|
|
533
|
+
)
|
|
534
|
+
else:
|
|
535
|
+
df = rows_iterator.to_dataframe(dtypes=dtypes)
|
|
536
|
+
|
|
537
|
+
# TODO : filter as part of the query
|
|
538
|
+
return select_columns_from_df(
|
|
539
|
+
filter_df_start_end_time(
|
|
540
|
+
df,
|
|
541
|
+
time_column=time_field or self.time_field,
|
|
542
|
+
start_time=start_time or self.start_time,
|
|
543
|
+
end_time=end_time or self.end_time,
|
|
544
|
+
),
|
|
545
|
+
columns=columns,
|
|
546
|
+
)
|
|
547
|
+
|
|
470
548
|
def is_iterator(self):
|
|
471
549
|
return bool(self.attributes.get("chunksize"))
|
|
472
550
|
|
|
473
|
-
def to_spark_df(self, session, named_view=False, time_field=None):
|
|
551
|
+
def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
|
|
474
552
|
options = copy(self.attributes.get("spark_options", {}))
|
|
475
553
|
credentials, gcp_project = self._get_credentials_string()
|
|
476
554
|
if credentials:
|
|
@@ -500,7 +578,7 @@ class BigQuerySource(BaseSourceDriver):
|
|
|
500
578
|
df = session.read.format("bigquery").load(**options)
|
|
501
579
|
if named_view:
|
|
502
580
|
df.createOrReplaceTempView(self.name)
|
|
503
|
-
return df
|
|
581
|
+
return self._filter_spark_df(df, time_field, columns)
|
|
504
582
|
|
|
505
583
|
|
|
506
584
|
class SnowflakeSource(BaseSourceDriver):
|
|
@@ -555,6 +633,7 @@ class SnowflakeSource(BaseSourceDriver):
|
|
|
555
633
|
database: str = None,
|
|
556
634
|
schema: str = None,
|
|
557
635
|
warehouse: str = None,
|
|
636
|
+
**kwargs,
|
|
558
637
|
):
|
|
559
638
|
attrs = {
|
|
560
639
|
"query": query,
|
|
@@ -573,6 +652,7 @@ class SnowflakeSource(BaseSourceDriver):
|
|
|
573
652
|
schedule=schedule,
|
|
574
653
|
start_time=start_time,
|
|
575
654
|
end_time=end_time,
|
|
655
|
+
**kwargs,
|
|
576
656
|
)
|
|
577
657
|
|
|
578
658
|
def _get_password(self):
|
|
@@ -664,7 +744,7 @@ class DataFrameSource:
|
|
|
664
744
|
context=self.context or context,
|
|
665
745
|
)
|
|
666
746
|
|
|
667
|
-
def to_dataframe(self):
|
|
747
|
+
def to_dataframe(self, **kwargs):
|
|
668
748
|
return self._df
|
|
669
749
|
|
|
670
750
|
def is_iterator(self):
|
|
@@ -839,7 +919,15 @@ class KafkaSource(OnlineSource):
|
|
|
839
919
|
attributes["sasl"] = sasl
|
|
840
920
|
super().__init__(attributes=attributes, **kwargs)
|
|
841
921
|
|
|
842
|
-
def to_dataframe(
|
|
922
|
+
def to_dataframe(
|
|
923
|
+
self,
|
|
924
|
+
columns=None,
|
|
925
|
+
df_module=None,
|
|
926
|
+
entities=None,
|
|
927
|
+
start_time=None,
|
|
928
|
+
end_time=None,
|
|
929
|
+
time_field=None,
|
|
930
|
+
):
|
|
843
931
|
raise mlrun.MLRunInvalidArgumentError(
|
|
844
932
|
"KafkaSource does not support batch processing"
|
|
845
933
|
)
|
|
@@ -880,13 +968,15 @@ class SQLSource(BaseSourceDriver):
|
|
|
880
968
|
table_name: str = None,
|
|
881
969
|
spark_options: dict = None,
|
|
882
970
|
time_fields: List[str] = None,
|
|
971
|
+
parse_dates: List[str] = None,
|
|
972
|
+
**kwargs,
|
|
883
973
|
):
|
|
884
974
|
"""
|
|
885
975
|
Reads SqlDB as input source for a flow.
|
|
886
976
|
example::
|
|
887
|
-
|
|
888
|
-
source =
|
|
889
|
-
|
|
977
|
+
db_url = "mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>"
|
|
978
|
+
source = SQLSource(
|
|
979
|
+
table_name='source_name', db_url=db_url, key_field='key'
|
|
890
980
|
)
|
|
891
981
|
:param name: source name
|
|
892
982
|
:param chunksize: number of rows per chunk (default large single chunk)
|
|
@@ -903,19 +993,32 @@ class SQLSource(BaseSourceDriver):
|
|
|
903
993
|
from the current database
|
|
904
994
|
:param spark_options: additional spark read options
|
|
905
995
|
:param time_fields : all the field to be parsed as timestamp.
|
|
996
|
+
:param parse_dates : all the field to be parsed as timestamp.
|
|
906
997
|
"""
|
|
907
|
-
|
|
998
|
+
if time_fields:
|
|
999
|
+
warnings.warn(
|
|
1000
|
+
"'time_fields' is deprecated, use 'parse_dates' instead. "
|
|
1001
|
+
"This will be removed in 1.6.0",
|
|
1002
|
+
# TODO: Remove this in 1.6.0
|
|
1003
|
+
FutureWarning,
|
|
1004
|
+
)
|
|
1005
|
+
parse_dates = time_fields
|
|
908
1006
|
db_url = db_url or mlrun.mlconf.sql.url
|
|
909
1007
|
if db_url is None:
|
|
910
1008
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
911
1009
|
"cannot specify without db_path arg or secret MLRUN_SQL__URL"
|
|
912
1010
|
)
|
|
1011
|
+
if time_field:
|
|
1012
|
+
if parse_dates:
|
|
1013
|
+
time_fields.append(time_field)
|
|
1014
|
+
else:
|
|
1015
|
+
parse_dates = [time_field]
|
|
913
1016
|
attrs = {
|
|
914
1017
|
"chunksize": chunksize,
|
|
915
1018
|
"spark_options": spark_options,
|
|
916
1019
|
"table_name": table_name,
|
|
917
1020
|
"db_path": db_url,
|
|
918
|
-
"
|
|
1021
|
+
"parse_dates": parse_dates,
|
|
919
1022
|
}
|
|
920
1023
|
attrs = {key: value for key, value in attrs.items() if value is not None}
|
|
921
1024
|
super().__init__(
|
|
@@ -926,24 +1029,43 @@ class SQLSource(BaseSourceDriver):
|
|
|
926
1029
|
schedule=schedule,
|
|
927
1030
|
start_time=start_time,
|
|
928
1031
|
end_time=end_time,
|
|
1032
|
+
**kwargs,
|
|
929
1033
|
)
|
|
930
1034
|
|
|
931
|
-
def to_dataframe(
|
|
932
|
-
|
|
1035
|
+
def to_dataframe(
|
|
1036
|
+
self,
|
|
1037
|
+
columns=None,
|
|
1038
|
+
df_module=None,
|
|
1039
|
+
entities=None,
|
|
1040
|
+
start_time=None,
|
|
1041
|
+
end_time=None,
|
|
1042
|
+
time_field=None,
|
|
1043
|
+
):
|
|
1044
|
+
import sqlalchemy as sqlalchemy
|
|
933
1045
|
|
|
934
|
-
query = self.attributes.get("query", None)
|
|
935
1046
|
db_path = self.attributes.get("db_path")
|
|
936
1047
|
table_name = self.attributes.get("table_name")
|
|
937
|
-
|
|
938
|
-
|
|
1048
|
+
parse_dates = self.attributes.get("parse_dates")
|
|
1049
|
+
time_field = time_field or self.time_field
|
|
1050
|
+
start_time = start_time or self.start_time
|
|
1051
|
+
end_time = end_time or self.end_time
|
|
939
1052
|
if table_name and db_path:
|
|
940
|
-
engine =
|
|
1053
|
+
engine = sqlalchemy.create_engine(db_path)
|
|
1054
|
+
query, parse_dates = _generate_sql_query_with_time_filter(
|
|
1055
|
+
table_name=table_name,
|
|
1056
|
+
engine=engine,
|
|
1057
|
+
time_column=time_field,
|
|
1058
|
+
parse_dates=parse_dates,
|
|
1059
|
+
start_time=start_time,
|
|
1060
|
+
end_time=end_time,
|
|
1061
|
+
)
|
|
941
1062
|
with engine.connect() as con:
|
|
942
1063
|
return pd.read_sql(
|
|
943
1064
|
query,
|
|
944
1065
|
con=con,
|
|
945
1066
|
chunksize=self.attributes.get("chunksize"),
|
|
946
|
-
parse_dates=
|
|
1067
|
+
parse_dates=parse_dates,
|
|
1068
|
+
columns=columns,
|
|
947
1069
|
)
|
|
948
1070
|
else:
|
|
949
1071
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import hashlib
|
|
15
|
+
|
|
16
|
+
from pyspark.sql.functions import udf
|
|
17
|
+
from pyspark.sql.types import StringType
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _hash_list(*list_to_hash):
|
|
21
|
+
list_to_hash = [str(element) for element in list_to_hash]
|
|
22
|
+
str_concatted = "".join(list_to_hash)
|
|
23
|
+
sha1 = hashlib.sha1()
|
|
24
|
+
sha1.update(str_concatted.encode("utf8"))
|
|
25
|
+
return sha1.hexdigest()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _redis_stringify_key(*args):
|
|
29
|
+
if len(args) == 1:
|
|
30
|
+
key_list = args[0]
|
|
31
|
+
else:
|
|
32
|
+
key_list = list(args)
|
|
33
|
+
suffix = "}:static"
|
|
34
|
+
if isinstance(key_list, list):
|
|
35
|
+
if len(key_list) >= 3:
|
|
36
|
+
return str(key_list[0]) + "." + _hash_list(*key_list[1:]) + suffix
|
|
37
|
+
if len(key_list) == 2:
|
|
38
|
+
return str(key_list[0]) + "." + str(key_list[1]) + suffix
|
|
39
|
+
return str(key_list[0]) + suffix
|
|
40
|
+
return str(key_list) + suffix
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
hash_and_concat_v3io_udf = udf(_hash_list, StringType())
|
|
44
|
+
hash_and_concat_redis_udf = udf(_redis_stringify_key, StringType())
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -81,7 +81,7 @@ class ResourceCache:
|
|
|
81
81
|
endpoint, uri = parse_path(uri)
|
|
82
82
|
self._tabels[uri] = Table(
|
|
83
83
|
uri,
|
|
84
|
-
V3ioDriver(webapi=endpoint),
|
|
84
|
+
V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
|
|
85
85
|
flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
|
|
86
86
|
)
|
|
87
87
|
return self._tabels[uri]
|
|
@@ -101,8 +101,8 @@ class ResourceCache:
|
|
|
101
101
|
if is_store_uri(uri):
|
|
102
102
|
resource = get_store_resource(uri)
|
|
103
103
|
if resource.kind in [
|
|
104
|
-
mlrun.
|
|
105
|
-
mlrun.
|
|
104
|
+
mlrun.common.schemas.ObjectKind.feature_set.value,
|
|
105
|
+
mlrun.common.schemas.ObjectKind.feature_vector.value,
|
|
106
106
|
]:
|
|
107
107
|
target = get_online_target(resource)
|
|
108
108
|
if not target:
|