mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3rc1.dist-info/RECORD +0 -381
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -15,7 +15,9 @@ import ast
|
|
|
15
15
|
import datetime
|
|
16
16
|
import os
|
|
17
17
|
import random
|
|
18
|
+
import sys
|
|
18
19
|
import time
|
|
20
|
+
import warnings
|
|
19
21
|
from collections import Counter
|
|
20
22
|
from copy import copy
|
|
21
23
|
from typing import Any, Dict, List, Optional, Union
|
|
@@ -34,7 +36,13 @@ from mlrun.utils.v3io_clients import get_frames_client
|
|
|
34
36
|
from .. import errors
|
|
35
37
|
from ..data_types import ValueType
|
|
36
38
|
from ..platforms.iguazio import parse_path, split_path
|
|
37
|
-
from .utils import
|
|
39
|
+
from .utils import (
|
|
40
|
+
_generate_sql_query_with_time_filter,
|
|
41
|
+
filter_df_start_end_time,
|
|
42
|
+
parse_kafka_url,
|
|
43
|
+
select_columns_from_df,
|
|
44
|
+
store_path_to_spark,
|
|
45
|
+
)
|
|
38
46
|
|
|
39
47
|
|
|
40
48
|
class TargetTypes:
|
|
@@ -74,11 +82,12 @@ def default_target_names():
|
|
|
74
82
|
return [target.strip() for target in targets.split(",")]
|
|
75
83
|
|
|
76
84
|
|
|
77
|
-
def get_default_targets():
|
|
85
|
+
def get_default_targets(offline_only=False):
|
|
78
86
|
"""initialize the default feature set targets list"""
|
|
79
87
|
return [
|
|
80
88
|
DataTargetBase(target, name=str(target), partitioned=(target == "parquet"))
|
|
81
89
|
for target in default_target_names()
|
|
90
|
+
if not offline_only or not target == "nosql"
|
|
82
91
|
]
|
|
83
92
|
|
|
84
93
|
|
|
@@ -986,6 +995,9 @@ class CSVTarget(BaseStoreTarget):
|
|
|
986
995
|
df_module=df_module,
|
|
987
996
|
entities=entities,
|
|
988
997
|
format="csv",
|
|
998
|
+
start_time=start_time,
|
|
999
|
+
end_time=end_time,
|
|
1000
|
+
time_column=time_column,
|
|
989
1001
|
**kwargs,
|
|
990
1002
|
)
|
|
991
1003
|
if entities:
|
|
@@ -1050,24 +1062,11 @@ class NoSqlBaseTarget(BaseStoreTarget):
|
|
|
1050
1062
|
**self.attributes,
|
|
1051
1063
|
)
|
|
1052
1064
|
|
|
1065
|
+
def prepare_spark_df(self, df, key_columns):
|
|
1066
|
+
raise NotImplementedError()
|
|
1067
|
+
|
|
1053
1068
|
def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
|
|
1054
|
-
|
|
1055
|
-
"path": store_path_to_spark(self.get_target_path()),
|
|
1056
|
-
"format": "io.iguaz.v3io.spark.sql.kv",
|
|
1057
|
-
}
|
|
1058
|
-
if isinstance(key_column, list) and len(key_column) >= 1:
|
|
1059
|
-
if len(key_column) > 2:
|
|
1060
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1061
|
-
f"Spark supports maximun of 2 keys and {key_column} are provided"
|
|
1062
|
-
)
|
|
1063
|
-
spark_options["key"] = key_column[0]
|
|
1064
|
-
if len(key_column) > 1:
|
|
1065
|
-
spark_options["sorting-key"] = key_column[1]
|
|
1066
|
-
else:
|
|
1067
|
-
spark_options["key"] = key_column
|
|
1068
|
-
if not overwrite:
|
|
1069
|
-
spark_options["columnUpdate"] = True
|
|
1070
|
-
return spark_options
|
|
1069
|
+
raise NotImplementedError()
|
|
1071
1070
|
|
|
1072
1071
|
def get_dask_options(self):
|
|
1073
1072
|
return {"format": "csv"}
|
|
@@ -1075,15 +1074,6 @@ class NoSqlBaseTarget(BaseStoreTarget):
|
|
|
1075
1074
|
def as_df(self, columns=None, df_module=None, **kwargs):
|
|
1076
1075
|
raise NotImplementedError()
|
|
1077
1076
|
|
|
1078
|
-
def prepare_spark_df(self, df, key_columns):
|
|
1079
|
-
import pyspark.sql.functions as funcs
|
|
1080
|
-
|
|
1081
|
-
for col_name, col_type in df.dtypes:
|
|
1082
|
-
if col_type.startswith("decimal("):
|
|
1083
|
-
# V3IO does not support this level of precision
|
|
1084
|
-
df = df.withColumn(col_name, funcs.col(col_name).cast("double"))
|
|
1085
|
-
return df
|
|
1086
|
-
|
|
1087
1077
|
def write_dataframe(
|
|
1088
1078
|
self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
|
|
1089
1079
|
):
|
|
@@ -1123,10 +1113,52 @@ class NoSqlTarget(NoSqlBaseTarget):
|
|
|
1123
1113
|
endpoint, uri = parse_path(self.get_target_path())
|
|
1124
1114
|
return Table(
|
|
1125
1115
|
uri,
|
|
1126
|
-
V3ioDriver(webapi=endpoint),
|
|
1116
|
+
V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
|
|
1127
1117
|
flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
|
|
1128
1118
|
)
|
|
1129
1119
|
|
|
1120
|
+
def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
|
|
1121
|
+
spark_options = {
|
|
1122
|
+
"path": store_path_to_spark(self.get_target_path()),
|
|
1123
|
+
"format": "io.iguaz.v3io.spark.sql.kv",
|
|
1124
|
+
}
|
|
1125
|
+
if isinstance(key_column, list) and len(key_column) >= 1:
|
|
1126
|
+
spark_options["key"] = key_column[0]
|
|
1127
|
+
if len(key_column) > 2:
|
|
1128
|
+
spark_options["sorting-key"] = "_spark_object_name"
|
|
1129
|
+
if len(key_column) == 2:
|
|
1130
|
+
spark_options["sorting-key"] = key_column[1]
|
|
1131
|
+
else:
|
|
1132
|
+
spark_options["key"] = key_column
|
|
1133
|
+
if not overwrite:
|
|
1134
|
+
spark_options["columnUpdate"] = True
|
|
1135
|
+
return spark_options
|
|
1136
|
+
|
|
1137
|
+
def prepare_spark_df(self, df, key_columns):
|
|
1138
|
+
from pyspark.sql.functions import col
|
|
1139
|
+
|
|
1140
|
+
spark_udf_directory = os.path.dirname(os.path.abspath(__file__))
|
|
1141
|
+
sys.path.append(spark_udf_directory)
|
|
1142
|
+
try:
|
|
1143
|
+
import spark_udf
|
|
1144
|
+
|
|
1145
|
+
df.rdd.context.addFile(spark_udf.__file__)
|
|
1146
|
+
|
|
1147
|
+
for col_name, col_type in df.dtypes:
|
|
1148
|
+
if col_type.startswith("decimal("):
|
|
1149
|
+
# V3IO does not support this level of precision
|
|
1150
|
+
df = df.withColumn(col_name, col(col_name).cast("double"))
|
|
1151
|
+
if len(key_columns) > 2:
|
|
1152
|
+
return df.withColumn(
|
|
1153
|
+
"_spark_object_name",
|
|
1154
|
+
spark_udf.hash_and_concat_v3io_udf(
|
|
1155
|
+
*[col(c) for c in key_columns[1:]]
|
|
1156
|
+
),
|
|
1157
|
+
)
|
|
1158
|
+
finally:
|
|
1159
|
+
sys.path.remove(spark_udf_directory)
|
|
1160
|
+
return df
|
|
1161
|
+
|
|
1130
1162
|
|
|
1131
1163
|
class RedisNoSqlTarget(NoSqlBaseTarget):
|
|
1132
1164
|
kind = TargetTypes.redisnosql
|
|
@@ -1186,11 +1218,23 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
|
|
|
1186
1218
|
return endpoint
|
|
1187
1219
|
|
|
1188
1220
|
def prepare_spark_df(self, df, key_columns):
|
|
1189
|
-
from pyspark.sql.functions import
|
|
1190
|
-
|
|
1221
|
+
from pyspark.sql.functions import col
|
|
1222
|
+
|
|
1223
|
+
spark_udf_directory = os.path.dirname(os.path.abspath(__file__))
|
|
1224
|
+
sys.path.append(spark_udf_directory)
|
|
1225
|
+
try:
|
|
1226
|
+
import spark_udf
|
|
1227
|
+
|
|
1228
|
+
df.rdd.context.addFile(spark_udf.__file__)
|
|
1191
1229
|
|
|
1192
|
-
|
|
1193
|
-
|
|
1230
|
+
df = df.withColumn(
|
|
1231
|
+
"_spark_object_name",
|
|
1232
|
+
spark_udf.hash_and_concat_redis_udf(*[col(c) for c in key_columns]),
|
|
1233
|
+
)
|
|
1234
|
+
finally:
|
|
1235
|
+
sys.path.remove(spark_udf_directory)
|
|
1236
|
+
|
|
1237
|
+
return df
|
|
1194
1238
|
|
|
1195
1239
|
|
|
1196
1240
|
class StreamTarget(BaseStoreTarget):
|
|
@@ -1224,7 +1268,7 @@ class StreamTarget(BaseStoreTarget):
|
|
|
1224
1268
|
graph_shape="cylinder",
|
|
1225
1269
|
class_name="storey.StreamTarget",
|
|
1226
1270
|
columns=column_list,
|
|
1227
|
-
storage=V3ioDriver(webapi=endpoint),
|
|
1271
|
+
storage=V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
|
|
1228
1272
|
stream_path=uri,
|
|
1229
1273
|
**self.attributes,
|
|
1230
1274
|
)
|
|
@@ -1441,7 +1485,15 @@ class DFTarget(BaseStoreTarget):
|
|
|
1441
1485
|
time_column=None,
|
|
1442
1486
|
**kwargs,
|
|
1443
1487
|
):
|
|
1444
|
-
return
|
|
1488
|
+
return select_columns_from_df(
|
|
1489
|
+
filter_df_start_end_time(
|
|
1490
|
+
self._df,
|
|
1491
|
+
time_column=time_column,
|
|
1492
|
+
start_time=start_time,
|
|
1493
|
+
end_time=end_time,
|
|
1494
|
+
),
|
|
1495
|
+
columns,
|
|
1496
|
+
)
|
|
1445
1497
|
|
|
1446
1498
|
|
|
1447
1499
|
class SQLTarget(BaseStoreTarget):
|
|
@@ -1472,14 +1524,15 @@ class SQLTarget(BaseStoreTarget):
|
|
|
1472
1524
|
# create_according_to_data: bool = False,
|
|
1473
1525
|
time_fields: List[str] = None,
|
|
1474
1526
|
varchar_len: int = 50,
|
|
1527
|
+
parse_dates: List[str] = None,
|
|
1475
1528
|
):
|
|
1476
1529
|
"""
|
|
1477
1530
|
Write to SqlDB as output target for a flow.
|
|
1478
1531
|
example::
|
|
1479
|
-
|
|
1532
|
+
db_url = "sqlite:///stockmarket.db"
|
|
1480
1533
|
schema = {'time': datetime.datetime, 'ticker': str,
|
|
1481
1534
|
'bid': float, 'ask': float, 'ind': int}
|
|
1482
|
-
target = SqlDBTarget(table_name=f'{name}-
|
|
1535
|
+
target = SqlDBTarget(table_name=f'{name}-target', db_url=db_url, create_table=True,
|
|
1483
1536
|
schema=schema, primary_key_column=key)
|
|
1484
1537
|
:param name:
|
|
1485
1538
|
:param path:
|
|
@@ -1509,8 +1562,17 @@ class SQLTarget(BaseStoreTarget):
|
|
|
1509
1562
|
:param create_according_to_data: (not valid)
|
|
1510
1563
|
:param time_fields : all the field to be parsed as timestamp.
|
|
1511
1564
|
:param varchar_len : the defalut len of the all the varchar column (using if needed to create the table).
|
|
1565
|
+
:param parse_dates : all the field to be parsed as timestamp.
|
|
1512
1566
|
"""
|
|
1513
1567
|
create_according_to_data = False # TODO: open for user
|
|
1568
|
+
if time_fields:
|
|
1569
|
+
warnings.warn(
|
|
1570
|
+
"'time_fields' is deprecated, use 'parse_dates' instead. "
|
|
1571
|
+
"This will be removed in 1.6.0",
|
|
1572
|
+
# TODO: Remove this in 1.6.0
|
|
1573
|
+
FutureWarning,
|
|
1574
|
+
)
|
|
1575
|
+
parse_dates = time_fields
|
|
1514
1576
|
db_url = db_url or mlrun.mlconf.sql.url
|
|
1515
1577
|
if db_url is None or table_name is None:
|
|
1516
1578
|
attr = {}
|
|
@@ -1523,7 +1585,7 @@ class SQLTarget(BaseStoreTarget):
|
|
|
1523
1585
|
"db_path": db_url,
|
|
1524
1586
|
"create_according_to_data": create_according_to_data,
|
|
1525
1587
|
"if_exists": if_exists,
|
|
1526
|
-
"
|
|
1588
|
+
"parse_dates": parse_dates,
|
|
1527
1589
|
"varchar_len": varchar_len,
|
|
1528
1590
|
}
|
|
1529
1591
|
path = (
|
|
@@ -1610,16 +1672,24 @@ class SQLTarget(BaseStoreTarget):
|
|
|
1610
1672
|
):
|
|
1611
1673
|
db_path, table_name, _, _, _, _ = self._parse_url()
|
|
1612
1674
|
engine = sqlalchemy.create_engine(db_path)
|
|
1675
|
+
parse_dates: Optional[List[str]] = self.attributes.get("parse_dates")
|
|
1613
1676
|
with engine.connect() as conn:
|
|
1677
|
+
query, parse_dates = _generate_sql_query_with_time_filter(
|
|
1678
|
+
table_name=table_name,
|
|
1679
|
+
engine=engine,
|
|
1680
|
+
time_column=time_column,
|
|
1681
|
+
parse_dates=parse_dates,
|
|
1682
|
+
start_time=start_time,
|
|
1683
|
+
end_time=end_time,
|
|
1684
|
+
)
|
|
1614
1685
|
df = pd.read_sql(
|
|
1615
|
-
|
|
1686
|
+
query,
|
|
1616
1687
|
con=conn,
|
|
1617
|
-
parse_dates=
|
|
1688
|
+
parse_dates=parse_dates,
|
|
1689
|
+
columns=columns,
|
|
1618
1690
|
)
|
|
1619
1691
|
if self._primary_key_column:
|
|
1620
1692
|
df.set_index(self._primary_key_column, inplace=True)
|
|
1621
|
-
if columns:
|
|
1622
|
-
df = df[columns]
|
|
1623
1693
|
return df
|
|
1624
1694
|
|
|
1625
1695
|
def write_dataframe(
|
|
@@ -1730,12 +1800,12 @@ def _get_target_path(driver, resource, run_id_mode=False):
|
|
|
1730
1800
|
if not suffix:
|
|
1731
1801
|
if (
|
|
1732
1802
|
kind == ParquetTarget.kind
|
|
1733
|
-
and resource.kind == mlrun.
|
|
1803
|
+
and resource.kind == mlrun.common.schemas.ObjectKind.feature_vector
|
|
1734
1804
|
):
|
|
1735
1805
|
suffix = ".parquet"
|
|
1736
1806
|
kind_prefix = (
|
|
1737
1807
|
"sets"
|
|
1738
|
-
if resource.kind == mlrun.
|
|
1808
|
+
if resource.kind == mlrun.common.schemas.ObjectKind.feature_set
|
|
1739
1809
|
else "vectors"
|
|
1740
1810
|
)
|
|
1741
1811
|
name = resource.metadata.name
|
mlrun/datastore/utils.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -12,7 +12,15 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
-
|
|
15
|
+
import tarfile
|
|
16
|
+
import tempfile
|
|
17
|
+
import typing
|
|
18
|
+
from urllib.parse import parse_qs, urlparse
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
import sqlalchemy
|
|
22
|
+
|
|
23
|
+
import mlrun.datastore
|
|
16
24
|
|
|
17
25
|
|
|
18
26
|
def store_path_to_spark(path):
|
|
@@ -36,11 +44,125 @@ def store_path_to_spark(path):
|
|
|
36
44
|
return path
|
|
37
45
|
|
|
38
46
|
|
|
39
|
-
def parse_kafka_url(
|
|
47
|
+
def parse_kafka_url(
    url: str, bootstrap_servers: typing.List = None
) -> typing.Tuple[str, typing.List]:
    """Extract the Kafka topic from a URL and assemble the bootstrap servers list.

    The URL's network location, when present, is placed ahead of any servers
    that were passed in. The topic is taken from the ``topic`` query parameter
    if the URL carries one; otherwise the URL path is used, with leading
    slashes removed.

    :param url:               URL to parse (handled by urllib.parse.urlparse).
    :param bootstrap_servers: Optional list of bootstrap servers for the kafka brokers.

    :return: A tuple of:
             [0] = Kafka topic value
             [1] = List of bootstrap servers
    """
    parsed = urlparse(url)

    # The host[:port] part of the URL, if any, goes first in the servers list
    servers = bootstrap_servers or []
    if parsed.netloc:
        servers = [parsed.netloc] + servers

    # An explicit ?topic=... wins over the path component.
    # NOTE(review): the flattened source does not show whether the "/" strip
    # also applied to a query-supplied topic; only the path is stripped here -
    # confirm against the original indentation.
    query_topics = parse_qs(parsed.query).get("topic")
    if query_topics is not None:
        topic = query_topics[0]
    else:
        topic = parsed.path.lstrip("/")
    return topic, servers
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def upload_tarball(source_dir, target, secrets=None):
    """Pack a directory into a gzipped tarball and upload it to a datastore target.

    The archive is written to a named temporary file, which is removed
    automatically once the upload call returns (or if an error is raised).

    :param source_dir: Local directory to archive; its content is added at the
                       archive root (``arcname=""``).
    :param target:     Target URL, resolved to a datastore and sub-path through
                       ``mlrun.datastore.store_manager``.
    :param secrets:    Optional secrets handed to the store manager.
    """
    # the temp file is deleted when this context exits
    with tempfile.NamedTemporaryFile(suffix=".tar.gz") as temp_fh:
        with tarfile.open(mode="w:gz", fileobj=temp_fh) as tar:
            tar.add(source_dir, arcname="")

        # tarfile never flushes or closes a file object it was handed, so bytes
        # may still sit in temp_fh's write buffer. The upload below is given the
        # file by *name*, so push everything to disk first to avoid uploading a
        # truncated archive.
        temp_fh.flush()

        stores = mlrun.datastore.store_manager.set(secrets)
        datastore, subpath = stores.get_or_create_store(target)
        datastore.upload(subpath, temp_fh.name)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def filter_df_start_end_time(
    df: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
    time_column: str = None,
    start_time: pd.Timestamp = None,
    end_time: pd.Timestamp = None,
) -> typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]]:
    """Restrict a dataframe, or an iterator of dataframes, to a time window.

    Filtering takes place only when ``time_column`` is given together with at
    least one of ``start_time`` / ``end_time``; otherwise ``df`` is returned
    as is.

    :param df:          A dataframe, or an iterator yielding dataframes.
    :param time_column: Name of the column holding the timestamps.
    :param start_time:  Lower bound of the window (optional).
    :param end_time:    Upper bound of the window (optional).

    :return: A filtered dataframe when a dataframe was passed, otherwise a
             generator yielding the filtered dataframes.
    """
    if time_column and (start_time or end_time):
        filter_args = (time_column, start_time, end_time)
        if isinstance(df, pd.DataFrame):
            return _execute_time_filter(df, *filter_args)
        # anything that is not a dataframe is treated as an iterator of dataframes
        return filter_df_generator(df, *filter_args)

    # no time column or no bounds - nothing to filter
    return df
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def filter_df_generator(
    dfs: typing.Iterator[pd.DataFrame],
    time_field: str,
    start_time: pd.Timestamp,
    end_time: pd.Timestamp,
) -> typing.Iterator[pd.DataFrame]:
    """Lazily apply the time-window filter to every dataframe in ``dfs``.

    :param dfs:        Iterator yielding dataframes.
    :param time_field: Name of the column holding the timestamps.
    :param start_time: Lower bound handed to ``_execute_time_filter``.
    :param end_time:   Upper bound handed to ``_execute_time_filter``.

    :return: A generator yielding one filtered dataframe per input dataframe.
    """
    yield from (
        _execute_time_filter(chunk, time_field, start_time, end_time)
        for chunk in dfs
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _execute_time_filter(
|
|
113
|
+
df: pd.DataFrame, time_column: str, start_time: pd.Timestamp, end_time: pd.Timestamp
|
|
114
|
+
):
|
|
115
|
+
df[time_column] = pd.to_datetime(df[time_column])
|
|
116
|
+
if start_time:
|
|
117
|
+
df = df[df[time_column] > start_time]
|
|
118
|
+
if end_time:
|
|
119
|
+
df = df[df[time_column] <= end_time]
|
|
120
|
+
return df
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def select_columns_from_df(
    df: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
    columns: typing.List[str],
) -> typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]]:
    """Project a dataframe, or an iterator of dataframes, onto ``columns``.

    :param df:      A dataframe, or an iterator yielding dataframes.
    :param columns: Names of the columns to keep. When empty or None, ``df`` is
                    returned unchanged.

    :return: The projected dataframe when a dataframe was passed, otherwise a
             generator yielding the projected dataframes.
    """
    if columns:
        if isinstance(df, pd.DataFrame):
            return df[columns]
        # anything that is not a dataframe is treated as an iterator of dataframes
        return select_columns_generator(df, columns)

    # no column selection requested
    return df
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def select_columns_generator(
    dfs: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
    columns: typing.List[str],
) -> typing.Iterator[pd.DataFrame]:
    """Lazily project every dataframe produced by ``dfs`` onto ``columns``.

    :param dfs:     Iterable yielding dataframes.
    :param columns: Names of the columns to keep in each dataframe.

    :return: A generator yielding one projected dataframe per input dataframe.
    """
    yield from (chunk[columns] for chunk in dfs)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _generate_sql_query_with_time_filter(
    table_name: str,
    engine: sqlalchemy.engine.Engine,
    time_column: str,
    parse_dates: typing.List[str],
    start_time: pd.Timestamp,
    end_time: pd.Timestamp,
):
    """Build a SELECT over a reflected table, optionally limited to a time window.

    When ``time_column`` is given it is added to the date columns to parse, and
    the query is restricted to rows with ``start_time < time_column <= end_time``
    (each bound is applied only when it is truthy).

    :param table_name:  Name of the table to reflect and query.
    :param engine:      SQLAlchemy engine used to reflect the table.
    :param time_column: Name of the timestamp column to filter on (optional).
    :param parse_dates: Names of columns to be parsed as dates (may be None).
                        The list that was passed in is never modified.
    :param start_time:  Exclusive lower bound of the window (optional).
    :param end_time:    Inclusive upper bound of the window (optional).

    :return: A tuple of (query, parse_dates), where parse_dates includes
             ``time_column`` whenever one was given.
    """
    # autoload_with on its own triggers reflection; the separate autoload flag
    # is redundant, deprecated in SQLAlchemy 1.4 and rejected by 2.0
    table = sqlalchemy.Table(
        table_name,
        sqlalchemy.MetaData(),
        autoload_with=engine,
    )
    query = sqlalchemy.select(table)
    if time_column:
        # Work on a fresh list: the caller's list must not grow as a side
        # effect, and date columns that were already requested must be kept
        # even when time_column is one of them.
        parse_dates = list(parse_dates or [])
        if time_column not in parse_dates:
            parse_dates.append(time_column)
        if start_time:
            query = query.filter(getattr(table.c, time_column) > start_time)
        if end_time:
            query = query.filter(getattr(table.c, time_column) <= end_time)

    return query, parse_dates
|
mlrun/datastore/v3io.py
CHANGED
mlrun/datastore/wasbfs/fs.py
CHANGED
mlrun/db/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -18,7 +18,7 @@ from ..config import config
|
|
|
18
18
|
from ..platforms import add_or_refresh_credentials
|
|
19
19
|
from ..utils import logger
|
|
20
20
|
from .base import RunDBError, RunDBInterface # noqa
|
|
21
|
-
from .
|
|
21
|
+
from .nopdb import NopDB
|
|
22
22
|
from .sqldb import SQLDB
|
|
23
23
|
|
|
24
24
|
|
|
@@ -69,12 +69,14 @@ def get_run_db(url="", secrets=None, force_reconnect=False):
|
|
|
69
69
|
kwargs = {}
|
|
70
70
|
if "://" not in str(url) or scheme in ["file", "s3", "v3io", "v3ios"]:
|
|
71
71
|
logger.warning(
|
|
72
|
-
"Could not detect path to API server,
|
|
72
|
+
"Could not detect path to API server, not connected to API server!"
|
|
73
73
|
)
|
|
74
74
|
logger.warning(
|
|
75
|
-
"
|
|
75
|
+
"MLRUN_DBPATH is not set. Set this environment variable to the URL of the API server"
|
|
76
|
+
" in order to connect"
|
|
76
77
|
)
|
|
77
|
-
cls =
|
|
78
|
+
cls = NopDB
|
|
79
|
+
|
|
78
80
|
elif scheme in ("http", "https"):
|
|
79
81
|
# import here to avoid circular imports
|
|
80
82
|
from .httpdb import HTTPRunDB
|