mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3rc1.dist-info/RECORD +0 -381
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/base.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -11,21 +11,27 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
import sys
|
|
15
14
|
import tempfile
|
|
15
|
+
import urllib.parse
|
|
16
16
|
from base64 import b64encode
|
|
17
17
|
from os import path, remove
|
|
18
|
+
from typing import Optional, Union
|
|
18
19
|
|
|
19
20
|
import dask.dataframe as dd
|
|
20
21
|
import fsspec
|
|
21
22
|
import orjson
|
|
22
23
|
import pandas as pd
|
|
24
|
+
import pyarrow
|
|
25
|
+
import pytz
|
|
23
26
|
import requests
|
|
24
27
|
import urllib3
|
|
25
28
|
|
|
26
29
|
import mlrun.errors
|
|
27
30
|
from mlrun.errors import err_to_str
|
|
28
|
-
from mlrun.utils import is_ipython, logger
|
|
31
|
+
from mlrun.utils import StorePrefix, is_ipython, logger
|
|
32
|
+
|
|
33
|
+
from .store_resources import is_store_uri, parse_store_uri
|
|
34
|
+
from .utils import filter_df_start_end_time, select_columns_from_df
|
|
29
35
|
|
|
30
36
|
verify_ssl = False
|
|
31
37
|
if not verify_ssl:
|
|
@@ -63,6 +69,17 @@ class DataStore:
|
|
|
63
69
|
def is_unstructured(self):
|
|
64
70
|
return True
|
|
65
71
|
|
|
72
|
+
@staticmethod
|
|
73
|
+
def _sanitize_url(url):
|
|
74
|
+
"""
|
|
75
|
+
Extract only the schema, netloc, and path from an input URL if they exist,
|
|
76
|
+
excluding parameters, query, or fragments.
|
|
77
|
+
"""
|
|
78
|
+
parsed_url = urllib.parse.urlparse(url)
|
|
79
|
+
scheme = f"{parsed_url.scheme}:" if parsed_url.scheme else ""
|
|
80
|
+
netloc = f"//{parsed_url.netloc}" if parsed_url.netloc else "//"
|
|
81
|
+
return f"{scheme}{netloc}{parsed_url.path}"
|
|
82
|
+
|
|
66
83
|
@staticmethod
|
|
67
84
|
def uri_to_kfp(endpoint, subpath):
|
|
68
85
|
raise ValueError("data store doesnt support KFP URLs")
|
|
@@ -71,7 +88,7 @@ class DataStore:
|
|
|
71
88
|
def uri_to_ipython(endpoint, subpath):
|
|
72
89
|
return ""
|
|
73
90
|
|
|
74
|
-
def get_filesystem(self, silent=True):
|
|
91
|
+
def get_filesystem(self, silent=True) -> Optional[fsspec.AbstractFileSystem]:
|
|
75
92
|
"""return fsspec file system object, if supported"""
|
|
76
93
|
return None
|
|
77
94
|
|
|
@@ -135,6 +152,64 @@ class DataStore:
|
|
|
135
152
|
def upload(self, key, src_path):
|
|
136
153
|
pass
|
|
137
154
|
|
|
155
|
+
@staticmethod
|
|
156
|
+
def _parquet_reader(df_module, url, file_system, time_column, start_time, end_time):
|
|
157
|
+
from storey.utils import find_filters, find_partitions
|
|
158
|
+
|
|
159
|
+
def set_filters(
|
|
160
|
+
partitions_time_attributes, start_time_inner, end_time_inner, kwargs
|
|
161
|
+
):
|
|
162
|
+
filters = []
|
|
163
|
+
find_filters(
|
|
164
|
+
partitions_time_attributes,
|
|
165
|
+
start_time_inner,
|
|
166
|
+
end_time_inner,
|
|
167
|
+
filters,
|
|
168
|
+
time_column,
|
|
169
|
+
)
|
|
170
|
+
kwargs["filters"] = filters
|
|
171
|
+
|
|
172
|
+
def reader(*args, **kwargs):
|
|
173
|
+
if start_time or end_time:
|
|
174
|
+
if time_column is None:
|
|
175
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
176
|
+
"When providing start_time or end_time, must provide time_column"
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
partitions_time_attributes = find_partitions(url, file_system)
|
|
180
|
+
set_filters(
|
|
181
|
+
partitions_time_attributes,
|
|
182
|
+
start_time,
|
|
183
|
+
end_time,
|
|
184
|
+
kwargs,
|
|
185
|
+
)
|
|
186
|
+
try:
|
|
187
|
+
return df_module.read_parquet(*args, **kwargs)
|
|
188
|
+
except pyarrow.lib.ArrowInvalid as ex:
|
|
189
|
+
if not str(ex).startswith(
|
|
190
|
+
"Cannot compare timestamp with timezone to timestamp without timezone"
|
|
191
|
+
):
|
|
192
|
+
raise ex
|
|
193
|
+
|
|
194
|
+
if start_time.tzinfo:
|
|
195
|
+
start_time_inner = start_time.replace(tzinfo=None)
|
|
196
|
+
end_time_inner = end_time.replace(tzinfo=None)
|
|
197
|
+
else:
|
|
198
|
+
start_time_inner = start_time.replace(tzinfo=pytz.utc)
|
|
199
|
+
end_time_inner = end_time.replace(tzinfo=pytz.utc)
|
|
200
|
+
|
|
201
|
+
set_filters(
|
|
202
|
+
partitions_time_attributes,
|
|
203
|
+
start_time_inner,
|
|
204
|
+
end_time_inner,
|
|
205
|
+
kwargs,
|
|
206
|
+
)
|
|
207
|
+
return df_module.read_parquet(*args, **kwargs)
|
|
208
|
+
else:
|
|
209
|
+
return df_module.read_parquet(*args, **kwargs)
|
|
210
|
+
|
|
211
|
+
return reader
|
|
212
|
+
|
|
138
213
|
def as_df(
|
|
139
214
|
self,
|
|
140
215
|
url,
|
|
@@ -148,17 +223,29 @@ class DataStore:
|
|
|
148
223
|
**kwargs,
|
|
149
224
|
):
|
|
150
225
|
df_module = df_module or pd
|
|
151
|
-
|
|
226
|
+
file_url = self._sanitize_url(url)
|
|
227
|
+
is_csv, is_json, drop_time_column = False, False, False
|
|
228
|
+
file_system = self.get_filesystem()
|
|
229
|
+
if file_url.endswith(".csv") or format == "csv":
|
|
230
|
+
is_csv = True
|
|
231
|
+
drop_time_column = False
|
|
152
232
|
if columns:
|
|
233
|
+
if (
|
|
234
|
+
time_column
|
|
235
|
+
and (start_time or end_time)
|
|
236
|
+
and time_column not in columns
|
|
237
|
+
):
|
|
238
|
+
columns.append(time_column)
|
|
239
|
+
drop_time_column = True
|
|
153
240
|
kwargs["usecols"] = columns
|
|
241
|
+
|
|
154
242
|
reader = df_module.read_csv
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
if filesystem.isdir(url):
|
|
243
|
+
if file_system:
|
|
244
|
+
if file_system.isdir(file_url):
|
|
158
245
|
|
|
159
246
|
def reader(*args, **kwargs):
|
|
160
247
|
base_path = args[0]
|
|
161
|
-
file_entries =
|
|
248
|
+
file_entries = file_system.listdir(base_path)
|
|
162
249
|
filenames = []
|
|
163
250
|
for file_entry in file_entries:
|
|
164
251
|
if (
|
|
@@ -176,51 +263,31 @@ class DataStore:
|
|
|
176
263
|
dfs.append(df_module.read_csv(*updated_args, **kwargs))
|
|
177
264
|
return pd.concat(dfs)
|
|
178
265
|
|
|
179
|
-
elif
|
|
266
|
+
elif (
|
|
267
|
+
file_url.endswith(".parquet")
|
|
268
|
+
or file_url.endswith(".pq")
|
|
269
|
+
or format == "parquet"
|
|
270
|
+
):
|
|
180
271
|
if columns:
|
|
181
272
|
kwargs["columns"] = columns
|
|
182
273
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
raise ValueError(
|
|
187
|
-
f"feature not supported for python version {sys.version_info}"
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
if time_column is None:
|
|
191
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
192
|
-
"When providing start_time or end_time, must provide time_column"
|
|
193
|
-
)
|
|
194
|
-
|
|
195
|
-
from storey.utils import find_filters, find_partitions
|
|
196
|
-
|
|
197
|
-
filters = []
|
|
198
|
-
partitions_time_attributes = find_partitions(url, file_system)
|
|
199
|
-
|
|
200
|
-
find_filters(
|
|
201
|
-
partitions_time_attributes,
|
|
202
|
-
start_time,
|
|
203
|
-
end_time,
|
|
204
|
-
filters,
|
|
205
|
-
time_column,
|
|
206
|
-
)
|
|
207
|
-
kwargs["filters"] = filters
|
|
208
|
-
|
|
209
|
-
return df_module.read_parquet(*args, **kwargs)
|
|
274
|
+
reader = self._parquet_reader(
|
|
275
|
+
df_module, url, file_system, time_column, start_time, end_time
|
|
276
|
+
)
|
|
210
277
|
|
|
211
|
-
elif
|
|
278
|
+
elif file_url.endswith(".json") or format == "json":
|
|
279
|
+
is_json = True
|
|
212
280
|
reader = df_module.read_json
|
|
213
281
|
|
|
214
282
|
else:
|
|
215
283
|
raise Exception(f"file type unhandled {url}")
|
|
216
284
|
|
|
217
|
-
file_system = self.get_filesystem()
|
|
218
285
|
if file_system:
|
|
219
|
-
if self.supports_isdir() and file_system.isdir(
|
|
286
|
+
if self.supports_isdir() and file_system.isdir(file_url) or df_module == dd:
|
|
220
287
|
storage_options = self.get_storage_options()
|
|
221
288
|
if storage_options:
|
|
222
289
|
kwargs["storage_options"] = storage_options
|
|
223
|
-
|
|
290
|
+
df = reader(url, **kwargs)
|
|
224
291
|
else:
|
|
225
292
|
|
|
226
293
|
file = url
|
|
@@ -230,12 +297,26 @@ class DataStore:
|
|
|
230
297
|
# support the storage_options parameter.
|
|
231
298
|
file = file_system.open(url)
|
|
232
299
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
300
|
+
df = reader(file, **kwargs)
|
|
301
|
+
else:
|
|
302
|
+
temp_file = tempfile.NamedTemporaryFile(delete=False)
|
|
303
|
+
self.download(self._join(subpath), temp_file.name)
|
|
304
|
+
df = reader(temp_file.name, **kwargs)
|
|
305
|
+
remove(temp_file.name)
|
|
306
|
+
|
|
307
|
+
if is_json or is_csv:
|
|
308
|
+
# for parquet file the time filtering is executed in `reader`
|
|
309
|
+
df = filter_df_start_end_time(
|
|
310
|
+
df,
|
|
311
|
+
time_column=time_column,
|
|
312
|
+
start_time=start_time,
|
|
313
|
+
end_time=end_time,
|
|
314
|
+
)
|
|
315
|
+
if drop_time_column:
|
|
316
|
+
df.drop(columns=[time_column], inplace=True)
|
|
317
|
+
if is_json:
|
|
318
|
+
# for csv and parquet files the columns select is executed in `reader`.
|
|
319
|
+
df = select_columns_from_df(df, columns=columns)
|
|
239
320
|
return df
|
|
240
321
|
|
|
241
322
|
def to_dict(self):
|
|
@@ -383,7 +464,7 @@ class DataItem:
|
|
|
383
464
|
return self._store.listdir(self._path)
|
|
384
465
|
|
|
385
466
|
def local(self):
|
|
386
|
-
"""get the local path of the file, download to tmp first if
|
|
467
|
+
"""get the local path of the file, download to tmp first if it's a remote object"""
|
|
387
468
|
if self.kind == "file":
|
|
388
469
|
return self._path
|
|
389
470
|
if self._local_path:
|
|
@@ -397,27 +478,47 @@ class DataItem:
|
|
|
397
478
|
self.download(self._local_path)
|
|
398
479
|
return self._local_path
|
|
399
480
|
|
|
481
|
+
def remove_local(self):
|
|
482
|
+
"""remove the local file if it exists and was downloaded from a remote object"""
|
|
483
|
+
if self.kind == "file":
|
|
484
|
+
return
|
|
485
|
+
|
|
486
|
+
if self._local_path:
|
|
487
|
+
remove(self._local_path)
|
|
488
|
+
self._local_path = ""
|
|
489
|
+
|
|
400
490
|
def as_df(
|
|
401
491
|
self,
|
|
402
492
|
columns=None,
|
|
403
493
|
df_module=None,
|
|
404
494
|
format="",
|
|
495
|
+
time_column=None,
|
|
496
|
+
start_time=None,
|
|
497
|
+
end_time=None,
|
|
405
498
|
**kwargs,
|
|
406
499
|
):
|
|
407
500
|
"""return a dataframe object (generated from the dataitem).
|
|
408
501
|
|
|
409
|
-
:param columns:
|
|
410
|
-
:param df_module:
|
|
411
|
-
:param format:
|
|
502
|
+
:param columns: optional, list of columns to select
|
|
503
|
+
:param df_module: optional, py module used to create the DataFrame (e.g. pd, dd, cudf, ..)
|
|
504
|
+
:param format: file format, if not specified it will be deduced from the suffix
|
|
505
|
+
:param start_time: filters out data before this time
|
|
506
|
+
:param end_time: filters out data after this time
|
|
507
|
+
:param time_column: Store timestamp_key will be used if None.
|
|
508
|
+
The results will be filtered by this column and start_time & end_time.
|
|
412
509
|
"""
|
|
413
|
-
|
|
510
|
+
df = self._store.as_df(
|
|
414
511
|
self._url,
|
|
415
512
|
self._path,
|
|
416
513
|
columns=columns,
|
|
417
514
|
df_module=df_module,
|
|
418
515
|
format=format,
|
|
516
|
+
time_column=time_column,
|
|
517
|
+
start_time=start_time,
|
|
518
|
+
end_time=end_time,
|
|
419
519
|
**kwargs,
|
|
420
520
|
)
|
|
521
|
+
return df
|
|
421
522
|
|
|
422
523
|
def show(self, format=None):
|
|
423
524
|
"""show the data object content in Jupyter
|
|
@@ -451,6 +552,19 @@ class DataItem:
|
|
|
451
552
|
else:
|
|
452
553
|
logger.error(f"unsupported show() format {suffix} for {self.url}")
|
|
453
554
|
|
|
555
|
+
def get_artifact_type(self) -> Union[str, None]:
|
|
556
|
+
"""
|
|
557
|
+
Check if the data item represents an Artifact (one of Artifact, DatasetArtifact and ModelArtifact). If it does
|
|
558
|
+
it returns the store uri prefix (artifacts, datasets or models), otherwise None.
|
|
559
|
+
|
|
560
|
+
:return: The store prefix of the artifact if it is an artifact data item and None if not.
|
|
561
|
+
"""
|
|
562
|
+
if self.artifact_url and is_store_uri(url=self.artifact_url):
|
|
563
|
+
store_uri_prefix = parse_store_uri(self.artifact_url)[0]
|
|
564
|
+
if StorePrefix.is_artifact(prefix=store_uri_prefix):
|
|
565
|
+
return store_uri_prefix
|
|
566
|
+
return None
|
|
567
|
+
|
|
454
568
|
def __str__(self):
|
|
455
569
|
return self.url
|
|
456
570
|
|
|
@@ -514,7 +628,12 @@ def http_upload(url, file_path, headers=None, auth=None):
|
|
|
514
628
|
class HttpStore(DataStore):
|
|
515
629
|
def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
|
|
516
630
|
super().__init__(parent, name, schema, endpoint, secrets)
|
|
631
|
+
self._https_auth_token = None
|
|
632
|
+
self._schema = schema
|
|
517
633
|
self.auth = None
|
|
634
|
+
self._headers = {}
|
|
635
|
+
self._enrich_https_token()
|
|
636
|
+
self._validate_https_token()
|
|
518
637
|
|
|
519
638
|
def get_filesystem(self, silent=True):
|
|
520
639
|
"""return fsspec file system object, if supported"""
|
|
@@ -532,9 +651,22 @@ class HttpStore(DataStore):
|
|
|
532
651
|
raise ValueError("unimplemented")
|
|
533
652
|
|
|
534
653
|
def get(self, key, size=None, offset=0):
|
|
535
|
-
data = http_get(self.url + self._join(key),
|
|
654
|
+
data = http_get(self.url + self._join(key), self._headers, self.auth)
|
|
536
655
|
if offset:
|
|
537
656
|
data = data[offset:]
|
|
538
657
|
if size:
|
|
539
658
|
data = data[:size]
|
|
540
659
|
return data
|
|
660
|
+
|
|
661
|
+
def _enrich_https_token(self):
|
|
662
|
+
token = self._get_secret_or_env("HTTPS_AUTH_TOKEN")
|
|
663
|
+
if token:
|
|
664
|
+
self._https_auth_token = token
|
|
665
|
+
self._headers.setdefault("Authorization", f"token {token}")
|
|
666
|
+
|
|
667
|
+
def _validate_https_token(self):
|
|
668
|
+
if self._https_auth_token and self._schema in ["http"]:
|
|
669
|
+
logger.warn(
|
|
670
|
+
f"A AUTH TOKEN should not be provided while using {self._schema} "
|
|
671
|
+
f"schema as it is not secure and is not recommended."
|
|
672
|
+
)
|
mlrun/datastore/datastore.py
CHANGED
mlrun/datastore/filestore.py
CHANGED
mlrun/datastore/inmem.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -80,5 +80,8 @@ class InMemoryStore(DataStore):
|
|
|
80
80
|
reader = df_module.read_json
|
|
81
81
|
else:
|
|
82
82
|
raise mlrun.errors.MLRunInvalidArgumentError(f"file type unhandled {url}")
|
|
83
|
+
# InMemoryStore does not filter on time
|
|
84
|
+
for field in ["time_column", "start_time", "end_time"]:
|
|
85
|
+
kwargs.pop(field, None)
|
|
83
86
|
|
|
84
87
|
return reader(item, **kwargs)
|
mlrun/datastore/redis.py
CHANGED
mlrun/datastore/s3.py
CHANGED