mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444)
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,968 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ import importlib
16
+ import os
17
+ import pathlib
18
+ import tempfile
19
+ from abc import ABC, abstractmethod
20
+ from typing import Any, List, Tuple, Union
21
+
22
+ import pandas as pd
23
+
24
+ from mlrun.artifacts import Artifact, DatasetArtifact
25
+ from mlrun.datastore import DataItem
26
+ from mlrun.errors import MLRunInvalidArgumentError
27
+
28
+ from ..utils import ArtifactType, SupportedFormat
29
+ from .default_packager import DefaultPackager
30
+
31
+
32
class _Formatter(ABC):
    """
    An abstract class for a pandas formatter - supporting saving and loading dataframes to and from specific file type.
    """

    @classmethod
    @abstractmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Save the given dataframe to the file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                          flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                          especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs: Additional keyword arguments to pass to the relevant `to_x` function.

        :return A dictionary of keyword arguments for reading the dataframe from file.
        """
        pass

    @classmethod
    @abstractmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Read the dataframe from the given file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Unflatten keyword arguments for unflattening the read dataframe.
        :param read_kwargs:      Additional keyword arguments to pass to the relevant read function of pandas.

        :return: The loaded dataframe.
        """
        pass

    @staticmethod
    def _flatten_dataframe(dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
        """
        Flatten the dataframe: moving all indexes to be columns at the start (from column 0) and lowering the columns
        levels to 1, renaming them from tuples. All columns and index info is stored so it can be unflattened later on.

        Note: the dataframe is modified in place and also returned.

        :param dataframe: The dataframe to flatten.

        :return: The flat dataframe together with a dictionary of keyword arguments for `_unflatten_dataframe`.
        """
        # Save columns info:
        columns = list(dataframe.columns)
        if isinstance(dataframe.columns, pd.MultiIndex):
            columns = [list(column_tuple) for column_tuple in columns]
        columns_levels = list(dataframe.columns.names)

        # Save index info:
        index_levels = list(dataframe.index.names)

        # Turn multi-index columns into single columns:
        if len(columns_levels) > 1:
            # We turn the column tuple into a string to eliminate parsing issues during saving to text formats.
            # Labels are cast to `str` so that non-string labels (e.g. ints in a MultiIndex) don't break the join;
            # the original labels are preserved in the returned instructions, so unflattening restores them exactly:
            dataframe.columns = pd.Index(
                "-".join(str(label) for label in column_tuple)
                for column_tuple in columns
            )

        # Rename indexes in case they appear in the columns so they won't get overridden when the index is reset:
        dataframe.index.set_names(
            names=[
                name
                if name is not None and name not in dataframe.columns
                else f"INDEX_{name}_{i}"
                for i, name in enumerate(dataframe.index.names)
            ],
            inplace=True,
        )

        # Reset the index, moving the current index to a column:
        dataframe.reset_index(inplace=True)

        return dataframe, {
            "columns": columns,
            "columns_levels": columns_levels,
            "index_levels": index_levels,
        }

    @staticmethod
    def _unflatten_dataframe(
        dataframe: pd.DataFrame,
        columns: list,
        columns_levels: list,
        index_levels: list,
    ) -> pd.DataFrame:
        """
        Unflatten the dataframe, moving the indexes from the columns and resuming the columns levels and names.

        :param dataframe:      The dataframe to unflatten.
        :param columns:        The original list of columns.
        :param columns_levels: The original columns levels names.
        :param index_levels:   The original index levels names.

        :return: The un-flattened dataframe.
        """
        # Move back index from columns (the first `len(index_levels)` columns were produced by `reset_index`):
        dataframe.set_index(
            keys=list(dataframe.columns[: len(index_levels)]), inplace=True
        )
        dataframe.index.set_names(names=index_levels, inplace=True)

        # Set the columns back in case they were multi-leveled:
        if len(columns_levels) > 1:
            dataframe.columns = pd.MultiIndex.from_tuples(
                tuples=columns, names=columns_levels
            )
        else:
            dataframe.columns.set_names(names=columns_levels, inplace=True)

        return dataframe
152
class _ParquetFormatter(_Formatter):
    """
    A static class for managing pandas parquet files.
    """

    @classmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Write the given dataframe to the parquet file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Ignored for parquet format.
        :param to_kwargs: Additional keyword arguments passed on to the `to_parquet` function.

        :return A dictionary of keyword arguments for reading the dataframe from file.
        """
        # Parquet keeps indexes and column levels on its own, so no flattening
        # is applied and no read instructions need to be returned:
        obj.to_parquet(path=file_path, **to_kwargs)
        return {}

    @classmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Load a dataframe back from the given parquet file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Ignored for parquet format.
        :param read_kwargs:      Additional keyword arguments passed on to the `read_parquet` function.

        :return: The loaded dataframe.
        """
        dataframe = pd.read_parquet(path=file_path, **read_kwargs)
        return dataframe
190
class _CSVFormatter(_Formatter):
    """
    A static class for managing pandas csv files.
    """

    @classmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Write the given dataframe to the csv file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                          flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                          especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs: Additional keyword arguments passed on to the `to_csv` function.

        :return A dictionary of keyword arguments for reading the dataframe from file.
        """
        # csv cannot represent multi-level columns / indexes reliably, so the
        # dataframe is flattened first and the unflatten instructions returned:
        if flatten:
            obj, unflatten_kwargs = cls._flatten_dataframe(dataframe=obj)
            read_instructions = {"unflatten_kwargs": unflatten_kwargs}
        else:
            read_instructions = {}

        # Write to csv:
        obj.to_csv(path_or_buf=file_path, **to_kwargs)

        return read_instructions

    @classmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Load a dataframe back from the given csv file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Unflatten keyword arguments for unflattening the read dataframe.
        :param read_kwargs:      Additional keyword arguments passed on to the `read_csv` function.

        :return: The loaded dataframe.
        """
        # Read the csv:
        dataframe = pd.read_csv(filepath_or_buffer=file_path, **read_kwargs)

        # Nothing more to do if the dataframe was not flattened during packing:
        if unflatten_kwargs is None:
            return dataframe

        # Drop the default RangeIndex column that `to_csv` wrote alongside the
        # reset index (`read_csv` names it "Unnamed: 0"):
        if dataframe.columns[0] == "Unnamed: 0":
            dataframe.drop(columns=["Unnamed: 0"], inplace=True)

        # Unflatten the dataframe:
        return cls._unflatten_dataframe(dataframe=dataframe, **unflatten_kwargs)
249
class _H5Formatter(_Formatter):
    """
    A static class for managing pandas h5 files.
    """

    @classmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Save the given dataframe to the h5 file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Ignored for h5 format.
        :param to_kwargs: Additional keyword arguments to pass to the `to_hdf` function.

        :return: A dictionary of keyword arguments for reading the dataframe from file.
        """
        # Fall back to the default key 'table' when the user did not provide one:
        hdf_key = to_kwargs.pop("key", "table")

        obj.to_hdf(path_or_buf=file_path, key=hdf_key, **to_kwargs)

        # The key is needed again for reading the dataframe back from the file:
        return {"key": hdf_key}

    @classmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Read the dataframe from the given h5 file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Ignored for h5 format.
        :param read_kwargs:      Additional keyword arguments to pass to the `read_hdf` function.

        :return: The loaded dataframe.
        """
        return pd.read_hdf(path_or_buf=file_path, **read_kwargs)
290
+
291
+
292
class _XMLFormatter(_Formatter):
    """
    A static class for managing pandas xml files.
    """

    @classmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Save the given dataframe to the xml file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                          flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                          especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs: Additional keyword arguments to pass to the `to_xml` function.

        :return: A dictionary of keyword arguments for reading the dataframe from file.
        """
        # Get the parser (if not provided, try to use `lxml`, otherwise `etree`):
        parser = to_kwargs.pop("parser", None)
        if parser is None:
            try:
                importlib.import_module("lxml")
                parser = "lxml"
            except ModuleNotFoundError:
                parser = "etree"
        instructions = {"parser": parser}

        # Flatten the dataframe (this format have problems saving multi-level dataframes):
        if flatten:
            obj, unflatten_kwargs = cls._flatten_dataframe(dataframe=obj)
            instructions["unflatten_kwargs"] = unflatten_kwargs

        # Write to xml with the selected parser (it was previously hard-coded to "etree", discarding the
        # detected `lxml` parser or the user's choice):
        obj.to_xml(path_or_buffer=file_path, parser=parser, **to_kwargs)

        return instructions

    @classmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Read the dataframe from the given xml file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Unflatten keyword arguments for unflattening the read dataframe.
        :param read_kwargs:      Additional keyword arguments to pass to the `read_xml` function.

        :return: The loaded dataframe.
        """
        # Read the xml:
        obj = pd.read_xml(path_or_buffer=file_path, **read_kwargs)

        # Check if it was flattened in packing:
        if unflatten_kwargs is not None:
            # Remove the default index (joined with reset index):
            if obj.columns[0] == "index":
                obj.drop(columns=["index"], inplace=True)
            # Unflatten the dataframe:
            obj = cls._unflatten_dataframe(dataframe=obj, **unflatten_kwargs)

        return obj
358
+
359
+
360
class _XLSXFormatter(_Formatter):
    """
    A static class for managing pandas xlsx files.
    """

    @classmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Save the given dataframe to the xlsx file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                          flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                          especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs: Additional keyword arguments to pass to the `to_excel` function.

        :return: A dictionary of keyword arguments for reading the dataframe from file.
        """
        # Record the engine used for writing so the same one can be used when unpacking:
        instructions = {"engine": to_kwargs.get("engine", None)}

        # Excel cannot reliably round-trip multi-level / multi-index dataframes, so flatten first:
        if flatten:
            obj, unflatten_kwargs = cls._flatten_dataframe(dataframe=obj)
            instructions["unflatten_kwargs"] = unflatten_kwargs

        obj.to_excel(excel_writer=file_path, **to_kwargs)
        return instructions

    @classmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Read the dataframe from the given xlsx file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Unflatten keyword arguments for unflattening the read dataframe.
        :param read_kwargs:      Additional keyword arguments to pass to the `read_excel` function.

        :return: The loaded dataframe.
        """
        dataframe = pd.read_excel(io=file_path, **read_kwargs)

        # No unflatten instructions means the file was not flattened during packing:
        if unflatten_kwargs is None:
            return dataframe

        # Remove the default index column written during packing (joined with reset index):
        if dataframe.columns[0] == "Unnamed: 0":
            dataframe = dataframe.drop(columns=["Unnamed: 0"])

        return cls._unflatten_dataframe(dataframe=dataframe, **unflatten_kwargs)
417
+
418
+
419
class _HTMLFormatter(_Formatter):
    """
    A static class for managing pandas html files.
    """

    @classmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Save the given dataframe to the html file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                          flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                          especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs: Additional keyword arguments to pass to the `to_html` function.

        :return: A dictionary of keyword arguments for reading the dataframe from file.
        """
        instructions = {}

        # HTML cannot reliably round-trip multi-level / multi-index dataframes, so flatten first:
        if flatten:
            obj, unflatten_kwargs = cls._flatten_dataframe(dataframe=obj)
            instructions["unflatten_kwargs"] = unflatten_kwargs

        obj.to_html(buf=file_path, **to_kwargs)
        return instructions

    @classmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Read dataframes from the given html file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Unflatten keyword arguments for unflattening the read dataframe.
        :param read_kwargs:      Additional keyword arguments to pass to the `read_html` function.

        :return: The loaded dataframe.
        """
        # `read_html` returns a list of tables, the packed dataframe is the first one:
        dataframe = pd.read_html(io=file_path, **read_kwargs)[0]

        # No unflatten instructions means the file was not flattened during packing:
        if unflatten_kwargs is None:
            return dataframe

        # Remove the default index column written during packing (joined with reset index):
        if dataframe.columns[0] == "Unnamed: 0":
            dataframe = dataframe.drop(columns=["Unnamed: 0"])

        return cls._unflatten_dataframe(dataframe=dataframe, **unflatten_kwargs)
475
+
476
+
477
class _JSONFormatter(_Formatter):
    """
    A static class for managing pandas json files.
    """

    @classmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Save the given dataframe to the json file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                          flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                          especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs: Additional keyword arguments to pass to the `to_json` function.

        :return: A dictionary of keyword arguments for reading the dataframe from file.
        """
        # Record the orient used for writing so the same one can be used when unpacking:
        instructions = {"orient": to_kwargs.get("orient", None)}

        # JSON cannot reliably round-trip multi-level / multi-index dataframes, so flatten first:
        if flatten:
            obj, unflatten_kwargs = cls._flatten_dataframe(dataframe=obj)
            instructions["unflatten_kwargs"] = unflatten_kwargs

        obj.to_json(path_or_buf=file_path, **to_kwargs)
        return instructions

    @classmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Read dataframes from the given json file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Unflatten keyword arguments for unflattening the read dataframe.
        :param read_kwargs:      Additional keyword arguments to pass to the `read_json` function.

        :return: The loaded dataframe.
        """
        dataframe = pd.read_json(path_or_buf=file_path, **read_kwargs)

        # No unflatten instructions means the file was not flattened during packing:
        if unflatten_kwargs is None:
            return dataframe

        return cls._unflatten_dataframe(dataframe=dataframe, **unflatten_kwargs)
532
+
533
+
534
class _FeatherFormatter(_Formatter):
    """
    A static class for managing pandas feather files.
    """

    @classmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Save the given dataframe to the feather file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                          flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                          especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs: Additional keyword arguments to pass to the `to_feather` function.

        :return: A dictionary of keyword arguments for reading the dataframe from file.
        """
        instructions = {}

        # Feather cannot reliably round-trip multi-level / multi-index dataframes, so flatten first:
        if flatten:
            obj, unflatten_kwargs = cls._flatten_dataframe(dataframe=obj)
            instructions["unflatten_kwargs"] = unflatten_kwargs

        obj.to_feather(path=file_path, **to_kwargs)
        return instructions

    @classmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Read dataframes from the given feather file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Unflatten keyword arguments for unflattening the read dataframe.
        :param read_kwargs:      Additional keyword arguments to pass to the `read_feather` function.

        :return: The loaded dataframe.
        """
        dataframe = pd.read_feather(path=file_path, **read_kwargs)

        # No unflatten instructions means the file was not flattened during packing:
        if unflatten_kwargs is None:
            return dataframe

        return cls._unflatten_dataframe(dataframe=dataframe, **unflatten_kwargs)
587
+
588
+
589
class _ORCFormatter(_Formatter):
    """
    A static class for managing pandas orc files.
    """

    @classmethod
    def to(
        cls, obj: pd.DataFrame, file_path: str, flatten: bool = True, **to_kwargs
    ) -> dict:
        """
        Save the given dataframe to the orc file path given.

        :param obj:       The dataframe to save.
        :param file_path: The file to save to.
        :param flatten:   Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                          flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                          especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs: Additional keyword arguments to pass to the `to_orc` function.

        :return: A dictionary of keyword arguments for reading the dataframe from file.
        """
        # Flatten the dataframe (this format have problems saving multi-level dataframes):
        instructions = {}
        if flatten:
            obj, unflatten_kwargs = cls._flatten_dataframe(dataframe=obj)
            instructions["unflatten_kwargs"] = unflatten_kwargs

        # Write to orc:
        obj.to_orc(path=file_path, **to_kwargs)

        return instructions

    @classmethod
    def read(
        cls, file_path: str, unflatten_kwargs: dict = None, **read_kwargs
    ) -> pd.DataFrame:
        """
        Read dataframes from the given orc file path.

        :param file_path:        The file to read the dataframe from.
        :param unflatten_kwargs: Unflatten keyword arguments for unflattening the read dataframe.
        :param read_kwargs:      Additional keyword arguments to pass to the `read_orc` function.

        :return: The loaded dataframe.
        """
        # Read the orc:
        obj = pd.read_orc(path=file_path, **read_kwargs)

        # Check if it was flattened in packing:
        if unflatten_kwargs is not None:
            obj = cls._unflatten_dataframe(dataframe=obj, **unflatten_kwargs)

        return obj
642
+
643
+
644
class PandasSupportedFormat(SupportedFormat[_Formatter]):
    """
    Library of Pandas formats (file extensions) supported by the Pandas packagers.
    """

    PARQUET = "parquet"
    CSV = "csv"
    H5 = "h5"
    XML = "xml"
    XLSX = "xlsx"
    HTML = "html"
    JSON = "json"
    FEATHER = "feather"
    ORC = "orc"

    # Map each supported file extension to the formatter class that writes / reads it:
    _FORMAT_HANDLERS_MAP = {
        PARQUET: _ParquetFormatter,
        CSV: _CSVFormatter,
        H5: _H5Formatter,
        XML: _XMLFormatter,
        XLSX: _XLSXFormatter,
        HTML: _HTMLFormatter,
        JSON: _JSONFormatter,
        FEATHER: _FeatherFormatter,
        ORC: _ORCFormatter,
    }
670
+
671
+
672
# Default file formats for pandas DataFrame and Series file artifacts:
DEFAULT_PANDAS_FORMAT = PandasSupportedFormat.PARQUET
# Parquet is only selected when all column names are strings (see `pack_file`); otherwise fall back to CSV:
NON_STRING_COLUMN_NAMES_DEFAULT_PANDAS_FORMAT = PandasSupportedFormat.CSV
675
+
676
+
677
class PandasDataFramePackager(DefaultPackager):
    """
    ``pd.DataFrame`` packager.
    """

    PACKABLE_OBJECT_TYPE = pd.DataFrame
    DEFAULT_PACKING_ARTIFACT_TYPE = ArtifactType.DATASET

    @classmethod
    def get_default_unpacking_artifact_type(cls, data_item: DataItem) -> str:
        """
        Get the default artifact type used for unpacking. Returns dataset if the data item represents a
        `DatasetArtifact` and otherwise, file.

        :param data_item: The about to be unpacked data item.

        :return: The default artifact type.
        """
        is_artifact = data_item.get_artifact_type()
        if is_artifact and is_artifact == "datasets":
            return ArtifactType.DATASET
        return ArtifactType.FILE

    @classmethod
    def pack_result(cls, obj: pd.DataFrame, key: str) -> dict:
        """
        Pack a dataframe as a result.

        :param obj: The dataframe to pack and log.
        :param key: The result's key.

        :return: The result dictionary.
        """
        # Parse to dictionary according to the indexes in the dataframe:
        if len(obj.index.names) > 1:
            # Multiple indexes:
            orient = "split"
        elif obj.index.name is not None:
            # Not a default index (user would likely want to keep it):
            orient = "dict"
        else:
            # Default index can be ignored:
            orient = "list"

        # Cast to dictionary:
        dataframe_dictionary = obj.to_dict(orient=orient)

        # Prepare the result (casting tuples to lists):
        dataframe_dictionary = PandasDataFramePackager._prepare_result(
            obj=dataframe_dictionary
        )

        return super().pack_result(obj=dataframe_dictionary, key=key)

    @classmethod
    def pack_file(
        cls,
        obj: pd.DataFrame,
        key: str,
        file_format: str = None,
        flatten: bool = True,
        **to_kwargs,
    ) -> Tuple[Artifact, dict]:
        """
        Pack a dataframe as a file by the given format.

        :param obj:         The dataframe to pack.
        :param key:         The key to use for the artifact.
        :param file_format: The file format to save as. Default is parquet or csv (depends on the column names as
                            parquet cannot be used for non string column names).
        :param flatten:     Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                            flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                            especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs:   Additional keyword arguments to pass to the pandas `to_x` functions.

        :return: The packed artifact and instructions.
        """
        # Set default file format if not given (parquet requires string column names):
        if file_format is None:
            file_format = (
                DEFAULT_PANDAS_FORMAT
                if all(isinstance(name, str) for name in obj.columns)
                else NON_STRING_COLUMN_NAMES_DEFAULT_PANDAS_FORMAT
            )

        # Save to file:
        formatter = PandasSupportedFormat.get_format_handler(fmt=file_format)
        temp_directory = pathlib.Path(tempfile.mkdtemp())
        cls.add_future_clearing_path(path=temp_directory)
        file_path = temp_directory / f"{key}.{file_format}"
        read_kwargs = formatter.to(
            obj=obj, file_path=str(file_path), flatten=flatten, **to_kwargs
        )

        # Create the artifact and instructions:
        artifact = Artifact(key=key, src_path=os.path.abspath(file_path))

        return artifact, {"file_format": file_format, "read_kwargs": read_kwargs}

    @classmethod
    def pack_dataset(cls, obj: pd.DataFrame, key: str, file_format: str = "parquet"):
        """
        Pack a pandas dataframe as a dataset.

        :param obj:         The dataframe to pack.
        :param key:         The key to use for the artifact.
        :param file_format: The file format to save as. Default is parquet.

        :return: The packed artifact and instructions.
        """
        return DatasetArtifact(key=key, df=obj, format=file_format), {}

    @classmethod
    def unpack_file(
        cls,
        data_item: DataItem,
        file_format: str = None,
        read_kwargs: dict = None,
    ) -> pd.DataFrame:
        """
        Unpack a pandas dataframe from file.

        :param data_item:   The data item to unpack.
        :param file_format: The file format to use for reading the dataframe. Default is None - will be read by the
                            file extension.
        :param read_kwargs: Keyword arguments to pass to the read of the formatter.

        :return: The unpacked dataframe.
        """
        # Get the file:
        file_path = data_item.local()
        cls.add_future_clearing_path(path=file_path)

        # Get the archive format by the file extension if needed:
        if file_format is None:
            file_format = PandasSupportedFormat.match_format(path=file_path)
        if file_format is None:
            raise MLRunInvalidArgumentError(
                f"File format of {data_item.key} ('{''.join(pathlib.Path(file_path).suffixes)}') is not supported. "
                f"Supported formats are: {' '.join(PandasSupportedFormat.get_all_formats())}"
            )

        # Read the object:
        formatter = PandasSupportedFormat.get_format_handler(fmt=file_format)
        if read_kwargs is None:
            read_kwargs = {}
        return formatter.read(file_path=file_path, **read_kwargs)

    @classmethod
    def unpack_dataset(cls, data_item: DataItem):
        """
        Unpack a pandas dataframe from a dataset artifact.

        :param data_item: The data item to unpack.

        :return: The unpacked dataframe.
        """
        return data_item.as_df()

    @staticmethod
    def _prepare_result(obj: Union[list, dict, tuple]) -> Any:
        """
        A dataframe can be logged as a result when it being cast to a dictionary. If the dataframe has multiple indexes,
        pandas store them as a tuple, which is not json serializable, so we cast them into lists.

        :param obj: The dataframe dictionary (or list and tuple as it is recursive).

        :return: Prepared result.
        """
        if isinstance(obj, dict):
            # Rebuild a new dictionary instead of assigning into `obj` mid-iteration: changing a key
            # while iterating raises `RuntimeError`, and a tuple key converted into a list is
            # unhashable (`TypeError`). Converted keys are therefore cast back to tuples to stay
            # hashable while their inner values are still prepared:
            prepared_dictionary = {}
            for key, value in obj.items():
                prepared_key = PandasDataFramePackager._prepare_result(obj=key)
                if isinstance(prepared_key, list):
                    prepared_key = tuple(prepared_key)
                prepared_dictionary[prepared_key] = PandasDataFramePackager._prepare_result(
                    obj=value
                )
            return prepared_dictionary
        if isinstance(obj, list):
            return [PandasDataFramePackager._prepare_result(obj=value) for value in obj]
        if isinstance(obj, tuple):
            # Tuples are not json serializable - cast to list:
            return [PandasDataFramePackager._prepare_result(obj=value) for value in obj]
        return obj
857
+
858
+
859
class PandasSeriesPackager(PandasDataFramePackager):
    """
    ``pd.Series`` packager.
    """

    PACKABLE_OBJECT_TYPE = pd.Series
    DEFAULT_PACKING_ARTIFACT_TYPE = ArtifactType.FILE

    @classmethod
    def get_supported_artifact_types(cls) -> List[str]:
        """
        Get all the supported artifact types on this packager. It will be the same as `PandasDataFramePackager` but
        without the 'dataset' artifact type support.

        :return: A list of all the supported artifact types.
        """
        artifact_types = super().get_supported_artifact_types()
        # A series cannot be logged as a dataset artifact:
        artifact_types.remove("dataset")
        return artifact_types

    @classmethod
    def pack_result(cls, obj: pd.Series, key: str) -> dict:
        """
        Pack a series as a result.

        :param obj: The series to pack and log.
        :param key: The result's key.

        :return: The result dictionary.
        """
        # Reuse the dataframe result packing by casting the series into a single column dataframe:
        return super().pack_result(obj=pd.DataFrame(obj), key=key)

    @classmethod
    def pack_file(
        cls,
        obj: pd.Series,
        key: str,
        file_format: str = None,
        flatten: bool = True,
        **to_kwargs,
    ) -> Tuple[Artifact, dict]:
        """
        Pack a series as a file by the given format.

        :param obj:         The series to pack.
        :param key:         The key to use for the artifact.
        :param file_format: The file format to save as. Default is parquet or csv (depends on the column names as
                            parquet cannot be used for non string column names).
        :param flatten:     Whether to flatten the dataframe before saving. For some formats it is mandatory to enable
                            flattening, otherwise saving and loading the dataframe will cause unexpected behavior
                            especially in case it is multi-level or multi-index. Default to True.
        :param to_kwargs:   Additional keyword arguments to pass to the pandas `to_x` functions.

        :return: The packed artifact and instructions.
        """
        # Keep the series column name so it can be restored when unpacking:
        original_column_name = obj.name

        # Cast to a dataframe and delegate the file packing to the parent packager:
        artifact, instructions = super().pack_file(
            obj=pd.DataFrame(obj),
            key=key,
            file_format=file_format,
            flatten=flatten,
            **to_kwargs,
        )
        instructions["column_name"] = original_column_name

        return artifact, instructions

    @classmethod
    def unpack_file(
        cls,
        data_item: DataItem,
        file_format: str = None,
        read_kwargs: dict = None,
        column_name: Union[str, int] = None,
    ) -> pd.Series:
        """
        Unpack a pandas series from file.

        :param data_item:   The data item to unpack.
        :param file_format: The file format to use for reading the series. Default is None - will be read by the file
                            extension.
        :param read_kwargs: Keyword arguments to pass to the read of the formatter.
        :param column_name: The name of the series column.

        :return: The unpacked series.
        """
        # Read the file as a dataframe via the parent packager:
        dataframe = super().unpack_file(
            data_item=data_item,
            file_format=file_format,
            read_kwargs=read_kwargs,
        )

        # A series can only be constructed from a single column dataframe:
        if len(dataframe.columns) != 1:
            raise MLRunInvalidArgumentError(
                f"The data item received is of a `pandas.DataFrame` with more than one column: "
                f"{', '.join(dataframe.columns)}. Hence it cannot be turned into a `pandas.Series`."
            )
        series = dataframe[dataframe.columns[0]]

        # `read_kwargs` is only set for packed file artifacts, in which case the column name (even if
        # None) should be restored so the object matches the one originally packed:
        if read_kwargs is not None:
            series.name = column_name

        return series