mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3.dist-info/RECORD +0 -381
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,612 @@
|
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
import os
|
|
16
|
+
import pathlib
|
|
17
|
+
import tempfile
|
|
18
|
+
from abc import ABC, abstractmethod
|
|
19
|
+
from typing import Any, Dict, List, Tuple, Union
|
|
20
|
+
|
|
21
|
+
import numpy as np
|
|
22
|
+
import pandas as pd
|
|
23
|
+
|
|
24
|
+
from mlrun.artifacts import Artifact, DatasetArtifact
|
|
25
|
+
from mlrun.datastore import DataItem
|
|
26
|
+
from mlrun.errors import MLRunInvalidArgumentError
|
|
27
|
+
|
|
28
|
+
from ..utils import ArtifactType, SupportedFormat
|
|
29
|
+
from .default_packager import DefaultPackager
|
|
30
|
+
|
|
31
|
+
# Type alias for a collection of numpy arrays: either a list of arrays or a dictionary mapping string keys to arrays.
|
|
32
|
+
NumPyArrayCollectionType = Union[List[np.ndarray], Dict[str, np.ndarray]]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class _Formatter(ABC):
    """
    Base interface of a numpy formatter - a class that knows how to write arrays to, and read arrays back from, one
    specific file type. Both operations are abstract class methods that concrete formatters must implement.
    """

    @classmethod
    @abstractmethod
    def save(
        cls,
        obj: Union[np.ndarray, NumPyArrayCollectionType],
        file_path: str,
        **save_kwargs: dict,
    ):
        """
        Write the given array (or collection of arrays) to the given file path.

        :param obj:         The numpy array (or arrays collection) to write.
        :param file_path:   The path of the file to write to.
        :param save_kwargs: Extra keyword arguments intended for the relevant numpy save function.
        """

    @classmethod
    @abstractmethod
    def load(
        cls, file_path: str, **load_kwargs: dict
    ) -> Union[np.ndarray, NumPyArrayCollectionType]:
        """
        Read the array (or collection of arrays) stored in the given file path.

        :param file_path:   The path of the file to read from.
        :param load_kwargs: Extra keyword arguments intended for the relevant numpy load function.

        :return: The loaded array (or arrays collection).
        """
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class _TXTFormatter(_Formatter):
    """
    Formatter for numpy text ('txt') files, implemented with `np.savetxt` and `np.loadtxt`.
    """

    @classmethod
    def save(cls, obj: np.ndarray, file_path: str, **save_kwargs: dict):
        """
        Write the given array as text to the given file path.

        :param obj:         The numpy array to write.
        :param file_path:   The path of the file to write to.
        :param save_kwargs: Extra keyword arguments forwarded to `np.savetxt`.

        :raise MLRunInvalidArgumentError: If the array has more than 2 dimensions.
        """
        # Arrays of up to 2 dimensions are handed to numpy as is:
        if len(obj.shape) <= 2:
            np.savetxt(file_path, obj, **save_kwargs)
            return

        # Anything above 2D is rejected up front with a hint to use the 'npy' format instead:
        raise MLRunInvalidArgumentError(
            f"Cannot save the given array to file. Only 1D and 2D arrays can be saved to text files but the given "
            f"array is {len(obj.shape)}D (shape of {obj.shape}). Please use 'npy' format instead."
        )

    @classmethod
    def load(cls, file_path: str, **load_kwargs: dict) -> np.ndarray:
        """
        Read an array from the given 'txt' file path.

        :param file_path:   The path of the file to read from.
        :param load_kwargs: Extra keyword arguments forwarded to `np.loadtxt`.

        :return: The loaded array.
        """
        loaded_array = np.loadtxt(file_path, **load_kwargs)
        return loaded_array
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class _CSVFormatter(_TXTFormatter):
    """
    Formatter for numpy 'csv' files. It reuses the text formatter with a comma as the default delimiter.
    """

    @classmethod
    def save(cls, obj: np.ndarray, file_path: str, **save_kwargs: dict):
        """
        Write the given array as comma separated values to the given file path.

        :param obj:         The numpy array to write.
        :param file_path:   The path of the file to write to.
        :param save_kwargs: Extra keyword arguments forwarded to the text formatter's save. A 'delimiter' given here
                            takes precedence over the default comma.

        :raise MLRunInvalidArgumentError: If the array has more than 2 dimensions.
        """
        # Start from the csv default and let the caller's keyword arguments override it:
        csv_kwargs = {"delimiter": ","}
        csv_kwargs.update(save_kwargs)
        super().save(obj=obj, file_path=file_path, **csv_kwargs)

    @classmethod
    def load(cls, file_path: str, **load_kwargs: dict) -> np.ndarray:
        """
        Read an array from the given 'csv' file path.

        :param file_path:   The path of the file to read from.
        :param load_kwargs: Extra keyword arguments forwarded to the text formatter's load. A 'delimiter' given here
                            takes precedence over the default comma.

        :return: The loaded array.
        """
        # Start from the csv default and let the caller's keyword arguments override it:
        csv_kwargs = {"delimiter": ","}
        csv_kwargs.update(load_kwargs)
        return super().load(file_path=file_path, **csv_kwargs)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class _NPYFormatter(_Formatter):
    """
    Formatter for numpy 'npy' files, implemented with `np.save` and `np.load`.
    """

    @classmethod
    def save(cls, obj: np.ndarray, file_path: str, **save_kwargs: dict):
        """
        Write the given array to the given 'npy' file path.

        :param obj:         The numpy array to write.
        :param file_path:   The path of the file to write to.
        :param save_kwargs: Extra keyword arguments forwarded to `np.save`.
        """
        np.save(file_path, obj, **save_kwargs)

    @classmethod
    def load(cls, file_path: str, **load_kwargs: dict) -> np.ndarray:
        """
        Read an array from the given 'npy' file path.

        :param file_path:   The path of the file to read from.
        :param load_kwargs: Extra keyword arguments forwarded to `np.load`.

        :return: The loaded array.
        """
        loaded_array = np.load(file_path, **load_kwargs)
        return loaded_array
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class _NPZFormatter(_Formatter):
    """
    Formatter for numpy 'npz' files - archives that hold a collection of arrays. Implemented with `np.savez` /
    `np.savez_compressed` and `np.load`.
    """

    @classmethod
    def save(
        cls,
        obj: NumPyArrayCollectionType,
        file_path: str,
        is_compressed: bool = False,
        **save_kwargs: dict,
    ):
        """
        Write the given collection of arrays to the given 'npz' file path.

        :param obj:           The arrays collection to write - a list of arrays or a dictionary of arrays.
        :param file_path:     The path of the file to write to.
        :param is_compressed: Whether to write a compressed npz file (`np.savez_compressed`) or a regular one
                              (`np.savez`).
        :param save_kwargs:   Accepted for interface compatibility with the other formatters. Note: these are not
                              forwarded to the numpy save function.
        """
        # Choose the numpy writer according to the requested compression:
        if is_compressed:
            write_archive = np.savez_compressed
        else:
            write_archive = np.savez

        # Anything that is not a list is unpacked as keyword arguments (array per key):
        if not isinstance(obj, list):
            write_archive(file_path, **obj)
            return

        # A list is unpacked as positional arguments:
        write_archive(file_path, *obj)

    @classmethod
    def load(cls, file_path: str, **load_kwargs: dict) -> Dict[str, np.ndarray]:
        """
        Read the arrays stored in the given 'npz' file path.

        :param file_path:   The path of the file to read from.
        :param load_kwargs: Extra keyword arguments forwarded to `np.load`.

        :return: The loaded arrays as a mapping (dictionary) of type `np.lib.npyio.NpzFile`.
        """
        loaded_arrays = np.load(file_path, **load_kwargs)
        return loaded_arrays
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class NumPySupportedFormat(SupportedFormat[_Formatter]):
    """
    The numpy file formats (file extensions) that the NumPy packagers support, each mapped to the formatter class
    that handles it.
    """

    # Supported file extensions:
    NPY = "npy"
    NPZ = "npz"
    TXT = "txt"
    GZ = "gz"
    CSV = "csv"

    # File extension -> formatter class handling it:
    _FORMAT_HANDLERS_MAP = {
        NPY: _NPYFormatter,
        NPZ: _NPZFormatter,
        TXT: _TXTFormatter,
        GZ: _TXTFormatter,  # 'gz' format handled the same as 'txt'.
        CSV: _CSVFormatter,
    }

    @classmethod
    def get_single_array_formats(cls) -> List[str]:
        """
        List the formats that can store a single numpy array.

        :return: A list of all the supported formats for saving one numpy array.
        """
        single_array_formats = [cls.NPY, cls.TXT, cls.GZ, cls.CSV]
        return single_array_formats

    @classmethod
    def get_multi_array_formats(cls) -> List[str]:
        """
        List the formats that can store a collection of numpy arrays (e.g. a list of arrays or a dictionary of
        arrays).

        :return: A list of all the supported formats for saving multiple numpy arrays.
        """
        multi_array_formats = [cls.NPZ]
        return multi_array_formats
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# Default file formats for numpy file artifacts: 'npy' for a single array and 'npz' for a collection of arrays.
# NOTE(review): 'NUMPPY' in the second constant's name looks like a misspelling of 'NUMPY'. The name is left as is
# because it may be referenced from outside this section - confirm before renaming.
|
|
249
|
+
DEFAULT_NUMPY_ARRAY_FORMAT = NumPySupportedFormat.NPY
|
|
250
|
+
DEFAULT_NUMPPY_ARRAY_COLLECTION_FORMAT = NumPySupportedFormat.NPZ
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class NumPyNDArrayPackager(DefaultPackager):
    """
    ``numpy.ndarray`` packager.
    """

    PACKABLE_OBJECT_TYPE = np.ndarray

    # Arrays holding fewer elements than this value are packed as a result rather than as a file by the
    # `get_default_packing_artifact_type` method:
    _ARRAY_SIZE_AS_RESULT = 10

    @classmethod
    def get_default_packing_artifact_type(cls, obj: np.ndarray) -> str:
        """
        Get the default artifact type. Will be a result if the array size (number of elements) is less than
        `_ARRAY_SIZE_AS_RESULT`, otherwise file.

        :param obj: The about to be packed array.

        :return: The default artifact type.
        """
        if obj.size < cls._ARRAY_SIZE_AS_RESULT:
            return ArtifactType.RESULT
        return ArtifactType.FILE

    @classmethod
    def get_default_unpacking_artifact_type(cls, data_item: DataItem) -> str:
        """
        Get the default artifact type used for unpacking. Returns dataset if the data item reports the "datasets"
        artifact type and otherwise, file.

        :param data_item: The about to be unpacked data item.

        :return: The default artifact type.
        """
        # A falsy value (e.g. None) can never equal "datasets", so a single comparison is enough:
        if data_item.get_artifact_type() == "datasets":
            return ArtifactType.DATASET
        return ArtifactType.FILE

    @classmethod
    def pack_result(cls, obj: np.ndarray, key: str) -> dict:
        """
        Pack an array as a result.

        :param obj: The array to pack and log.
        :param key: The result's key.

        :return: The result dictionary.
        """
        # If the array holds a single element, we'll log it as a single number. Otherwise, log as a list result:
        if obj.size == 1:
            obj = obj.item()
        else:
            obj = obj.tolist()

        return super().pack_result(obj=obj, key=key)

    @classmethod
    def pack_file(
        cls,
        obj: np.ndarray,
        key: str,
        file_format: str = DEFAULT_NUMPY_ARRAY_FORMAT,
        **save_kwargs,
    ) -> Tuple[Artifact, dict]:
        """
        Pack an array as a file by the given format.

        :param obj:         The array to pack.
        :param key:         The key to use for the artifact.
        :param file_format: The file format to save as. Default is npy.
        :param save_kwargs: Additional keyword arguments to pass to the numpy save functions.

        :return: The packed artifact and instructions.
        """
        # Save to file (the temporary directory is registered for clearing once the packing is done):
        formatter = NumPySupportedFormat.get_format_handler(fmt=file_format)
        temp_directory = pathlib.Path(tempfile.mkdtemp())
        cls.add_future_clearing_path(path=temp_directory)
        file_path = temp_directory / f"{key}.{file_format}"
        formatter.save(obj=obj, file_path=str(file_path), **save_kwargs)

        # Create the artifact and instructions:
        artifact = Artifact(key=key, src_path=os.path.abspath(file_path))
        instructions = {"file_format": file_format}

        return artifact, instructions

    @classmethod
    def pack_dataset(
        cls,
        obj: np.ndarray,
        key: str,
        file_format: str = "",
    ) -> Tuple[Artifact, dict]:
        """
        Pack an array as a dataset.

        :param obj:         The array to pack.
        :param key:         The key to use for the artifact.
        :param file_format: The file format to save as. Default is parquet.

        :return: The packed artifact and instructions.

        :raise MLRunInvalidArgumentError: If the shape of the array is not 1D / 2D.
        """
        # Validate it's a 1D / 2D array. 0D arrays are rejected here as well, as `pd.DataFrame` cannot be
        # constructed from them and would fail with an unrelated pandas error:
        if obj.ndim not in (1, 2):
            raise MLRunInvalidArgumentError(
                f"Cannot log the given numpy array as a dataset. Only 1D and 2D arrays can be saved as dataset, "
                f"but the array is {obj.ndim}D (shape of {obj.shape}). Please specify to log it as a 'file' "
                f"instead ('npy' format) or as an 'object' (pickle)."
            )

        # Cast to a `pd.DataFrame`:
        data_frame = pd.DataFrame(data=obj)

        # Create the artifact:
        artifact = DatasetArtifact(key=key, df=data_frame, format=file_format)

        return artifact, {}

    @classmethod
    def unpack_file(cls, data_item: DataItem, file_format: str = None) -> np.ndarray:
        """
        Unpack a numpy array from file.

        :param data_item:   The data item to unpack.
        :param file_format: The file format to use for reading the array. Default is None - will be read by the file
                            extension.

        :return: The unpacked array.

        :raise MLRunInvalidArgumentError: If the file format could not be matched or is a multiple arrays format.
        """
        # Get the file:
        file_path = data_item.local()
        cls.add_future_clearing_path(path=file_path)

        # Get the file format by the file extension if needed:
        if file_format is None:
            file_format = NumPySupportedFormat.match_format(path=file_path)
        if (
            file_format is None
            or file_format in NumPySupportedFormat.get_multi_array_formats()
        ):
            raise MLRunInvalidArgumentError(
                f"File format of {data_item.key} ('{''.join(pathlib.Path(file_path).suffixes)}') is not supported. "
                f"Supported formats are: {' '.join(NumPySupportedFormat.get_single_array_formats())}"
            )

        # Read the object:
        formatter = NumPySupportedFormat.get_format_handler(fmt=file_format)
        obj = formatter.load(file_path=file_path)

        return obj

    @classmethod
    def unpack_dataset(cls, data_item: DataItem) -> np.ndarray:
        """
        Unpack a numpy array from a dataset artifact.

        :param data_item: The data item to unpack.

        :return: The unpacked array.
        """
        # Get the artifact's data frame:
        data_frame = data_item.as_df()

        # Cast the data frame to a `np.ndarray`. A 1D array of n elements comes back from the data frame as a 2D
        # array with shape of nx1, so squeeze is used to drop the redundant dimension. Note that squeeze removes
        # every dimension of size 1, so a 2D array with a single row or a single column is returned as 1D as well:
        array = data_frame.to_numpy().squeeze()

        return array
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
class _NumPyNDArrayCollectionPackager(DefaultPackager):
    """
    Shared base packager for collections of numpy arrays (builtin python dictionaries and lists), as both use the
    same artifact types and file formats.
    """

    DEFAULT_PACKING_ARTIFACT_TYPE = ArtifactType.FILE
    DEFAULT_UNPACKING_ARTIFACT_TYPE = ArtifactType.FILE
    PRIORITY = 4

    @classmethod
    def pack_file(
        cls,
        obj: NumPyArrayCollectionType,
        key: str,
        file_format: str = DEFAULT_NUMPPY_ARRAY_COLLECTION_FORMAT,
        **save_kwargs,
    ) -> Tuple[Artifact, dict]:
        """
        Write a collection of arrays into a single file of the requested format and wrap it in an artifact.

        :param obj:         The array collection to pack.
        :param key:         The key to use for the artifact.
        :param file_format: The file format to save as. Default is npz.
        :param save_kwargs: Additional keyword arguments to pass to the numpy save functions.

        :return: The packed artifact and instructions.
        """
        # Resolve the formatter first, then write into a fresh temporary directory that is registered for clearing:
        handler = NumPySupportedFormat.get_format_handler(fmt=file_format)
        staging_directory = pathlib.Path(tempfile.mkdtemp())
        cls.add_future_clearing_path(path=staging_directory)
        target_path = staging_directory / f"{key}.{file_format}"
        handler.save(obj=obj, file_path=str(target_path), **save_kwargs)

        # The file format is kept in the instructions next to the artifact pointing at the written file:
        instructions = {"file_format": file_format}
        return Artifact(key=key, src_path=os.path.abspath(target_path)), instructions

    @classmethod
    def unpack_file(
        cls, data_item: DataItem, file_format: str = None
    ) -> Dict[str, np.ndarray]:
        """
        Read a collection of numpy arrays out of a file.

        :param data_item:   The data item to unpack.
        :param file_format: The file format to use for reading the array collection. Default is None - will be read by
                            the file extension.

        :return: The unpacked array collection, as returned from the format's handler.

        :raise MLRunInvalidArgumentError: If the file format could not be matched or is a single array format.
        """
        # Bring the file to the local file system and register it for clearing:
        local_path = data_item.local()
        cls.add_future_clearing_path(path=local_path)

        # When no format was given, it is matched by the file extension:
        resolved_format = (
            file_format
            if file_format is not None
            else NumPySupportedFormat.match_format(path=local_path)
        )

        # Only multiple arrays formats can be read here:
        is_unsupported = (
            resolved_format is None
            or resolved_format in NumPySupportedFormat.get_single_array_formats()
        )
        if is_unsupported:
            raise MLRunInvalidArgumentError(
                f"File format of {data_item.key} ('{''.join(pathlib.Path(local_path).suffixes)}') is not supported. "
                f"Supported formats are: {' '.join(NumPySupportedFormat.get_multi_array_formats())}"
            )

        # Load using the handler of the resolved format:
        handler = NumPySupportedFormat.get_format_handler(fmt=resolved_format)
        return handler.load(file_path=local_path)
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
class NumPyNDArrayDictPackager(_NumPyNDArrayCollectionPackager):
    """
    ``dict[str, numpy.ndarray]`` packager.
    """

    PACKABLE_OBJECT_TYPE = Dict[str, np.ndarray]

    @classmethod
    def is_packable(cls, obj: Any, artifact_type: str = None) -> bool:
        """
        Check if the object provided is a dictionary of numpy arrays.

        :param obj:           The object to pack.
        :param artifact_type: The artifact type to log the object as.

        :return: True if packable and False otherwise.
        """
        # Must be a dictionary in which every key is a string and every value is a numpy array:
        if not (
            isinstance(obj, dict)
            and all(
                isinstance(key, str) and isinstance(value, np.ndarray)
                for key, value in obj.items()
            )
        ):
            return False
        # If an artifact type was requested, it must be one this packager supports:
        if artifact_type and artifact_type not in cls.get_supported_artifact_types():
            return False
        return True

    @classmethod
    def pack_result(cls, obj: Dict[str, np.ndarray], key: str) -> dict:
        """
        Pack an array dictionary as a result.

        :param obj: The dictionary of arrays to pack and log.
        :param key: The result's key.

        :return: The result dictionary, holding each array as a list under its original key.
        """
        return {
            key: {
                array_key: array_value.tolist()
                for array_key, array_value in obj.items()
            }
        }

    @classmethod
    def unpack_file(
        cls, data_item: DataItem, file_format: str = None
    ) -> Dict[str, np.ndarray]:
        """
        Unpack a dictionary of numpy arrays from file.

        :param data_item:   The data item to unpack.
        :param file_format: The file format to use for reading the arrays. Default is None - will be read by the file
                            extension.

        :return: The unpacked dictionary of arrays.
        """
        # Load the object:
        obj = super().unpack_file(data_item=data_item, file_format=file_format)

        # The returned object is a mapping of type NpzFile, so we cast it to a dictionary. NumPy requires the
        # NpzFile to be closed to avoid leaking its file descriptor, so it is closed once all arrays were read
        # (the check keeps plain mappings without a `close` method working):
        try:
            return dict(obj.items())
        finally:
            close = getattr(obj, "close", None)
            if callable(close):
                close()
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
class NumPyNDArrayListPackager(_NumPyNDArrayCollectionPackager):
    """
    ``list[numpy.ndarray]`` packager.
    """

    PACKABLE_OBJECT_TYPE = List[np.ndarray]

    @classmethod
    def is_packable(cls, obj: Any, artifact_type: str = None) -> bool:
        """
        Check if the object provided is a list of numpy arrays.

        :param obj:           The object to pack.
        :param artifact_type: The artifact type to log the object as.

        :return: True if packable and False otherwise.
        """
        # Must be a list in which every item is a numpy array:
        if not (
            isinstance(obj, list)
            and all(isinstance(value, np.ndarray) for value in obj)
        ):
            return False
        # If an artifact type was requested, it must be one this packager supports:
        if artifact_type and artifact_type not in cls.get_supported_artifact_types():
            return False
        return True

    @classmethod
    def pack_result(cls, obj: List[np.ndarray], key: str) -> dict:
        """
        Pack an array list as a result.

        :param obj: The list of arrays to pack and log.
        :param key: The result's key.

        :return: The result dictionary, holding each array as a list.
        """
        return {key: [array.tolist() for array in obj]}

    @classmethod
    def unpack_file(
        cls, data_item: DataItem, file_format: str = None
    ) -> List[np.ndarray]:
        """
        Unpack a list of numpy arrays from file.

        :param data_item:   The data item to unpack.
        :param file_format: The file format to use for reading the arrays. Default is None - will be read by the file
                            extension.

        :return: The unpacked list of arrays.
        """
        # Load the object:
        obj = super().unpack_file(data_item=data_item, file_format=file_format)

        # The returned object is a mapping of type NpzFile, so we cast its values to a list. NumPy requires the
        # NpzFile to be closed to avoid leaking its file descriptor, so it is closed once all arrays were read
        # (the check keeps plain mappings without a `close` method working):
        try:
            return list(obj.values())
        finally:
            close = getattr(obj, "close", None)
            if callable(close):
                close()
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
class NumPyNumberPackager(DefaultPackager):
    """
    ``numpy.number`` packager. Subclasses of `number` (the numpy dtypes such as `float32`, `uint8`, etc.) are packed
    by it as well.
    """

    PACKABLE_OBJECT_TYPE = np.number
    DEFAULT_PACKING_ARTIFACT_TYPE = ArtifactType.RESULT
    PACK_SUBCLASSES = True  # So every dtype ('float32', 'uint8', ...) is included.

    @classmethod
    def pack_result(cls, obj: np.number, key: str) -> dict:
        """
        Pack a numpy number as a result.

        :param obj: The numpy number to pack and log.
        :param key: The result's key.

        :return: The result dictionary.
        """
        # Convert the numpy scalar into its python builtin equivalent before handing it to the default packing:
        python_number = obj.item()
        return super().pack_result(obj=python_number, key=key)
|