PyPI - mlrun - Versions diffs - 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

mlrun-1.3.3-py3-none-any.whl → mlrun-1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (444)

mlrun/__init__.py +3 -3
mlrun/__main__.py +79 -37
mlrun/api/__init__.py +1 -1
mlrun/api/api/__init__.py +1 -1
mlrun/api/api/api.py +4 -4
mlrun/api/api/deps.py +10 -21
mlrun/api/api/endpoints/__init__.py +1 -1
mlrun/api/api/endpoints/artifacts.py +64 -36
mlrun/api/api/endpoints/auth.py +4 -4
mlrun/api/api/endpoints/background_tasks.py +11 -11
mlrun/api/api/endpoints/client_spec.py +5 -5
mlrun/api/api/endpoints/clusterization_spec.py +6 -4
mlrun/api/api/endpoints/feature_store.py +124 -115
mlrun/api/api/endpoints/files.py +22 -14
mlrun/api/api/endpoints/frontend_spec.py +28 -21
mlrun/api/api/endpoints/functions.py +142 -87
mlrun/api/api/endpoints/grafana_proxy.py +89 -442
mlrun/api/api/endpoints/healthz.py +20 -7
mlrun/api/api/endpoints/hub.py +320 -0
mlrun/api/api/endpoints/internal/__init__.py +1 -1
mlrun/api/api/endpoints/internal/config.py +1 -1
mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
mlrun/api/api/endpoints/logs.py +11 -11
mlrun/api/api/endpoints/model_endpoints.py +74 -70
mlrun/api/api/endpoints/operations.py +13 -9
mlrun/api/api/endpoints/pipelines.py +93 -88
mlrun/api/api/endpoints/projects.py +35 -35
mlrun/api/api/endpoints/runs.py +69 -27
mlrun/api/api/endpoints/runtime_resources.py +28 -28
mlrun/api/api/endpoints/schedules.py +98 -41
mlrun/api/api/endpoints/secrets.py +37 -32
mlrun/api/api/endpoints/submit.py +12 -12
mlrun/api/api/endpoints/tags.py +20 -22
mlrun/api/api/utils.py +251 -42
mlrun/api/constants.py +1 -1
mlrun/api/crud/__init__.py +18 -15
mlrun/api/crud/artifacts.py +10 -10
mlrun/api/crud/client_spec.py +4 -4
mlrun/api/crud/clusterization_spec.py +3 -3
mlrun/api/crud/feature_store.py +54 -46
mlrun/api/crud/functions.py +3 -3
mlrun/api/crud/hub.py +312 -0
mlrun/api/crud/logs.py +11 -9
mlrun/api/crud/model_monitoring/__init__.py +3 -3
mlrun/api/crud/model_monitoring/grafana.py +435 -0
mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
mlrun/api/crud/notifications.py +149 -0
mlrun/api/crud/pipelines.py +67 -52
mlrun/api/crud/projects.py +51 -23
mlrun/api/crud/runs.py +7 -5
mlrun/api/crud/runtime_resources.py +13 -13
mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
mlrun/api/crud/runtimes/nuclio/function.py +505 -0
mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
mlrun/api/crud/secrets.py +88 -46
mlrun/api/crud/tags.py +5 -5
mlrun/api/db/__init__.py +1 -1
mlrun/api/db/base.py +102 -54
mlrun/api/db/init_db.py +2 -3
mlrun/api/db/session.py +4 -12
mlrun/api/db/sqldb/__init__.py +1 -1
mlrun/api/db/sqldb/db.py +439 -196
mlrun/api/db/sqldb/helpers.py +1 -1
mlrun/api/db/sqldb/models/__init__.py +3 -3
mlrun/api/db/sqldb/models/models_mysql.py +82 -64
mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
mlrun/api/db/sqldb/session.py +27 -20
mlrun/api/initial_data.py +82 -24
mlrun/api/launcher.py +196 -0
mlrun/api/main.py +91 -22
mlrun/api/middlewares.py +6 -5
mlrun/api/migrations_mysql/env.py +1 -1
mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
mlrun/api/migrations_sqlite/env.py +1 -1
mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
mlrun/api/schemas/__init__.py +216 -138
mlrun/api/utils/__init__.py +1 -1
mlrun/api/utils/asyncio.py +1 -1
mlrun/api/utils/auth/__init__.py +1 -1
mlrun/api/utils/auth/providers/__init__.py +1 -1
mlrun/api/utils/auth/providers/base.py +7 -7
mlrun/api/utils/auth/providers/nop.py +6 -7
mlrun/api/utils/auth/providers/opa.py +17 -17
mlrun/api/utils/auth/verifier.py +36 -34
mlrun/api/utils/background_tasks.py +24 -24
mlrun/{builder.py → api/utils/builder.py} +216 -123
mlrun/api/utils/clients/__init__.py +1 -1
mlrun/api/utils/clients/chief.py +19 -4
mlrun/api/utils/clients/iguazio.py +106 -60
mlrun/api/utils/clients/log_collector.py +1 -1
mlrun/api/utils/clients/nuclio.py +23 -23
mlrun/api/utils/clients/protocols/grpc.py +2 -2
mlrun/api/utils/db/__init__.py +1 -1
mlrun/api/utils/db/alembic.py +1 -1
mlrun/api/utils/db/backup.py +1 -1
mlrun/api/utils/db/mysql.py +24 -25
mlrun/api/utils/db/sql_collation.py +1 -1
mlrun/api/utils/db/sqlite_migration.py +2 -2
mlrun/api/utils/events/__init__.py +14 -0
mlrun/api/utils/events/base.py +57 -0
mlrun/api/utils/events/events_factory.py +41 -0
mlrun/api/utils/events/iguazio.py +217 -0
mlrun/api/utils/events/nop.py +55 -0
mlrun/api/utils/helpers.py +16 -13
mlrun/api/utils/memory_reports.py +1 -1
mlrun/api/utils/periodic.py +6 -3
mlrun/api/utils/projects/__init__.py +1 -1
mlrun/api/utils/projects/follower.py +33 -33
mlrun/api/utils/projects/leader.py +36 -34
mlrun/api/utils/projects/member.py +27 -27
mlrun/api/utils/projects/remotes/__init__.py +1 -1
mlrun/api/utils/projects/remotes/follower.py +13 -13
mlrun/api/utils/projects/remotes/leader.py +10 -10
mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
mlrun/api/utils/scheduler.py +140 -51
mlrun/api/utils/singletons/__init__.py +1 -1
mlrun/api/utils/singletons/db.py +9 -15
mlrun/api/utils/singletons/k8s.py +677 -5
mlrun/api/utils/singletons/logs_dir.py +1 -1
mlrun/api/utils/singletons/project_member.py +1 -1
mlrun/api/utils/singletons/scheduler.py +1 -1
mlrun/artifacts/__init__.py +2 -2
mlrun/artifacts/base.py +8 -2
mlrun/artifacts/dataset.py +5 -3
mlrun/artifacts/manager.py +7 -1
mlrun/artifacts/model.py +15 -4
mlrun/artifacts/plots.py +1 -1
mlrun/common/__init__.py +1 -1
mlrun/common/constants.py +15 -0
mlrun/common/model_monitoring.py +209 -0
mlrun/common/schemas/__init__.py +167 -0
mlrun/{api → common}/schemas/artifact.py +13 -14
mlrun/{api → common}/schemas/auth.py +10 -8
mlrun/{api → common}/schemas/background_task.py +3 -3
mlrun/{api → common}/schemas/client_spec.py +1 -1
mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
mlrun/{api → common}/schemas/constants.py +21 -8
mlrun/common/schemas/events.py +36 -0
mlrun/{api → common}/schemas/feature_store.py +2 -1
mlrun/{api → common}/schemas/frontend_spec.py +7 -6
mlrun/{api → common}/schemas/function.py +5 -5
mlrun/{api → common}/schemas/http.py +3 -3
mlrun/common/schemas/hub.py +134 -0
mlrun/{api → common}/schemas/k8s.py +3 -3
mlrun/{api → common}/schemas/memory_reports.py +1 -1
mlrun/common/schemas/model_endpoints.py +342 -0
mlrun/common/schemas/notification.py +57 -0
mlrun/{api → common}/schemas/object.py +6 -6
mlrun/{api → common}/schemas/pipeline.py +3 -3
mlrun/{api → common}/schemas/project.py +6 -5
mlrun/common/schemas/regex.py +24 -0
mlrun/common/schemas/runs.py +30 -0
mlrun/{api → common}/schemas/runtime_resource.py +3 -3
mlrun/{api → common}/schemas/schedule.py +19 -7
mlrun/{api → common}/schemas/secret.py +3 -3
mlrun/{api → common}/schemas/tag.py +2 -2
mlrun/common/types.py +25 -0
mlrun/config.py +152 -20
mlrun/data_types/__init__.py +7 -2
mlrun/data_types/data_types.py +4 -2
mlrun/data_types/infer.py +1 -1
mlrun/data_types/spark.py +10 -3
mlrun/datastore/__init__.py +10 -3
mlrun/datastore/azure_blob.py +1 -1
mlrun/datastore/base.py +185 -53
mlrun/datastore/datastore.py +1 -1
mlrun/datastore/filestore.py +1 -1
mlrun/datastore/google_cloud_storage.py +1 -1
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +1 -1
mlrun/datastore/s3.py +1 -1
mlrun/datastore/sources.py +192 -70
mlrun/datastore/spark_udf.py +44 -0
mlrun/datastore/store_resources.py +4 -4
mlrun/datastore/targets.py +115 -45
mlrun/datastore/utils.py +127 -5
mlrun/datastore/v3io.py +1 -1
mlrun/datastore/wasbfs/__init__.py +1 -1
mlrun/datastore/wasbfs/fs.py +1 -1
mlrun/db/__init__.py +7 -5
mlrun/db/base.py +112 -68
mlrun/db/httpdb.py +445 -277
mlrun/db/nopdb.py +491 -0
mlrun/db/sqldb.py +112 -65
mlrun/errors.py +6 -1
mlrun/execution.py +44 -22
mlrun/feature_store/__init__.py +1 -1
mlrun/feature_store/api.py +143 -95
mlrun/feature_store/common.py +16 -20
mlrun/feature_store/feature_set.py +42 -12
mlrun/feature_store/feature_vector.py +32 -21
mlrun/feature_store/ingestion.py +9 -12
mlrun/feature_store/retrieval/__init__.py +3 -2
mlrun/feature_store/retrieval/base.py +388 -66
mlrun/feature_store/retrieval/dask_merger.py +63 -151
mlrun/feature_store/retrieval/job.py +30 -12
mlrun/feature_store/retrieval/local_merger.py +40 -133
mlrun/feature_store/retrieval/spark_merger.py +129 -127
mlrun/feature_store/retrieval/storey_merger.py +173 -0
mlrun/feature_store/steps.py +132 -15
mlrun/features.py +8 -3
mlrun/frameworks/__init__.py +1 -1
mlrun/frameworks/_common/__init__.py +1 -1
mlrun/frameworks/_common/artifacts_library.py +1 -1
mlrun/frameworks/_common/mlrun_interface.py +1 -1
mlrun/frameworks/_common/model_handler.py +1 -1
mlrun/frameworks/_common/plan.py +1 -1
mlrun/frameworks/_common/producer.py +1 -1
mlrun/frameworks/_common/utils.py +1 -1
mlrun/frameworks/_dl_common/__init__.py +1 -1
mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
mlrun/frameworks/_dl_common/model_handler.py +1 -1
mlrun/frameworks/_dl_common/utils.py +1 -1
mlrun/frameworks/_ml_common/__init__.py +1 -1
mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
mlrun/frameworks/_ml_common/model_handler.py +1 -1
mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
mlrun/frameworks/_ml_common/plan.py +1 -1
mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
mlrun/frameworks/_ml_common/producer.py +1 -1
mlrun/frameworks/_ml_common/utils.py +1 -1
mlrun/frameworks/auto_mlrun/__init__.py +1 -1
mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
mlrun/frameworks/huggingface/__init__.py +1 -1
mlrun/frameworks/huggingface/model_server.py +1 -1
mlrun/frameworks/lgbm/__init__.py +1 -1
mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
mlrun/frameworks/lgbm/model_handler.py +1 -1
mlrun/frameworks/lgbm/model_server.py +1 -1
mlrun/frameworks/lgbm/utils.py +1 -1
mlrun/frameworks/onnx/__init__.py +1 -1
mlrun/frameworks/onnx/dataset.py +1 -1
mlrun/frameworks/onnx/mlrun_interface.py +1 -1
mlrun/frameworks/onnx/model_handler.py +1 -1
mlrun/frameworks/onnx/model_server.py +1 -1
mlrun/frameworks/parallel_coordinates.py +1 -1
mlrun/frameworks/pytorch/__init__.py +1 -1
mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
mlrun/frameworks/pytorch/model_handler.py +1 -1
mlrun/frameworks/pytorch/model_server.py +1 -1
mlrun/frameworks/pytorch/utils.py +1 -1
mlrun/frameworks/sklearn/__init__.py +1 -1
mlrun/frameworks/sklearn/estimator.py +1 -1
mlrun/frameworks/sklearn/metric.py +1 -1
mlrun/frameworks/sklearn/metrics_library.py +1 -1
mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
mlrun/frameworks/sklearn/model_handler.py +1 -1
mlrun/frameworks/sklearn/utils.py +1 -1
mlrun/frameworks/tf_keras/__init__.py +1 -1
mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
mlrun/frameworks/tf_keras/model_handler.py +1 -1
mlrun/frameworks/tf_keras/model_server.py +1 -1
mlrun/frameworks/tf_keras/utils.py +1 -1
mlrun/frameworks/xgboost/__init__.py +1 -1
mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
mlrun/frameworks/xgboost/model_handler.py +1 -1
mlrun/frameworks/xgboost/utils.py +1 -1
mlrun/k8s_utils.py +14 -765
mlrun/kfpops.py +14 -17
mlrun/launcher/__init__.py +13 -0
mlrun/launcher/base.py +406 -0
mlrun/launcher/client.py +159 -0
mlrun/launcher/factory.py +50 -0
mlrun/launcher/local.py +276 -0
mlrun/launcher/remote.py +178 -0
mlrun/lists.py +10 -2
mlrun/mlutils/__init__.py +1 -1
mlrun/mlutils/data.py +1 -1
mlrun/mlutils/models.py +1 -1
mlrun/mlutils/plots.py +1 -1
mlrun/model.py +252 -14
mlrun/model_monitoring/__init__.py +41 -0
mlrun/model_monitoring/features_drift_table.py +1 -1
mlrun/model_monitoring/helpers.py +123 -38
mlrun/model_monitoring/model_endpoint.py +144 -0
mlrun/model_monitoring/model_monitoring_batch.py +310 -259
mlrun/model_monitoring/stores/__init__.py +106 -0
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
mlrun/model_monitoring/stores/models/__init__.py +23 -0
mlrun/model_monitoring/stores/models/base.py +18 -0
mlrun/model_monitoring/stores/models/mysql.py +100 -0
mlrun/model_monitoring/stores/models/sqlite.py +98 -0
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
mlrun/model_monitoring/stream_processing_fs.py +239 -271
mlrun/package/__init__.py +163 -0
mlrun/package/context_handler.py +325 -0
mlrun/package/errors.py +47 -0
mlrun/package/packager.py +298 -0
mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
mlrun/package/packagers/default_packager.py +422 -0
mlrun/package/packagers/numpy_packagers.py +612 -0
mlrun/package/packagers/pandas_packagers.py +968 -0
mlrun/package/packagers/python_standard_library_packagers.py +616 -0
mlrun/package/packagers_manager.py +786 -0
mlrun/package/utils/__init__.py +53 -0
mlrun/package/utils/_archiver.py +226 -0
mlrun/package/utils/_formatter.py +211 -0
mlrun/package/utils/_pickler.py +234 -0
mlrun/package/utils/_supported_format.py +71 -0
mlrun/package/utils/log_hint_utils.py +93 -0
mlrun/package/utils/type_hint_utils.py +298 -0
mlrun/platforms/__init__.py +1 -1
mlrun/platforms/iguazio.py +34 -2
mlrun/platforms/other.py +1 -1
mlrun/projects/__init__.py +1 -1
mlrun/projects/operations.py +14 -9
mlrun/projects/pipelines.py +31 -13
mlrun/projects/project.py +762 -238
mlrun/render.py +49 -19
mlrun/run.py +57 -326
mlrun/runtimes/__init__.py +3 -9
mlrun/runtimes/base.py +247 -784
mlrun/runtimes/constants.py +1 -1
mlrun/runtimes/daskjob.py +45 -41
mlrun/runtimes/funcdoc.py +43 -7
mlrun/runtimes/function.py +66 -656
mlrun/runtimes/function_reference.py +1 -1
mlrun/runtimes/generators.py +1 -1
mlrun/runtimes/kubejob.py +99 -116
mlrun/runtimes/local.py +59 -66
mlrun/runtimes/mpijob/__init__.py +1 -1
mlrun/runtimes/mpijob/abstract.py +13 -15
mlrun/runtimes/mpijob/v1.py +3 -1
mlrun/runtimes/mpijob/v1alpha1.py +1 -1
mlrun/runtimes/nuclio.py +1 -1
mlrun/runtimes/pod.py +51 -26
mlrun/runtimes/remotesparkjob.py +3 -1
mlrun/runtimes/serving.py +12 -4
mlrun/runtimes/sparkjob/__init__.py +1 -2
mlrun/runtimes/sparkjob/abstract.py +44 -31
mlrun/runtimes/sparkjob/spark3job.py +11 -9
mlrun/runtimes/utils.py +61 -42
mlrun/secrets.py +16 -18
mlrun/serving/__init__.py +3 -2
mlrun/serving/merger.py +1 -1
mlrun/serving/remote.py +1 -1
mlrun/serving/routers.py +39 -42
mlrun/serving/server.py +23 -13
mlrun/serving/serving_wrapper.py +1 -1
mlrun/serving/states.py +172 -39
mlrun/serving/utils.py +1 -1
mlrun/serving/v1_serving.py +1 -1
mlrun/serving/v2_serving.py +29 -21
mlrun/utils/__init__.py +1 -2
mlrun/utils/async_http.py +8 -1
mlrun/utils/azure_vault.py +1 -1
mlrun/utils/clones.py +2 -2
mlrun/utils/condition_evaluator.py +65 -0
mlrun/utils/db.py +52 -0
mlrun/utils/helpers.py +188 -13
mlrun/utils/http.py +89 -54
mlrun/utils/logger.py +48 -8
mlrun/utils/model_monitoring.py +132 -100
mlrun/utils/notifications/__init__.py +1 -1
mlrun/utils/notifications/notification/__init__.py +8 -6
mlrun/utils/notifications/notification/base.py +20 -14
mlrun/utils/notifications/notification/console.py +7 -4
mlrun/utils/notifications/notification/git.py +36 -19
mlrun/utils/notifications/notification/ipython.py +10 -8
mlrun/utils/notifications/notification/slack.py +18 -13
mlrun/utils/notifications/notification_pusher.py +377 -56
mlrun/utils/regex.py +6 -1
mlrun/utils/singleton.py +1 -1
mlrun/utils/v3io_clients.py +1 -1
mlrun/utils/vault.py +270 -269
mlrun/utils/version/__init__.py +1 -1
mlrun/utils/version/version.json +2 -2
mlrun/utils/version/version.py +1 -1
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
mlrun-1.4.0.dist-info/RECORD +434 -0
mlrun/api/api/endpoints/marketplace.py +0 -257
mlrun/api/crud/marketplace.py +0 -221
mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
mlrun/api/db/filedb/db.py +0 -518
mlrun/api/schemas/marketplace.py +0 -128
mlrun/api/schemas/model_endpoints.py +0 -185
mlrun/db/filedb.py +0 -891
mlrun/feature_store/retrieval/online.py +0 -92
mlrun/model_monitoring/constants.py +0 -67
mlrun/runtimes/package/context_handler.py +0 -711
mlrun/runtimes/sparkjob/spark2job.py +0 -59
mlrun-1.3.3.dist-info/RECORD +0 -381
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0

mlrun/feature_store/retrieval/dask_merger.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2018 Iguazio
+# Copyright 2023 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,151 +20,18 @@ from dask.distributed import Client
 import mlrun
-from ..feature_vector import OfflineVectorResponse
 from .base import BaseMerger
 class DaskFeatureMerger(BaseMerger):
     engine = "dask"
+    support_offline = True
     def __init__(self, vector, **engine_args):
         super().__init__(vector, **engine_args)
         self.client = engine_args.get("dask_client")
         self._dask_cluster_uri = engine_args.get("dask_cluster_uri")
-    def _generate_vector(
-        self,
-        entity_rows,
-        entity_timestamp_column,
-        feature_set_objects,
-        feature_set_fields,
-        start_time=None,
-        end_time=None,
-        query=None,
-    ):
-        if "index" not in self._index_columns:
-            self._append_drop_column("index")
-        # init the dask client if needed
-        if not self.client:
-            if self._dask_cluster_uri:
-                function = mlrun.import_function(self._dask_cluster_uri)
-                self.client = function.client
-            else:
-                self.client = Client()
-        # load dataframes
-        feature_sets = []
-        dfs = []
-        keys = (
-            []
-        )  # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
-        # featureset is connected to the previous one, and within each record the left keys are indicated in index 0
-        # and the right keys in index 1, this keys will be the keys that will be used in this join
-        all_columns = []
-        fs_link_list = self._create_linked_relation_list(
-            feature_set_objects, feature_set_fields
-        )
-        for node in fs_link_list:
-            name = node.name
-            feature_set = feature_set_objects[name]
-            feature_sets.append(feature_set)
-            columns = feature_set_fields[name]
-            column_names = [name for name, alias in columns]
-            for col in node.data["save_cols"]:
-                if col not in column_names:
-                    self._append_drop_column(col)
-            column_names += node.data["save_cols"]
-            df = feature_set.to_dataframe(
-                columns=column_names,
-                df_module=dd,
-                start_time=start_time,
-                end_time=end_time,
-                time_column=entity_timestamp_column,
-                index=False,
-            )
-            df = df.reset_index()
-            column_names += node.data["save_index"]
-            node.data["save_cols"] += node.data["save_index"]
-            entity_timestamp_column_list = (
-                [entity_timestamp_column]
-                if entity_timestamp_column
-                else feature_set.spec.timestamp_key
-            )
-            if entity_timestamp_column_list:
-                column_names += entity_timestamp_column_list
-                node.data["save_cols"] += entity_timestamp_column_list
-            df = df.persist()
-            # rename columns to be unique for each feature set
-            rename_col_dict = {
-                col: f"{col}_{name}"
-                for col in column_names
-                if col not in node.data["save_cols"]
-            }
-            df = df.rename(
-                columns=rename_col_dict,
-            )
-            dfs.append(df)
-            del df
-            keys.append([node.data["left_keys"], node.data["right_keys"]])
-            # update alias according to the unique column name
-            new_columns = []
-            for col, alias in columns:
-                if col in rename_col_dict and alias:
-                    new_columns.append((rename_col_dict[col], alias))
-                elif col in rename_col_dict and not alias:
-                    new_columns.append((rename_col_dict[col], col))
-                else:
-                    new_columns.append((col, alias))
-            all_columns.append(new_columns)
-            self._update_alias(
-                dictionary={name: alias for name, alias in new_columns if alias}
-            )
-        self.merge(
-            entity_df=entity_rows,
-            entity_timestamp_column=entity_timestamp_column,
-            featuresets=feature_sets,
-            featureset_dfs=dfs,
-            keys=keys,
-            all_columns=all_columns,
-        )
-        self._result_df = self._result_df.drop(
-            columns=self._drop_columns, errors="ignore"
-        )
-        # renaming all columns according to self._alias
-        self._result_df = self._result_df.rename(
-            columns=self._alias,
-        )
-        if self.vector.status.label_column:
-            self._result_df = self._result_df.dropna(
-                subset=[self.vector.status.label_column]
-            )
-        # filter joined data frame by the query param
-        if query:
-            self._result_df = self._result_df.query(query)
-        if self._drop_indexes:
-            self._result_df = self._reset_index(self._result_df)
-        else:
-            self._result_df = self._set_indexes(self._result_df)
-        self._write_to_target()
-        return OfflineVectorResponse(self)
     def _reset_index(self, df):
         to_drop = df.index.name is None
         df = df.reset_index(drop=to_drop)
@@ -178,27 +45,22 @@ class DaskFeatureMerger(BaseMerger):
         featureset_df,
         left_keys: list,
         right_keys: list,
-        columns: list,
     ):
+        def sort_partition(partition, timestamp):
+            return partition.sort_values(timestamp)
-        entity_df = self._reset_index(entity_df)
-        entity_df = (
-            entity_df
-            if entity_timestamp_column not in entity_df
-            else entity_df.set_index(entity_timestamp_column, drop=True)
+        entity_df = entity_df.map_partitions(
+            sort_partition, timestamp=entity_timestamp_column
         )
-        featureset_df = self._reset_index(featureset_df)
-        featureset_df = (
-            featureset_df
-            if entity_timestamp_column not in featureset_df
-            else featureset_df.set_index(entity_timestamp_column, drop=True)
+        featureset_df = featureset_df.map_partitions(
+            sort_partition, timestamp=featureset.spec.timestamp_key
         )
         merged_df = merge_asof(
             entity_df,
             featureset_df,
-            left_index=True,
-            right_index=True,
+            left_on=entity_timestamp_column,
+            right_on=featureset.spec.timestamp_key,
             left_by=left_keys or None,
             right_by=right_keys or None,
             suffixes=("", f"_{featureset.metadata.name}_"),
@@ -217,7 +79,6 @@ class DaskFeatureMerger(BaseMerger):
         featureset_df,
         left_keys: list,
         right_keys: list,
-        columns: list,
     ):
         fs_name = featureset.metadata.name
@@ -241,5 +102,56 @@ class DaskFeatureMerger(BaseMerger):
     def get_df(self, to_pandas=True):
         if to_pandas and hasattr(self._result_df, "dask"):
-            return self._result_df.compute()
-        return self._result_df
+            df = self._result_df.compute()
+        else:
+            df = self._result_df
+        self._set_indexes(df)
+        return df
+    def _create_engine_env(self):
+        if "index" not in self._index_columns:
+            self._append_drop_column("index")
+        # init the dask client if needed
+        if not self.client:
+            if self._dask_cluster_uri:
+                function = mlrun.import_function(self._dask_cluster_uri)
+                self.client = function.client
+            else:
+                self.client = Client()
+    def _get_engine_df(
+        self,
+        feature_set,
+        feature_set_name,
+        column_names=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+    ):
+        df = feature_set.to_dataframe(
+            columns=column_names,
+            df_module=dd,
+            start_time=start_time,
+            end_time=end_time,
+            time_column=time_column,
+            index=False,
+        )
+        return self._reset_index(df).persist()
+    def _rename_columns_and_select(self, df, rename_col_dict, columns=None):
+        return df.rename(
+            columns=rename_col_dict,
+        )
+    def _drop_columns_from_result(self):
+        self._result_df = self._result_df.drop(
+            columns=self._drop_columns, errors="ignore"
+        )
+    def _filter(self, query):
+        self._result_df = self._result_df.query(query)
+    def _order_by(self, order_by_active):
+        self._result_df.sort_values(by=order_by_active)

mlrun/feature_store/retrieval/job.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2018 Iguazio
+# Copyright 2023 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -33,12 +33,15 @@ def run_merge_job(
     engine_args: dict,
     spark_service: str = None,
     entity_rows=None,
-    timestamp_column=None,
+    entity_timestamp_column=None,
     run_config=None,
     drop_columns=None,
     with_indexes=None,
     query=None,
-    join_type="inner",
+    order_by=None,
+    start_time=None,
+    end_time=None,
+    timestamp_for_filtering=None,
 ):
     name = vector.metadata.name
     if not target or not hasattr(target, "to_dict"):
@@ -92,20 +95,27 @@ def run_merge_job(
         set_default_resources(
             function.spec.executor_resources, function.with_executor_requests
         )
+    if start_time and not isinstance(start_time, str):
+        start_time = start_time.isoformat()
+    if end_time and not isinstance(end_time, str):
+        end_time = end_time.isoformat()
     task = new_task(
         name=name,
         params={
             "vector_uri": vector.uri,
             "target": target.to_dict(),
-            "timestamp_column": timestamp_column,
+            "entity_timestamp_column": entity_timestamp_column,
             "drop_columns": drop_columns,
             "with_indexes": with_indexes,
             "query": query,
-            "join_type": join_type,
+            "order_by": order_by,
+            "start_time": start_time,
+            "end_time": end_time,
+            "timestamp_for_filtering": timestamp_for_filtering,
             "engine_args": engine_args,
         },
-        inputs={"entity_rows": entity_rows},
+        inputs={"entity_rows": entity_rows} if entity_rows is not None else {},
     )
     task.spec.secret_sources = run_config.secret_sources
     task.set_label("job-type", "feature-merge").set_label("feature-vector", vector.uri)
@@ -120,15 +130,16 @@ def run_merge_job(
         watch=run_config.watch,
     )
     logger.info(f"feature vector merge job started, run id = {run.uid()}")
-    return RemoteVectorResponse(vector, run)
+    return RemoteVectorResponse(vector, run, with_indexes)
 class RemoteVectorResponse:
     """get_offline_features response object"""
-    def __init__(self, vector, run):
+    def __init__(self, vector, run, with_indexes=False):
         self.run = run
         self.vector = vector
+        self.with_indexes = with_indexes or self.vector.spec.with_indexes
     @property
     def status(self):
@@ -147,12 +158,18 @@ class RemoteVectorResponse:
         :param df_module: optional, py module used to create the DataFrame (e.g. pd, dd, cudf, ..)
         :param kwargs:    extended DataItem.as_df() args
         """
         file_format = kwargs.get("format")
         if not file_format:
             file_format = self.run.status.results["target"]["kind"]
-        return mlrun.get_dataitem(self.target_uri).as_df(
+        df = mlrun.get_dataitem(self.target_uri).as_df(
             columns=columns, df_module=df_module, format=file_format, **kwargs
         )
+        if self.with_indexes:
+            df.set_index(
+                list(self.vector.spec.entity_fields.keys()), inplace=True, drop=True
+            )
+        return df
     @property
     def target_uri(self):
@@ -166,17 +183,18 @@ import mlrun
 import mlrun.feature_store.retrieval
 from mlrun.datastore.targets import get_target_driver
 def merge_handler(context, vector_uri, target, entity_rows=None,
-                  timestamp_column=None, drop_columns=None, with_indexes=None, query=None, join_type='inner', engine_args=None):
+                  entity_timestamp_column=None, drop_columns=None, with_indexes=None, query=None,
+                  engine_args=None, order_by=None, start_time=None, end_time=None, timestamp_for_filtering=None):
     vector = context.get_store_resource(vector_uri)
     store_target = get_target_driver(target, vector)
-    entity_timestamp_column = timestamp_column or vector.spec.timestamp_field
     if entity_rows:
         entity_rows = entity_rows.as_df()
     context.logger.info(f"starting vector merge task to {vector.uri}")
     merger = mlrun.feature_store.retrieval.{{{engine}}}(vector, **(engine_args or {}))
     merger.start(entity_rows, entity_timestamp_column, store_target, drop_columns, with_indexes=with_indexes,
-                 query=query, join_type=join_type)
+                 query=query, order_by=order_by, start_time=start_time, end_time=end_time,
+                 timestamp_for_filtering=timestamp_for_filtering)
     target = vector.status.targets[store_target.name].to_dict()
     context.log_result('feature_vector', vector.uri)

mlrun/feature_store/retrieval/local_merger.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2018 Iguazio
+# Copyright 2023 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,143 +16,16 @@ import re
 import pandas as pd
-from ..feature_vector import OfflineVectorResponse
 from .base import BaseMerger
 class LocalFeatureMerger(BaseMerger):
     engine = "local"
+    support_offline = True
     def __init__(self, vector, **engine_args):
         super().__init__(vector, **engine_args)
-    def _generate_vector(
-        self,
-        entity_rows,
-        entity_timestamp_column,
-        feature_set_objects,
-        feature_set_fields,
-        start_time=None,
-        end_time=None,
-        query=None,
-    ):
-        feature_sets = []
-        dfs = []
-        keys = (
-            []
-        )  # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
-        # featureset is connected to the previous one, and within each record the left keys are indicated in index 0
-        # and the right keys in index 1, this keys will be the keys that will be used in this join
-        all_columns = []
-        fs_link_list = self._create_linked_relation_list(
-            feature_set_objects, feature_set_fields
-        )
-        for node in fs_link_list:
-            name = node.name
-            feature_set = feature_set_objects[name]
-            feature_sets.append(feature_set)
-            columns = feature_set_fields[name]
-            column_names = [name for name, alias in columns]
-            for col in node.data["save_cols"]:
-                if col not in column_names:
-                    self._append_drop_column(col)
-            column_names += node.data["save_cols"]
-            # handling case where there are multiple feature sets and user creates vector where entity_timestamp_
-            # column is from a specific feature set (can't be entity timestamp)
-            if (
-                entity_timestamp_column in column_names
-                or feature_set.spec.timestamp_key == entity_timestamp_column
-            ):
-                df = feature_set.to_dataframe(
-                    columns=column_names,
-                    start_time=start_time,
-                    end_time=end_time,
-                    time_column=entity_timestamp_column,
-                )
-            else:
-                df = feature_set.to_dataframe(
-                    columns=column_names,
-                    time_column=entity_timestamp_column,
-                )
-            if df.index.names[0]:
-                df.reset_index(inplace=True)
-            column_names += node.data["save_index"]
-            node.data["save_cols"] += node.data["save_index"]
-            entity_timestamp_column_list = (
-                [entity_timestamp_column]
-                if entity_timestamp_column
-                else feature_set.spec.timestamp_key
-            )
-            if entity_timestamp_column_list:
-                column_names += entity_timestamp_column_list
-                node.data["save_cols"] += entity_timestamp_column_list
-            # rename columns to be unique for each feature set
-            rename_col_dict = {
-                col: f"{col}_{name}"
-                for col in column_names
-                if col not in node.data["save_cols"]
-            }
-            df.rename(
-                columns=rename_col_dict,
-                inplace=True,
-            )
-            dfs.append(df)
-            keys.append([node.data["left_keys"], node.data["right_keys"]])
-            # update alias according to the unique column name
-            new_columns = []
-            for col, alias in columns:
-                if col in rename_col_dict and alias:
-                    new_columns.append((rename_col_dict[col], alias))
-                elif col in rename_col_dict and not alias:
-                    new_columns.append((rename_col_dict[col], col))
-                else:
-                    new_columns.append((col, alias))
-            all_columns.append(new_columns)
-            self._update_alias(
-                dictionary={name: alias for name, alias in new_columns if alias}
-            )
-        self.merge(
-            entity_df=entity_rows,
-            entity_timestamp_column=entity_timestamp_column,
-            featuresets=feature_sets,
-            featureset_dfs=dfs,
-            keys=keys,
-            all_columns=all_columns,
-        )
-        self._result_df.drop(columns=self._drop_columns, inplace=True, errors="ignore")
-        # renaming all columns according to self._alias
-        self._result_df.rename(
-            columns=self._alias,
-            inplace=True,
-        )
-        if self.vector.status.label_column:
-            self._result_df.dropna(
-                subset=[self.vector.status.label_column],
-                inplace=True,
-            )
-        # filter joined data frame by the query param
-        if query:
-            self._result_df.query(query, inplace=True)
-        if self._drop_indexes:
-            self._result_df.reset_index(drop=True, inplace=True)
-        else:
-            self._set_indexes(self._result_df)
-        self._write_to_target()
-        return OfflineVectorResponse(self)
     def _asof_join(
         self,
         entity_df,
@@ -161,7 +34,6 @@ class LocalFeatureMerger(BaseMerger):
         featureset_df,
         left_keys: list,
         right_keys: list,
-        columns: list,
     ):
         indexes = None
@@ -176,7 +48,7 @@ class LocalFeatureMerger(BaseMerger):
             featureset_df[featureset.spec.timestamp_key]
         )
         entity_df.sort_values(by=entity_timestamp_column, inplace=True)
-        featureset_df.sort_values(by=entity_timestamp_column, inplace=True)
+        featureset_df.sort_values(by=featureset.spec.timestamp_key, inplace=True)
         merged_df = pd.merge_asof(
             entity_df,
@@ -191,7 +63,6 @@ class LocalFeatureMerger(BaseMerger):
         for col in merged_df.columns:
             if re.findall(f"_{featureset.metadata.name}_$", col):
                 self._append_drop_column(col)
         # Undo indexing tricks for asof merge
         # to return the correct indexes and not
         # overload `index` columns
@@ -213,7 +84,6 @@ class LocalFeatureMerger(BaseMerger):
         featureset_df,
         left_keys: list,
         right_keys: list,
-        columns: list,
     ):
         fs_name = featureset.metadata.name
         merged_df = pd.merge(
@@ -228,3 +98,40 @@ class LocalFeatureMerger(BaseMerger):
             if re.findall(f"_{fs_name}_$", col):
                 self._append_drop_column(col)
         return merged_df
+    def _create_engine_env(self):
+        pass
+    def _get_engine_df(
+        self,
+        feature_set,
+        feature_set_name,
+        column_names=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+    ):
+        df = feature_set.to_dataframe(
+            columns=column_names,
+            start_time=start_time,
+            end_time=end_time,
+            time_column=time_column,
+        )
+        if df.index.names[0]:
+            df.reset_index(inplace=True)
+        return df
+    def _rename_columns_and_select(self, df, rename_col_dict, columns=None):
+        df.rename(
+            columns=rename_col_dict,
+            inplace=True,
+        )
+    def _drop_columns_from_result(self):
+        self._result_df.drop(columns=self._drop_columns, inplace=True, errors="ignore")
+    def _filter(self, query):
+        self._result_df.query(query, inplace=True)
+    def _order_by(self, order_by_active):
+        self._result_df.sort_values(by=order_by_active, ignore_index=True, inplace=True)

mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl

Potentially problematic release.

mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl