PyPI - mlrun - Versions diffs - 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

mlrun-1.3.3-py3-none-any.whl → mlrun-1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (444)

mlrun/__init__.py +3 -3
mlrun/__main__.py +79 -37
mlrun/api/__init__.py +1 -1
mlrun/api/api/__init__.py +1 -1
mlrun/api/api/api.py +4 -4
mlrun/api/api/deps.py +10 -21
mlrun/api/api/endpoints/__init__.py +1 -1
mlrun/api/api/endpoints/artifacts.py +64 -36
mlrun/api/api/endpoints/auth.py +4 -4
mlrun/api/api/endpoints/background_tasks.py +11 -11
mlrun/api/api/endpoints/client_spec.py +5 -5
mlrun/api/api/endpoints/clusterization_spec.py +6 -4
mlrun/api/api/endpoints/feature_store.py +124 -115
mlrun/api/api/endpoints/files.py +22 -14
mlrun/api/api/endpoints/frontend_spec.py +28 -21
mlrun/api/api/endpoints/functions.py +142 -87
mlrun/api/api/endpoints/grafana_proxy.py +89 -442
mlrun/api/api/endpoints/healthz.py +20 -7
mlrun/api/api/endpoints/hub.py +320 -0
mlrun/api/api/endpoints/internal/__init__.py +1 -1
mlrun/api/api/endpoints/internal/config.py +1 -1
mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
mlrun/api/api/endpoints/logs.py +11 -11
mlrun/api/api/endpoints/model_endpoints.py +74 -70
mlrun/api/api/endpoints/operations.py +13 -9
mlrun/api/api/endpoints/pipelines.py +93 -88
mlrun/api/api/endpoints/projects.py +35 -35
mlrun/api/api/endpoints/runs.py +69 -27
mlrun/api/api/endpoints/runtime_resources.py +28 -28
mlrun/api/api/endpoints/schedules.py +98 -41
mlrun/api/api/endpoints/secrets.py +37 -32
mlrun/api/api/endpoints/submit.py +12 -12
mlrun/api/api/endpoints/tags.py +20 -22
mlrun/api/api/utils.py +251 -42
mlrun/api/constants.py +1 -1
mlrun/api/crud/__init__.py +18 -15
mlrun/api/crud/artifacts.py +10 -10
mlrun/api/crud/client_spec.py +4 -4
mlrun/api/crud/clusterization_spec.py +3 -3
mlrun/api/crud/feature_store.py +54 -46
mlrun/api/crud/functions.py +3 -3
mlrun/api/crud/hub.py +312 -0
mlrun/api/crud/logs.py +11 -9
mlrun/api/crud/model_monitoring/__init__.py +3 -3
mlrun/api/crud/model_monitoring/grafana.py +435 -0
mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
mlrun/api/crud/notifications.py +149 -0
mlrun/api/crud/pipelines.py +67 -52
mlrun/api/crud/projects.py +51 -23
mlrun/api/crud/runs.py +7 -5
mlrun/api/crud/runtime_resources.py +13 -13
mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
mlrun/api/crud/runtimes/nuclio/function.py +505 -0
mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
mlrun/api/crud/secrets.py +88 -46
mlrun/api/crud/tags.py +5 -5
mlrun/api/db/__init__.py +1 -1
mlrun/api/db/base.py +102 -54
mlrun/api/db/init_db.py +2 -3
mlrun/api/db/session.py +4 -12
mlrun/api/db/sqldb/__init__.py +1 -1
mlrun/api/db/sqldb/db.py +439 -196
mlrun/api/db/sqldb/helpers.py +1 -1
mlrun/api/db/sqldb/models/__init__.py +3 -3
mlrun/api/db/sqldb/models/models_mysql.py +82 -64
mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
mlrun/api/db/sqldb/session.py +27 -20
mlrun/api/initial_data.py +82 -24
mlrun/api/launcher.py +196 -0
mlrun/api/main.py +91 -22
mlrun/api/middlewares.py +6 -5
mlrun/api/migrations_mysql/env.py +1 -1
mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
mlrun/api/migrations_sqlite/env.py +1 -1
mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
mlrun/api/schemas/__init__.py +216 -138
mlrun/api/utils/__init__.py +1 -1
mlrun/api/utils/asyncio.py +1 -1
mlrun/api/utils/auth/__init__.py +1 -1
mlrun/api/utils/auth/providers/__init__.py +1 -1
mlrun/api/utils/auth/providers/base.py +7 -7
mlrun/api/utils/auth/providers/nop.py +6 -7
mlrun/api/utils/auth/providers/opa.py +17 -17
mlrun/api/utils/auth/verifier.py +36 -34
mlrun/api/utils/background_tasks.py +24 -24
mlrun/{builder.py → api/utils/builder.py} +216 -123
mlrun/api/utils/clients/__init__.py +1 -1
mlrun/api/utils/clients/chief.py +19 -4
mlrun/api/utils/clients/iguazio.py +106 -60
mlrun/api/utils/clients/log_collector.py +1 -1
mlrun/api/utils/clients/nuclio.py +23 -23
mlrun/api/utils/clients/protocols/grpc.py +2 -2
mlrun/api/utils/db/__init__.py +1 -1
mlrun/api/utils/db/alembic.py +1 -1
mlrun/api/utils/db/backup.py +1 -1
mlrun/api/utils/db/mysql.py +24 -25
mlrun/api/utils/db/sql_collation.py +1 -1
mlrun/api/utils/db/sqlite_migration.py +2 -2
mlrun/api/utils/events/__init__.py +14 -0
mlrun/api/utils/events/base.py +57 -0
mlrun/api/utils/events/events_factory.py +41 -0
mlrun/api/utils/events/iguazio.py +217 -0
mlrun/api/utils/events/nop.py +55 -0
mlrun/api/utils/helpers.py +16 -13
mlrun/api/utils/memory_reports.py +1 -1
mlrun/api/utils/periodic.py +6 -3
mlrun/api/utils/projects/__init__.py +1 -1
mlrun/api/utils/projects/follower.py +33 -33
mlrun/api/utils/projects/leader.py +36 -34
mlrun/api/utils/projects/member.py +27 -27
mlrun/api/utils/projects/remotes/__init__.py +1 -1
mlrun/api/utils/projects/remotes/follower.py +13 -13
mlrun/api/utils/projects/remotes/leader.py +10 -10
mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
mlrun/api/utils/scheduler.py +140 -51
mlrun/api/utils/singletons/__init__.py +1 -1
mlrun/api/utils/singletons/db.py +9 -15
mlrun/api/utils/singletons/k8s.py +677 -5
mlrun/api/utils/singletons/logs_dir.py +1 -1
mlrun/api/utils/singletons/project_member.py +1 -1
mlrun/api/utils/singletons/scheduler.py +1 -1
mlrun/artifacts/__init__.py +2 -2
mlrun/artifacts/base.py +8 -2
mlrun/artifacts/dataset.py +5 -3
mlrun/artifacts/manager.py +7 -1
mlrun/artifacts/model.py +15 -4
mlrun/artifacts/plots.py +1 -1
mlrun/common/__init__.py +1 -1
mlrun/common/constants.py +15 -0
mlrun/common/model_monitoring.py +209 -0
mlrun/common/schemas/__init__.py +167 -0
mlrun/{api → common}/schemas/artifact.py +13 -14
mlrun/{api → common}/schemas/auth.py +10 -8
mlrun/{api → common}/schemas/background_task.py +3 -3
mlrun/{api → common}/schemas/client_spec.py +1 -1
mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
mlrun/{api → common}/schemas/constants.py +21 -8
mlrun/common/schemas/events.py +36 -0
mlrun/{api → common}/schemas/feature_store.py +2 -1
mlrun/{api → common}/schemas/frontend_spec.py +7 -6
mlrun/{api → common}/schemas/function.py +5 -5
mlrun/{api → common}/schemas/http.py +3 -3
mlrun/common/schemas/hub.py +134 -0
mlrun/{api → common}/schemas/k8s.py +3 -3
mlrun/{api → common}/schemas/memory_reports.py +1 -1
mlrun/common/schemas/model_endpoints.py +342 -0
mlrun/common/schemas/notification.py +57 -0
mlrun/{api → common}/schemas/object.py +6 -6
mlrun/{api → common}/schemas/pipeline.py +3 -3
mlrun/{api → common}/schemas/project.py +6 -5
mlrun/common/schemas/regex.py +24 -0
mlrun/common/schemas/runs.py +30 -0
mlrun/{api → common}/schemas/runtime_resource.py +3 -3
mlrun/{api → common}/schemas/schedule.py +19 -7
mlrun/{api → common}/schemas/secret.py +3 -3
mlrun/{api → common}/schemas/tag.py +2 -2
mlrun/common/types.py +25 -0
mlrun/config.py +152 -20
mlrun/data_types/__init__.py +7 -2
mlrun/data_types/data_types.py +4 -2
mlrun/data_types/infer.py +1 -1
mlrun/data_types/spark.py +10 -3
mlrun/datastore/__init__.py +10 -3
mlrun/datastore/azure_blob.py +1 -1
mlrun/datastore/base.py +185 -53
mlrun/datastore/datastore.py +1 -1
mlrun/datastore/filestore.py +1 -1
mlrun/datastore/google_cloud_storage.py +1 -1
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +1 -1
mlrun/datastore/s3.py +1 -1
mlrun/datastore/sources.py +192 -70
mlrun/datastore/spark_udf.py +44 -0
mlrun/datastore/store_resources.py +4 -4
mlrun/datastore/targets.py +115 -45
mlrun/datastore/utils.py +127 -5
mlrun/datastore/v3io.py +1 -1
mlrun/datastore/wasbfs/__init__.py +1 -1
mlrun/datastore/wasbfs/fs.py +1 -1
mlrun/db/__init__.py +7 -5
mlrun/db/base.py +112 -68
mlrun/db/httpdb.py +445 -277
mlrun/db/nopdb.py +491 -0
mlrun/db/sqldb.py +112 -65
mlrun/errors.py +6 -1
mlrun/execution.py +44 -22
mlrun/feature_store/__init__.py +1 -1
mlrun/feature_store/api.py +143 -95
mlrun/feature_store/common.py +16 -20
mlrun/feature_store/feature_set.py +42 -12
mlrun/feature_store/feature_vector.py +32 -21
mlrun/feature_store/ingestion.py +9 -12
mlrun/feature_store/retrieval/__init__.py +3 -2
mlrun/feature_store/retrieval/base.py +388 -66
mlrun/feature_store/retrieval/dask_merger.py +63 -151
mlrun/feature_store/retrieval/job.py +30 -12
mlrun/feature_store/retrieval/local_merger.py +40 -133
mlrun/feature_store/retrieval/spark_merger.py +129 -127
mlrun/feature_store/retrieval/storey_merger.py +173 -0
mlrun/feature_store/steps.py +132 -15
mlrun/features.py +8 -3
mlrun/frameworks/__init__.py +1 -1
mlrun/frameworks/_common/__init__.py +1 -1
mlrun/frameworks/_common/artifacts_library.py +1 -1
mlrun/frameworks/_common/mlrun_interface.py +1 -1
mlrun/frameworks/_common/model_handler.py +1 -1
mlrun/frameworks/_common/plan.py +1 -1
mlrun/frameworks/_common/producer.py +1 -1
mlrun/frameworks/_common/utils.py +1 -1
mlrun/frameworks/_dl_common/__init__.py +1 -1
mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
mlrun/frameworks/_dl_common/model_handler.py +1 -1
mlrun/frameworks/_dl_common/utils.py +1 -1
mlrun/frameworks/_ml_common/__init__.py +1 -1
mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
mlrun/frameworks/_ml_common/model_handler.py +1 -1
mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
mlrun/frameworks/_ml_common/plan.py +1 -1
mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
mlrun/frameworks/_ml_common/producer.py +1 -1
mlrun/frameworks/_ml_common/utils.py +1 -1
mlrun/frameworks/auto_mlrun/__init__.py +1 -1
mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
mlrun/frameworks/huggingface/__init__.py +1 -1
mlrun/frameworks/huggingface/model_server.py +1 -1
mlrun/frameworks/lgbm/__init__.py +1 -1
mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
mlrun/frameworks/lgbm/model_handler.py +1 -1
mlrun/frameworks/lgbm/model_server.py +1 -1
mlrun/frameworks/lgbm/utils.py +1 -1
mlrun/frameworks/onnx/__init__.py +1 -1
mlrun/frameworks/onnx/dataset.py +1 -1
mlrun/frameworks/onnx/mlrun_interface.py +1 -1
mlrun/frameworks/onnx/model_handler.py +1 -1
mlrun/frameworks/onnx/model_server.py +1 -1
mlrun/frameworks/parallel_coordinates.py +1 -1
mlrun/frameworks/pytorch/__init__.py +1 -1
mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
mlrun/frameworks/pytorch/model_handler.py +1 -1
mlrun/frameworks/pytorch/model_server.py +1 -1
mlrun/frameworks/pytorch/utils.py +1 -1
mlrun/frameworks/sklearn/__init__.py +1 -1
mlrun/frameworks/sklearn/estimator.py +1 -1
mlrun/frameworks/sklearn/metric.py +1 -1
mlrun/frameworks/sklearn/metrics_library.py +1 -1
mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
mlrun/frameworks/sklearn/model_handler.py +1 -1
mlrun/frameworks/sklearn/utils.py +1 -1
mlrun/frameworks/tf_keras/__init__.py +1 -1
mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
mlrun/frameworks/tf_keras/model_handler.py +1 -1
mlrun/frameworks/tf_keras/model_server.py +1 -1
mlrun/frameworks/tf_keras/utils.py +1 -1
mlrun/frameworks/xgboost/__init__.py +1 -1
mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
mlrun/frameworks/xgboost/model_handler.py +1 -1
mlrun/frameworks/xgboost/utils.py +1 -1
mlrun/k8s_utils.py +14 -765
mlrun/kfpops.py +14 -17
mlrun/launcher/__init__.py +13 -0
mlrun/launcher/base.py +406 -0
mlrun/launcher/client.py +159 -0
mlrun/launcher/factory.py +50 -0
mlrun/launcher/local.py +276 -0
mlrun/launcher/remote.py +178 -0
mlrun/lists.py +10 -2
mlrun/mlutils/__init__.py +1 -1
mlrun/mlutils/data.py +1 -1
mlrun/mlutils/models.py +1 -1
mlrun/mlutils/plots.py +1 -1
mlrun/model.py +252 -14
mlrun/model_monitoring/__init__.py +41 -0
mlrun/model_monitoring/features_drift_table.py +1 -1
mlrun/model_monitoring/helpers.py +123 -38
mlrun/model_monitoring/model_endpoint.py +144 -0
mlrun/model_monitoring/model_monitoring_batch.py +310 -259
mlrun/model_monitoring/stores/__init__.py +106 -0
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
mlrun/model_monitoring/stores/models/__init__.py +23 -0
mlrun/model_monitoring/stores/models/base.py +18 -0
mlrun/model_monitoring/stores/models/mysql.py +100 -0
mlrun/model_monitoring/stores/models/sqlite.py +98 -0
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
mlrun/model_monitoring/stream_processing_fs.py +239 -271
mlrun/package/__init__.py +163 -0
mlrun/package/context_handler.py +325 -0
mlrun/package/errors.py +47 -0
mlrun/package/packager.py +298 -0
mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
mlrun/package/packagers/default_packager.py +422 -0
mlrun/package/packagers/numpy_packagers.py +612 -0
mlrun/package/packagers/pandas_packagers.py +968 -0
mlrun/package/packagers/python_standard_library_packagers.py +616 -0
mlrun/package/packagers_manager.py +786 -0
mlrun/package/utils/__init__.py +53 -0
mlrun/package/utils/_archiver.py +226 -0
mlrun/package/utils/_formatter.py +211 -0
mlrun/package/utils/_pickler.py +234 -0
mlrun/package/utils/_supported_format.py +71 -0
mlrun/package/utils/log_hint_utils.py +93 -0
mlrun/package/utils/type_hint_utils.py +298 -0
mlrun/platforms/__init__.py +1 -1
mlrun/platforms/iguazio.py +34 -2
mlrun/platforms/other.py +1 -1
mlrun/projects/__init__.py +1 -1
mlrun/projects/operations.py +14 -9
mlrun/projects/pipelines.py +31 -13
mlrun/projects/project.py +762 -238
mlrun/render.py +49 -19
mlrun/run.py +57 -326
mlrun/runtimes/__init__.py +3 -9
mlrun/runtimes/base.py +247 -784
mlrun/runtimes/constants.py +1 -1
mlrun/runtimes/daskjob.py +45 -41
mlrun/runtimes/funcdoc.py +43 -7
mlrun/runtimes/function.py +66 -656
mlrun/runtimes/function_reference.py +1 -1
mlrun/runtimes/generators.py +1 -1
mlrun/runtimes/kubejob.py +99 -116
mlrun/runtimes/local.py +59 -66
mlrun/runtimes/mpijob/__init__.py +1 -1
mlrun/runtimes/mpijob/abstract.py +13 -15
mlrun/runtimes/mpijob/v1.py +3 -1
mlrun/runtimes/mpijob/v1alpha1.py +1 -1
mlrun/runtimes/nuclio.py +1 -1
mlrun/runtimes/pod.py +51 -26
mlrun/runtimes/remotesparkjob.py +3 -1
mlrun/runtimes/serving.py +12 -4
mlrun/runtimes/sparkjob/__init__.py +1 -2
mlrun/runtimes/sparkjob/abstract.py +44 -31
mlrun/runtimes/sparkjob/spark3job.py +11 -9
mlrun/runtimes/utils.py +61 -42
mlrun/secrets.py +16 -18
mlrun/serving/__init__.py +3 -2
mlrun/serving/merger.py +1 -1
mlrun/serving/remote.py +1 -1
mlrun/serving/routers.py +39 -42
mlrun/serving/server.py +23 -13
mlrun/serving/serving_wrapper.py +1 -1
mlrun/serving/states.py +172 -39
mlrun/serving/utils.py +1 -1
mlrun/serving/v1_serving.py +1 -1
mlrun/serving/v2_serving.py +29 -21
mlrun/utils/__init__.py +1 -2
mlrun/utils/async_http.py +8 -1
mlrun/utils/azure_vault.py +1 -1
mlrun/utils/clones.py +2 -2
mlrun/utils/condition_evaluator.py +65 -0
mlrun/utils/db.py +52 -0
mlrun/utils/helpers.py +188 -13
mlrun/utils/http.py +89 -54
mlrun/utils/logger.py +48 -8
mlrun/utils/model_monitoring.py +132 -100
mlrun/utils/notifications/__init__.py +1 -1
mlrun/utils/notifications/notification/__init__.py +8 -6
mlrun/utils/notifications/notification/base.py +20 -14
mlrun/utils/notifications/notification/console.py +7 -4
mlrun/utils/notifications/notification/git.py +36 -19
mlrun/utils/notifications/notification/ipython.py +10 -8
mlrun/utils/notifications/notification/slack.py +18 -13
mlrun/utils/notifications/notification_pusher.py +377 -56
mlrun/utils/regex.py +6 -1
mlrun/utils/singleton.py +1 -1
mlrun/utils/v3io_clients.py +1 -1
mlrun/utils/vault.py +270 -269
mlrun/utils/version/__init__.py +1 -1
mlrun/utils/version/version.json +2 -2
mlrun/utils/version/version.py +1 -1
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
mlrun-1.4.0.dist-info/RECORD +434 -0
mlrun/api/api/endpoints/marketplace.py +0 -257
mlrun/api/crud/marketplace.py +0 -221
mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
mlrun/api/db/filedb/db.py +0 -518
mlrun/api/schemas/marketplace.py +0 -128
mlrun/api/schemas/model_endpoints.py +0 -185
mlrun/db/filedb.py +0 -891
mlrun/feature_store/retrieval/online.py +0 -92
mlrun/model_monitoring/constants.py +0 -67
mlrun/runtimes/package/context_handler.py +0 -711
mlrun/runtimes/sparkjob/spark2job.py +0 -59
mlrun-1.3.3.dist-info/RECORD +0 -381
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0

mlrun/feature_store/retrieval/spark_merger.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2018 Iguazio
+# Copyright 2023 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,17 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 import mlrun
 from mlrun.datastore.targets import get_offline_target
 from ...runtimes import RemoteSparkRuntime
 from ...runtimes.sparkjob.abstract import AbstractSparkRuntime
-from ..feature_vector import OfflineVectorResponse
 from .base import BaseMerger
 class SparkFeatureMerger(BaseMerger):
     engine = "spark"
+    support_offline = True
     def __init__(self, vector, **engine_args):
         super().__init__(vector, **engine_args)
@@ -33,109 +34,6 @@ class SparkFeatureMerger(BaseMerger):
     def to_spark_df(self, session, path):
         return session.read.load(path)
-    def _generate_vector(
-        self,
-        entity_rows,
-        entity_timestamp_column,
-        feature_set_objects,
-        feature_set_fields,
-        start_time=None,
-        end_time=None,
-        query=None,
-    ):
-        from pyspark.sql import SparkSession
-        from pyspark.sql.functions import col
-        if self.spark is None:
-            # create spark context
-            self.spark = SparkSession.builder.appName(
-                f"vector-merger-{self.vector.metadata.name}"
-            ).getOrCreate()
-        feature_sets = []
-        dfs = []
-        for name, columns in feature_set_fields.items():
-            feature_set = feature_set_objects[name]
-            feature_sets.append(feature_set)
-            column_names = [name for name, alias in columns]
-            if feature_set.spec.passthrough:
-                if not feature_set.spec.source:
-                    raise mlrun.errors.MLRunNotFoundError(
-                        f"passthrough feature set {name} with no source"
-                    )
-                source_kind = feature_set.spec.source.kind
-                source_path = feature_set.spec.source.path
-            else:
-                target = get_offline_target(feature_set)
-                if not target:
-                    raise mlrun.errors.MLRunInvalidArgumentError(
-                        f"feature set {name} does not have offline targets"
-                    )
-                source_kind = target.kind
-                source_path = target.get_target_path()
-            # handling case where there are multiple feature sets and user creates vector where
-            # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
-            source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
-            if (
-                entity_timestamp_column in column_names
-                or feature_set.spec.timestamp_key == entity_timestamp_column
-            ):
-                source = source_driver(
-                    name=self.vector.metadata.name,
-                    path=source_path,
-                    time_field=entity_timestamp_column,
-                    start_time=start_time,
-                    end_time=end_time,
-                )
-            else:
-                source = source_driver(
-                    name=self.vector.metadata.name,
-                    path=source_path,
-                    time_field=entity_timestamp_column,
-                )
-            # add the index/key to selected columns
-            timestamp_key = feature_set.spec.timestamp_key
-            df = source.to_spark_df(
-                self.spark, named_view=self.named_view, time_field=timestamp_key
-            )
-            if timestamp_key and timestamp_key not in column_names:
-                columns.append((timestamp_key, None))
-            for entity in feature_set.spec.entities.keys():
-                if entity not in column_names:
-                    columns.append((entity, None))
-            # select requested columns and rename with alias where needed
-            df = df.select([col(name).alias(alias or name) for name, alias in columns])
-            dfs.append(df)
-            del df
-        # convert pandas entity_rows to spark DF if needed
-        if entity_rows is not None and not hasattr(entity_rows, "rdd"):
-            entity_rows = self.spark.createDataFrame(entity_rows)
-        # join the feature data frames
-        self.merge(entity_rows, entity_timestamp_column, feature_sets, dfs)
-        # filter joined data frame by the query param
-        if query:
-            self._result_df = self._result_df.filter(query)
-        self._result_df = self._result_df.drop(*self._drop_columns)
-        if self.vector.status.label_column:
-            self._result_df = self._result_df.dropna(
-                subset=[self.vector.status.label_column]
-            )
-        self._write_to_target()
-        return OfflineVectorResponse(self)
     def _unpersist_df(self, df):
         df.unpersist()
@@ -147,7 +45,6 @@ class SparkFeatureMerger(BaseMerger):
         featureset_df,
         left_keys: list,
         right_keys: list,
-        columns: list,
     ):
         """Perform an as of join between entity and featureset.
@@ -170,30 +67,32 @@ class SparkFeatureMerger(BaseMerger):
         from pyspark.sql.functions import col, monotonically_increasing_id, row_number
         entity_with_id = entity_df.withColumn("_row_nr", monotonically_increasing_id())
-        indexes = list(featureset.spec.entities.keys())
+        rename_right_keys = {}
+        for key in right_keys + [featureset.spec.timestamp_key]:
+            if key in entity_df.columns:
+                rename_right_keys[key] = f"ft__{key}"
         # get columns for projection
         projection = [
-            col(col_name).alias(
-                f"ft__{col_name}"
-                if col_name in indexes + [entity_timestamp_column]
-                else col_name
-            )
+            col(col_name).alias(rename_right_keys.get(col_name, col_name))
             for col_name in featureset_df.columns
         ]
         aliased_featureset_df = featureset_df.select(projection)
+        right_timestamp = rename_right_keys.get(
+            featureset.spec.timestamp_key, featureset.spec.timestamp_key
+        )
         # set join conditions
         join_cond = (
             entity_with_id[entity_timestamp_column]
-            >= aliased_featureset_df[f"ft__{entity_timestamp_column}"]
+            >= aliased_featureset_df[right_timestamp]
         )
         # join based on entities
-        for key in indexes:
+        for key_l, key_r in zip(left_keys, right_keys):
             join_cond = join_cond & (
-                entity_with_id[key] == aliased_featureset_df[f"ft__{key}"]
+                entity_with_id[key_l]
+                == aliased_featureset_df[rename_right_keys.get(key_r, key_r)]
             )
         conditional_join = entity_with_id.join(
@@ -201,20 +100,22 @@ class SparkFeatureMerger(BaseMerger):
         )
         window = Window.partitionBy("_row_nr").orderBy(
-            col(f"ft__{entity_timestamp_column}").desc(),
+            col(right_timestamp).desc(),
         )
         filter_most_recent_feature_timestamp = conditional_join.withColumn(
             "_rank", row_number().over(window)
         ).filter(col("_rank") == 1)
-        for key in indexes + [entity_timestamp_column]:
-            filter_most_recent_feature_timestamp = (
-                filter_most_recent_feature_timestamp.drop(
-                    aliased_featureset_df[f"ft__{key}"]
+        for key in right_keys + [featureset.spec.timestamp_key]:
+            if key in entity_df.columns + [entity_timestamp_column]:
+                filter_most_recent_feature_timestamp = (
+                    filter_most_recent_feature_timestamp.drop(
+                        aliased_featureset_df[f"ft__{key}"]
+                    )
                 )
-            )
-        return filter_most_recent_feature_timestamp.drop("_row_nr", "_rank")
+        return filter_most_recent_feature_timestamp.drop("_row_nr", "_rank").orderBy(
+            col(entity_timestamp_column)
+        )
     def _join(
         self,
@@ -224,7 +125,6 @@ class SparkFeatureMerger(BaseMerger):
         featureset_df,
         left_keys: list,
         right_keys: list,
-        columns: list,
     ):
         """
@@ -245,8 +145,19 @@ class SparkFeatureMerger(BaseMerger):
                 be prefixed with featureset_df name.
         """
-        indexes = list(featureset.spec.entities.keys())
-        merged_df = entity_df.join(featureset_df, on=indexes)
+        if left_keys != right_keys:
+            join_cond = [
+                entity_df[key_l] == featureset_df[key_r]
+                for key_l, key_r in zip(left_keys, right_keys)
+            ]
+        else:
+            join_cond = left_keys
+        merged_df = entity_df.join(
+            featureset_df,
+            join_cond,
+            how=self._join_type,
+        )
         return merged_df
     def get_df(self, to_pandas=True):
@@ -268,3 +179,94 @@ class SparkFeatureMerger(BaseMerger):
             return RemoteSparkRuntime.default_image
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"Unsupported kind '{kind}'")
+    def _create_engine_env(self):
+        from pyspark.sql import SparkSession
+        if self.spark is None:
+            # create spark context
+            self.spark = SparkSession.builder.appName(
+                f"vector-merger-{self.vector.metadata.name}"
+            ).getOrCreate()
+    def _get_engine_df(
+        self,
+        feature_set,
+        feature_set_name,
+        column_names=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+    ):
+        source_kwargs = {}
+        if feature_set.spec.passthrough:
+            if not feature_set.spec.source:
+                raise mlrun.errors.MLRunNotFoundError(
+                    f"passthrough feature set {feature_set_name} with no source"
+                )
+            source_kind = feature_set.spec.source.kind
+            source_path = feature_set.spec.source.path
+            source_kwargs.update(feature_set.spec.source.attributes)
+        else:
+            target = get_offline_target(feature_set)
+            if not target:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"feature set {feature_set_name} does not have offline targets"
+                )
+            source_kind = target.kind
+            source_path = target.get_target_path()
+        # handling case where there are multiple feature sets and user creates vector where
+        # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
+        source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
+        source = source_driver(
+            name=self.vector.metadata.name,
+            path=source_path,
+            time_field=time_column,
+            start_time=start_time,
+            end_time=end_time,
+            **source_kwargs,
+        )
+        columns = column_names + [ent.name for ent in feature_set.spec.entities]
+        if (
+            feature_set.spec.timestamp_key
+            and feature_set.spec.timestamp_key not in columns
+        ):
+            columns.append(feature_set.spec.timestamp_key)
+        return source.to_spark_df(
+            self.spark,
+            named_view=self.named_view,
+            time_field=time_column,
+            columns=columns,
+        )
+    def _rename_columns_and_select(
+        self,
+        df,
+        rename_col_dict,
+        columns=None,
+    ):
+        from pyspark.sql.functions import col
+        return df.select(
+            [
+                col(name).alias(rename_col_dict.get(name, name))
+                for name in columns or rename_col_dict.keys()
+            ]
+        )
+    def _drop_columns_from_result(self):
+        self._result_df = self._result_df.drop(*self._drop_columns)
+    def _filter(self, query):
+        self._result_df = self._result_df.filter(query)
+    def _order_by(self, order_by_active):
+        from pyspark.sql.functions import col
+        self._result_df = self._result_df.orderBy(
+            *[col(col_name).asc_nulls_last() for col_name in order_by_active]
+        )

mlrun/feature_store/retrieval/storey_merger.py ADDED Viewed

@@ -0,0 +1,173 @@
+# Copyright 2018 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import mlrun
+from mlrun.datastore.store_resources import ResourceCache
+from mlrun.datastore.targets import get_online_target
+from mlrun.serving.server import create_graph_server
+from ..feature_vector import OnlineVectorService
+from .base import BaseMerger
+class StoreyFeatureMerger(BaseMerger):
+    engine = "storey"
+    support_online = True
+    def __init__(self, vector, **engine_args):
+        super().__init__(vector, **engine_args)
+        self.impute_policy = engine_args.get("impute_policy")
+    def _generate_online_feature_vector_graph(
+        self,
+        entity_keys,
+        feature_set_fields,
+        feature_set_objects,
+        fixed_window_type,
+    ):
+        graph = self.vector.spec.graph.copy()
+        start_states, default_final_state, responders = graph.check_and_process_graph(
+            allow_empty=True
+        )
+        next = graph
+        fs_link_list = self._create_linked_relation_list(
+            feature_set_objects, feature_set_fields, entity_keys
+        )
+        all_columns = []
+        save_column = []
+        entity_keys = []
+        end_aliases = {}
+        for node in fs_link_list:
+            name = node.name
+            if name == self._entity_rows_node_name:
+                continue
+            featureset = feature_set_objects[name]
+            columns = feature_set_fields[name]
+            column_names = [name for name, alias in columns]
+            aliases = {name: alias for name, alias in columns if alias}
+            all_columns += [aliases.get(name, name) for name in column_names]
+            for col in node.data["save_cols"]:
+                if col not in column_names:
+                    column_names.append(col)
+                else:
+                    save_column.append(col)
+            entity_list = node.data["right_keys"] or list(
+                featureset.spec.entities.keys()
+            )
+            if not entity_keys:
+                # if entity_keys not provided by the user we will set it to be the entity of the first feature set
+                entity_keys = entity_list
+            end_aliases.update(
+                {
+                    k: v
+                    for k, v in zip(entity_list, node.data["left_keys"])
+                    if k != v and v in save_column
+                }
+            )
+            mapping = {
+                k: v for k, v in zip(node.data["left_keys"], entity_list) if k != v
+            }
+            if mapping:
+                next = next.to(
+                    "storey.Rename",
+                    f"rename-{name}",
+                    mapping=mapping,
+                )
+            next = next.to(
+                "storey.QueryByKey",
+                f"query-{name}",
+                features=column_names,
+                table=featureset.uri,
+                key_field=entity_list,
+                aliases=aliases,
+                fixed_window_type=fixed_window_type.to_qbk_fixed_window_type(),
+            )
+        if end_aliases:
+            # run if the user want to save a column that related to another entity
+            next = next.to(
+                "storey.Rename",
+                "rename-entity-to-features",
+                mapping=end_aliases,
+            )
+        for name in start_states:
+            next.set_next(name)
+        if not start_states:  # graph was empty
+            next.respond()
+        elif not responders and default_final_state:  # graph has clear state sequence
+            graph[default_final_state].respond()
+        elif not responders:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "the graph doesnt have an explicit final step to respond on"
+            )
+        return graph, all_columns, entity_keys
+    def init_online_vector_service(
+        self, entity_keys, fixed_window_type, update_stats=False
+    ):
+        try:
+            from storey import SyncEmitSource
+        except ImportError as exc:
+            raise ImportError(f"storey not installed, use pip install storey, {exc}")
+        feature_set_objects, feature_set_fields = self.vector.parse_features(
+            offline=False, update_stats=update_stats
+        )
+        if not feature_set_fields:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"No features found for feature vector '{self.vector.metadata.name}'"
+            )
+        (
+            graph,
+            requested_columns,
+            entity_keys,
+        ) = self._generate_online_feature_vector_graph(
+            entity_keys,
+            feature_set_fields,
+            feature_set_objects,
+            fixed_window_type,
+        )
+        graph.set_flow_source(SyncEmitSource())
+        server = create_graph_server(graph=graph, parameters={})
+        cache = ResourceCache()
+        all_fs_entities = []
+        for featureset in feature_set_objects.values():
+            driver = get_online_target(featureset)
+            if not driver:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"resource {featureset.uri} does not have an online data target"
+                )
+            cache.cache_table(featureset.uri, driver.get_table_object())
+            for key in featureset.spec.entities.keys():
+                if key not in all_fs_entities:
+                    all_fs_entities.append(key)
+        server.init_states(context=None, namespace=None, resource_cache=cache)
+        server.init_object(None)
+        service = OnlineVectorService(
+            self.vector,
+            graph,
+            entity_keys,
+            all_fs_entities=all_fs_entities,
+            impute_policy=self.impute_policy,
+            requested_columns=requested_columns,
+        )
+        service.initialize()
+        return service

mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl

Potentially problematic release.

mlrun 1.3.3py3-none-any.whl → 1.4.0py3-none-any.whl