mlrun: mlrun-1.3.3-py3-none-any.whl → mlrun-1.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3.dist-info/RECORD +0 -381
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -28,14 +28,16 @@ import v3io.dataplane
|
|
|
28
28
|
import v3io_frames
|
|
29
29
|
|
|
30
30
|
import mlrun
|
|
31
|
-
import mlrun.
|
|
31
|
+
import mlrun.common.model_monitoring
|
|
32
|
+
import mlrun.common.schemas
|
|
32
33
|
import mlrun.data_types.infer
|
|
33
34
|
import mlrun.feature_store as fstore
|
|
35
|
+
import mlrun.model_monitoring
|
|
36
|
+
import mlrun.model_monitoring.stores
|
|
34
37
|
import mlrun.run
|
|
35
38
|
import mlrun.utils.helpers
|
|
36
39
|
import mlrun.utils.model_monitoring
|
|
37
40
|
import mlrun.utils.v3io_clients
|
|
38
|
-
from mlrun.model_monitoring.constants import EventFieldType
|
|
39
41
|
from mlrun.utils import logger
|
|
40
42
|
|
|
41
43
|
|
|
@@ -461,6 +463,7 @@ def calculate_inputs_statistics(
|
|
|
461
463
|
|
|
462
464
|
:returns: The calculated statistics of the inputs data.
|
|
463
465
|
"""
|
|
466
|
+
|
|
464
467
|
# Use `DFDataInfer` to calculate the statistics over the inputs:
|
|
465
468
|
inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
|
|
466
469
|
df=inputs,
|
|
@@ -493,8 +496,6 @@ class BatchProcessor:
|
|
|
493
496
|
self,
|
|
494
497
|
context: mlrun.run.MLClientCtx,
|
|
495
498
|
project: str,
|
|
496
|
-
model_monitoring_access_key: str,
|
|
497
|
-
v3io_access_key: str,
|
|
498
499
|
):
|
|
499
500
|
|
|
500
501
|
"""
|
|
@@ -502,60 +503,16 @@ class BatchProcessor:
|
|
|
502
503
|
|
|
503
504
|
:param context: An MLRun context.
|
|
504
505
|
:param project: Project name.
|
|
505
|
-
:param model_monitoring_access_key: Access key to apply the model monitoring process.
|
|
506
|
-
:param v3io_access_key: Token key for v3io.
|
|
507
506
|
"""
|
|
508
507
|
self.context = context
|
|
509
508
|
self.project = project
|
|
510
509
|
|
|
511
|
-
self.v3io_access_key = v3io_access_key
|
|
512
|
-
self.model_monitoring_access_key = (
|
|
513
|
-
model_monitoring_access_key or v3io_access_key
|
|
514
|
-
)
|
|
515
|
-
|
|
516
510
|
# Initialize virtual drift object
|
|
517
511
|
self.virtual_drift = VirtualDrift(inf_capping=10)
|
|
518
512
|
|
|
519
|
-
# Define the required paths for the project objects.
|
|
520
|
-
# Note that the kv table, tsdb, and the input stream paths are located at the default location
|
|
521
|
-
# while the parquet path is located at the user-space location
|
|
522
|
-
template = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default
|
|
523
|
-
kv_path = template.format(project=self.project, kind="endpoints")
|
|
524
|
-
(
|
|
525
|
-
_,
|
|
526
|
-
self.kv_container,
|
|
527
|
-
self.kv_path,
|
|
528
|
-
) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(kv_path)
|
|
529
|
-
tsdb_path = template.format(project=project, kind="events")
|
|
530
|
-
(
|
|
531
|
-
_,
|
|
532
|
-
self.tsdb_container,
|
|
533
|
-
self.tsdb_path,
|
|
534
|
-
) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(tsdb_path)
|
|
535
|
-
stream_path = template.format(project=self.project, kind="log_stream")
|
|
536
|
-
(
|
|
537
|
-
_,
|
|
538
|
-
self.stream_container,
|
|
539
|
-
self.stream_path,
|
|
540
|
-
) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(stream_path)
|
|
541
|
-
self.parquet_path = (
|
|
542
|
-
mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
|
|
543
|
-
project=project, kind="parquet"
|
|
544
|
-
)
|
|
545
|
-
)
|
|
546
|
-
|
|
547
513
|
logger.info(
|
|
548
514
|
"Initializing BatchProcessor",
|
|
549
515
|
project=project,
|
|
550
|
-
model_monitoring_access_key_initalized=bool(model_monitoring_access_key),
|
|
551
|
-
v3io_access_key_initialized=bool(v3io_access_key),
|
|
552
|
-
parquet_path=self.parquet_path,
|
|
553
|
-
kv_container=self.kv_container,
|
|
554
|
-
kv_path=self.kv_path,
|
|
555
|
-
tsdb_container=self.tsdb_container,
|
|
556
|
-
tsdb_path=self.tsdb_path,
|
|
557
|
-
stream_container=self.stream_container,
|
|
558
|
-
stream_path=self.stream_path,
|
|
559
516
|
)
|
|
560
517
|
|
|
561
518
|
# Get drift thresholds from the model monitoring configuration
|
|
@@ -567,46 +524,87 @@ class BatchProcessor:
|
|
|
567
524
|
)
|
|
568
525
|
|
|
569
526
|
# Get a runtime database
|
|
570
|
-
self.db = mlrun.get_run_db()
|
|
571
527
|
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
self.v3io = mlrun.utils.v3io_clients.get_v3io_client(
|
|
575
|
-
access_key=self.v3io_access_key
|
|
576
|
-
)
|
|
577
|
-
self.frames = mlrun.utils.v3io_clients.get_frames_client(
|
|
578
|
-
address=mlrun.mlconf.v3io_framesd,
|
|
579
|
-
container=self.tsdb_container,
|
|
580
|
-
token=self.v3io_access_key,
|
|
528
|
+
self.db = mlrun.model_monitoring.stores.get_model_endpoint_store(
|
|
529
|
+
project=project
|
|
581
530
|
)
|
|
582
531
|
|
|
532
|
+
if not mlrun.mlconf.is_ce_mode():
|
|
533
|
+
# TODO: Once there is a time series DB alternative in a non-CE deployment, we need to update this if
|
|
534
|
+
# statement to be applied only for V3IO TSDB
|
|
535
|
+
self._initialize_v3io_configurations()
|
|
536
|
+
|
|
583
537
|
# If an error occurs, it will be raised using the following argument
|
|
584
538
|
self.exception = None
|
|
585
539
|
|
|
586
540
|
# Get the batch interval range
|
|
587
|
-
self.batch_dict = context.parameters[
|
|
541
|
+
self.batch_dict = context.parameters[
|
|
542
|
+
mlrun.common.model_monitoring.EventFieldType.BATCH_INTERVALS_DICT
|
|
543
|
+
]
|
|
588
544
|
|
|
589
|
-
# TODO: This will be removed in 1.
|
|
545
|
+
# TODO: This will be removed in 1.5.0 once the job params can be parsed with different types
|
|
590
546
|
# Convert batch dict string into a dictionary
|
|
591
547
|
if isinstance(self.batch_dict, str):
|
|
592
548
|
self._parse_batch_dict_str()
|
|
593
549
|
|
|
550
|
+
def _initialize_v3io_configurations(self):
|
|
551
|
+
self.v3io_access_key = os.environ.get("V3IO_ACCESS_KEY")
|
|
552
|
+
self.model_monitoring_access_key = (
|
|
553
|
+
os.environ.get("MODEL_MONITORING_ACCESS_KEY") or self.v3io_access_key
|
|
554
|
+
)
|
|
555
|
+
|
|
556
|
+
# Define the required paths for the project objects
|
|
557
|
+
tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
558
|
+
project=self.project,
|
|
559
|
+
kind=mlrun.common.model_monitoring.FileTargetKind.EVENTS,
|
|
560
|
+
)
|
|
561
|
+
(
|
|
562
|
+
_,
|
|
563
|
+
self.tsdb_container,
|
|
564
|
+
self.tsdb_path,
|
|
565
|
+
) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(tsdb_path)
|
|
566
|
+
# stream_path = template.format(project=self.project, kind="log_stream")
|
|
567
|
+
stream_path = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
568
|
+
project=self.project,
|
|
569
|
+
kind=mlrun.common.model_monitoring.FileTargetKind.LOG_STREAM,
|
|
570
|
+
)
|
|
571
|
+
(
|
|
572
|
+
_,
|
|
573
|
+
self.stream_container,
|
|
574
|
+
self.stream_path,
|
|
575
|
+
) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(stream_path)
|
|
576
|
+
|
|
577
|
+
# Get the frames clients based on the v3io configuration
|
|
578
|
+
# it will be used later for writing the results into the tsdb
|
|
579
|
+
self.v3io = mlrun.utils.v3io_clients.get_v3io_client(
|
|
580
|
+
access_key=self.v3io_access_key
|
|
581
|
+
)
|
|
582
|
+
self.frames = mlrun.utils.v3io_clients.get_frames_client(
|
|
583
|
+
address=mlrun.mlconf.v3io_framesd,
|
|
584
|
+
container=self.tsdb_container,
|
|
585
|
+
token=self.v3io_access_key,
|
|
586
|
+
)
|
|
587
|
+
|
|
594
588
|
def post_init(self):
|
|
595
589
|
"""
|
|
596
590
|
Preprocess of the batch processing.
|
|
597
591
|
"""
|
|
598
592
|
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
593
|
+
if not mlrun.mlconf.is_ce_mode():
|
|
594
|
+
# Create v3io stream based on the input stream
|
|
595
|
+
response = self.v3io.create_stream(
|
|
596
|
+
container=self.stream_container,
|
|
597
|
+
path=self.stream_path,
|
|
598
|
+
shard_count=1,
|
|
599
|
+
raise_for_status=v3io.dataplane.RaiseForStatus.never,
|
|
600
|
+
access_key=self.v3io_access_key,
|
|
601
|
+
)
|
|
607
602
|
|
|
608
|
-
|
|
609
|
-
|
|
603
|
+
if not (
|
|
604
|
+
response.status_code == 400 and "ResourceInUse" in str(response.body)
|
|
605
|
+
):
|
|
606
|
+
response.raise_for_status([409, 204, 403])
|
|
607
|
+
pass
|
|
610
608
|
|
|
611
609
|
def run(self):
|
|
612
610
|
"""
|
|
@@ -614,231 +612,218 @@ class BatchProcessor:
|
|
|
614
612
|
"""
|
|
615
613
|
# Get model endpoints (each deployed project has at least 1 serving model):
|
|
616
614
|
try:
|
|
617
|
-
endpoints = self.db.list_model_endpoints(
|
|
615
|
+
endpoints = self.db.list_model_endpoints()
|
|
618
616
|
except Exception as e:
|
|
619
617
|
logger.error("Failed to list endpoints", exc=e)
|
|
620
618
|
return
|
|
621
619
|
|
|
622
|
-
|
|
623
|
-
for endpoint in endpoints.endpoints:
|
|
620
|
+
for endpoint in endpoints:
|
|
624
621
|
if (
|
|
625
|
-
endpoint.
|
|
626
|
-
and endpoint
|
|
627
|
-
|
|
622
|
+
endpoint[mlrun.common.model_monitoring.EventFieldType.ACTIVE]
|
|
623
|
+
and endpoint[
|
|
624
|
+
mlrun.common.model_monitoring.EventFieldType.MONITORING_MODE
|
|
625
|
+
]
|
|
626
|
+
== mlrun.common.model_monitoring.ModelMonitoringMode.enabled.value
|
|
628
627
|
):
|
|
629
|
-
active_endpoints.add(endpoint.metadata.uid)
|
|
630
|
-
|
|
631
|
-
# perform drift analysis for each model endpoint
|
|
632
|
-
for endpoint_id in active_endpoints:
|
|
633
|
-
try:
|
|
634
|
-
|
|
635
|
-
# Get model endpoint object:
|
|
636
|
-
endpoint = self.db.get_model_endpoint(
|
|
637
|
-
project=self.project, endpoint_id=endpoint_id
|
|
638
|
-
)
|
|
639
|
-
|
|
640
628
|
# Skip router endpoint:
|
|
641
629
|
if (
|
|
642
|
-
|
|
643
|
-
|
|
630
|
+
int(
|
|
631
|
+
endpoint[
|
|
632
|
+
mlrun.common.model_monitoring.EventFieldType.ENDPOINT_TYPE
|
|
633
|
+
]
|
|
634
|
+
)
|
|
635
|
+
== mlrun.common.model_monitoring.EndpointType.ROUTER
|
|
644
636
|
):
|
|
645
|
-
# endpoint
|
|
646
|
-
logger.info(
|
|
637
|
+
# Router endpoint has no feature stats
|
|
638
|
+
logger.info(
|
|
639
|
+
f"{endpoint[mlrun.common.model_monitoring.EventFieldType.UID]} is router skipping"
|
|
640
|
+
)
|
|
647
641
|
continue
|
|
642
|
+
self.update_drift_metrics(endpoint=endpoint)
|
|
648
643
|
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
644
|
+
def update_drift_metrics(self, endpoint: dict):
|
|
645
|
+
try:
|
|
646
|
+
# Convert feature set into dataframe and get the latest dataset
|
|
647
|
+
(
|
|
648
|
+
_,
|
|
649
|
+
serving_function_name,
|
|
650
|
+
_,
|
|
651
|
+
_,
|
|
652
|
+
) = mlrun.utils.helpers.parse_versioned_object_uri(
|
|
653
|
+
endpoint[mlrun.common.model_monitoring.EventFieldType.FUNCTION_URI]
|
|
654
|
+
)
|
|
660
655
|
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
656
|
+
model_name = endpoint[
|
|
657
|
+
mlrun.common.model_monitoring.EventFieldType.MODEL
|
|
658
|
+
].replace(":", "-")
|
|
664
659
|
|
|
665
|
-
|
|
666
|
-
|
|
660
|
+
m_fs = fstore.get_feature_set(
|
|
661
|
+
f"store://feature-sets/{self.project}/monitoring-{serving_function_name}-{model_name}"
|
|
662
|
+
)
|
|
667
663
|
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
start_time=start_time,
|
|
671
|
-
end_time=end_time,
|
|
672
|
-
time_column="timestamp",
|
|
673
|
-
)
|
|
664
|
+
# Getting batch interval start time and end time
|
|
665
|
+
start_time, end_time = self._get_interval_range()
|
|
674
666
|
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
start_time=str(
|
|
682
|
-
datetime.datetime.now() - datetime.timedelta(hours=1)
|
|
683
|
-
),
|
|
684
|
-
end_time=str(datetime.datetime.now()),
|
|
685
|
-
)
|
|
686
|
-
continue
|
|
667
|
+
try:
|
|
668
|
+
df = m_fs.to_dataframe(
|
|
669
|
+
start_time=start_time,
|
|
670
|
+
end_time=end_time,
|
|
671
|
+
time_column=mlrun.common.model_monitoring.EventFieldType.TIMESTAMP,
|
|
672
|
+
)
|
|
687
673
|
|
|
688
|
-
|
|
689
|
-
# as expected. In that case, the existence of the file will be checked before trying to get
|
|
690
|
-
# the offline data from the feature set.
|
|
691
|
-
# Continue if not enough events provided since the deployment of the model endpoint
|
|
692
|
-
except FileNotFoundError:
|
|
674
|
+
if len(df) == 0:
|
|
693
675
|
logger.warn(
|
|
694
|
-
"
|
|
676
|
+
"Not enough model events since the beginning of the batch interval",
|
|
695
677
|
parquet_target=m_fs.status.targets[0].path,
|
|
696
|
-
endpoint=
|
|
678
|
+
endpoint=endpoint[
|
|
679
|
+
mlrun.common.model_monitoring.EventFieldType.UID
|
|
680
|
+
],
|
|
697
681
|
min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
|
|
682
|
+
start_time=str(
|
|
683
|
+
datetime.datetime.now() - datetime.timedelta(hours=1)
|
|
684
|
+
),
|
|
685
|
+
end_time=str(datetime.datetime.now()),
|
|
698
686
|
)
|
|
699
|
-
|
|
687
|
+
return
|
|
688
|
+
|
|
689
|
+
# TODO: The below warn will be removed once the state of the Feature Store target is updated
|
|
690
|
+
# as expected. In that case, the existence of the file will be checked before trying to get
|
|
691
|
+
# the offline data from the feature set.
|
|
692
|
+
# Continue if not enough events provided since the deployment of the model endpoint
|
|
693
|
+
except FileNotFoundError:
|
|
694
|
+
logger.warn(
|
|
695
|
+
"Parquet not found, probably due to not enough model events",
|
|
696
|
+
parquet_target=m_fs.status.targets[0].path,
|
|
697
|
+
endpoint=endpoint[mlrun.common.model_monitoring.EventFieldType.UID],
|
|
698
|
+
min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
|
|
699
|
+
)
|
|
700
|
+
return
|
|
700
701
|
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
]
|
|
702
|
+
# Get feature names from monitoring feature set
|
|
703
|
+
feature_names = [
|
|
704
|
+
feature_name["name"] for feature_name in m_fs.spec.features.to_dict()
|
|
705
|
+
]
|
|
706
706
|
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
707
|
+
# Create DataFrame based on the input features
|
|
708
|
+
stats_columns = [
|
|
709
|
+
mlrun.common.model_monitoring.EventFieldType.TIMESTAMP,
|
|
710
|
+
*feature_names,
|
|
711
|
+
]
|
|
712
712
|
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
713
|
+
# Add label names if provided
|
|
714
|
+
if endpoint[mlrun.common.model_monitoring.EventFieldType.LABEL_NAMES]:
|
|
715
|
+
labels = endpoint[
|
|
716
|
+
mlrun.common.model_monitoring.EventFieldType.LABEL_NAMES
|
|
717
|
+
]
|
|
718
|
+
if isinstance(labels, str):
|
|
719
|
+
labels = json.loads(labels)
|
|
720
|
+
stats_columns.extend(labels)
|
|
721
|
+
named_features_df = df[stats_columns].copy()
|
|
722
|
+
|
|
723
|
+
# Infer feature set stats and schema
|
|
724
|
+
fstore.api._infer_from_static_df(
|
|
725
|
+
named_features_df,
|
|
726
|
+
m_fs,
|
|
727
|
+
options=mlrun.data_types.infer.InferOptions.all_stats(),
|
|
728
|
+
)
|
|
716
729
|
|
|
717
|
-
|
|
730
|
+
# Save feature set to apply changes
|
|
731
|
+
m_fs.save()
|
|
718
732
|
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
options=mlrun.data_types.infer.InferOptions.all_stats(),
|
|
724
|
-
)
|
|
733
|
+
# Get the timestamp of the latest request:
|
|
734
|
+
timestamp = df[mlrun.common.model_monitoring.EventFieldType.TIMESTAMP].iloc[
|
|
735
|
+
-1
|
|
736
|
+
]
|
|
725
737
|
|
|
726
|
-
|
|
727
|
-
|
|
738
|
+
# Get the feature stats from the model endpoint for reference data
|
|
739
|
+
feature_stats = json.loads(
|
|
740
|
+
endpoint[mlrun.common.model_monitoring.EventFieldType.FEATURE_STATS]
|
|
741
|
+
)
|
|
728
742
|
|
|
729
|
-
|
|
730
|
-
|
|
743
|
+
# Get the current stats:
|
|
744
|
+
current_stats = calculate_inputs_statistics(
|
|
745
|
+
sample_set_statistics=feature_stats,
|
|
746
|
+
inputs=named_features_df,
|
|
747
|
+
)
|
|
731
748
|
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
749
|
+
# Compute the drift based on the histogram of the current stats and the histogram of the original
|
|
750
|
+
# feature stats that can be found in the model endpoint object:
|
|
751
|
+
drift_result = self.virtual_drift.compute_drift_from_histograms(
|
|
752
|
+
feature_stats=feature_stats,
|
|
753
|
+
current_stats=current_stats,
|
|
754
|
+
)
|
|
755
|
+
logger.info("Drift result", drift_result=drift_result)
|
|
756
|
+
|
|
757
|
+
# Get drift thresholds from the model configuration:
|
|
758
|
+
monitor_configuration = (
|
|
759
|
+
json.loads(
|
|
760
|
+
endpoint[
|
|
761
|
+
mlrun.common.model_monitoring.EventFieldType.MONITOR_CONFIGURATION
|
|
762
|
+
]
|
|
736
763
|
)
|
|
764
|
+
or {}
|
|
765
|
+
)
|
|
766
|
+
possible_drift = monitor_configuration.get(
|
|
767
|
+
"possible_drift", self.default_possible_drift_threshold
|
|
768
|
+
)
|
|
769
|
+
drift_detected = monitor_configuration.get(
|
|
770
|
+
"drift_detected", self.default_drift_detected_threshold
|
|
771
|
+
)
|
|
737
772
|
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
drift_result
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
773
|
+
# Check for possible drift based on the results of the statistical metrics defined above:
|
|
774
|
+
drift_status, drift_measure = self.virtual_drift.check_for_drift(
|
|
775
|
+
metrics_results_dictionary=drift_result,
|
|
776
|
+
possible_drift_threshold=possible_drift,
|
|
777
|
+
drift_detected_threshold=drift_detected,
|
|
778
|
+
)
|
|
779
|
+
logger.info(
|
|
780
|
+
"Drift status",
|
|
781
|
+
endpoint_id=endpoint[mlrun.common.model_monitoring.EventFieldType.UID],
|
|
782
|
+
drift_status=drift_status.value,
|
|
783
|
+
drift_measure=drift_measure,
|
|
784
|
+
)
|
|
745
785
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
drift_detected = monitor_configuration.get(
|
|
752
|
-
"drift_detected", self.default_drift_detected_threshold
|
|
753
|
-
)
|
|
786
|
+
attributes = {
|
|
787
|
+
"current_stats": json.dumps(current_stats),
|
|
788
|
+
"drift_measures": json.dumps(drift_result),
|
|
789
|
+
"drift_status": drift_status.value,
|
|
790
|
+
}
|
|
754
791
|
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
endpoint_id=
|
|
764
|
-
|
|
792
|
+
self.db.update_model_endpoint(
|
|
793
|
+
endpoint_id=endpoint[mlrun.common.model_monitoring.EventFieldType.UID],
|
|
794
|
+
attributes=attributes,
|
|
795
|
+
)
|
|
796
|
+
|
|
797
|
+
if not mlrun.mlconf.is_ce_mode():
|
|
798
|
+
# Update drift results in TSDB
|
|
799
|
+
self._update_drift_in_input_stream(
|
|
800
|
+
endpoint_id=endpoint[
|
|
801
|
+
mlrun.common.model_monitoring.EventFieldType.UID
|
|
802
|
+
],
|
|
803
|
+
drift_status=drift_status,
|
|
765
804
|
drift_measure=drift_measure,
|
|
805
|
+
drift_result=drift_result,
|
|
806
|
+
timestamp=timestamp,
|
|
766
807
|
)
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
):
|
|
773
|
-
self.v3io.stream.put_records(
|
|
774
|
-
container=self.stream_container,
|
|
775
|
-
stream_path=self.stream_path,
|
|
776
|
-
records=[
|
|
777
|
-
{
|
|
778
|
-
"data": json.dumps(
|
|
779
|
-
{
|
|
780
|
-
"endpoint_id": endpoint_id,
|
|
781
|
-
"drift_status": drift_status.value,
|
|
782
|
-
"drift_measure": drift_measure,
|
|
783
|
-
"drift_per_feature": {**drift_result},
|
|
784
|
-
}
|
|
785
|
-
)
|
|
786
|
-
}
|
|
787
|
-
],
|
|
788
|
-
)
|
|
789
|
-
|
|
790
|
-
attributes = {
|
|
791
|
-
"current_stats": json.dumps(current_stats),
|
|
792
|
-
"drift_measures": json.dumps(drift_result),
|
|
793
|
-
"drift_status": drift_status.value,
|
|
794
|
-
}
|
|
795
|
-
|
|
796
|
-
self.db.patch_model_endpoint(
|
|
797
|
-
project=self.project,
|
|
798
|
-
endpoint_id=endpoint_id,
|
|
799
|
-
attributes=attributes,
|
|
808
|
+
logger.info(
|
|
809
|
+
"Done updating drift measures",
|
|
810
|
+
endpoint_id=endpoint[
|
|
811
|
+
mlrun.common.model_monitoring.EventFieldType.UID
|
|
812
|
+
],
|
|
800
813
|
)
|
|
801
814
|
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
format=EventFieldType.TIME_FORMAT,
|
|
808
|
-
),
|
|
809
|
-
"record_type": "drift_measures",
|
|
810
|
-
"tvd_mean": drift_result["tvd_mean"],
|
|
811
|
-
"kld_mean": drift_result["kld_mean"],
|
|
812
|
-
"hellinger_mean": drift_result["hellinger_mean"],
|
|
813
|
-
}
|
|
814
|
-
|
|
815
|
-
try:
|
|
816
|
-
self.frames.write(
|
|
817
|
-
backend="tsdb",
|
|
818
|
-
table=self.tsdb_path,
|
|
819
|
-
dfs=pd.DataFrame.from_dict([tsdb_drift_measures]),
|
|
820
|
-
index_cols=["timestamp", "endpoint_id", "record_type"],
|
|
821
|
-
)
|
|
822
|
-
except v3io_frames.errors.Error as err:
|
|
823
|
-
logger.warn(
|
|
824
|
-
"Could not write drift measures to TSDB",
|
|
825
|
-
err=err,
|
|
826
|
-
tsdb_path=self.tsdb_path,
|
|
827
|
-
endpoint=endpoint_id,
|
|
828
|
-
)
|
|
829
|
-
|
|
830
|
-
logger.info("Done updating drift measures", endpoint_id=endpoint_id)
|
|
831
|
-
|
|
832
|
-
except Exception as e:
|
|
833
|
-
logger.error(f"Exception for endpoint {endpoint_id}")
|
|
834
|
-
self.exception = e
|
|
815
|
+
except Exception as e:
|
|
816
|
+
logger.error(
|
|
817
|
+
f"Exception for endpoint {endpoint[mlrun.common.model_monitoring.EventFieldType.UID]}"
|
|
818
|
+
)
|
|
819
|
+
self.exception = e
|
|
835
820
|
|
|
836
|
-
def
|
|
821
|
+
def _get_interval_range(self) -> Tuple[datetime.datetime, datetime.datetime]:
|
|
837
822
|
"""Getting batch interval time range"""
|
|
838
823
|
minutes, hours, days = (
|
|
839
|
-
self.batch_dict[EventFieldType.MINUTES],
|
|
840
|
-
self.batch_dict[EventFieldType.HOURS],
|
|
841
|
-
self.batch_dict[EventFieldType.DAYS],
|
|
824
|
+
self.batch_dict[mlrun.common.model_monitoring.EventFieldType.MINUTES],
|
|
825
|
+
self.batch_dict[mlrun.common.model_monitoring.EventFieldType.HOURS],
|
|
826
|
+
self.batch_dict[mlrun.common.model_monitoring.EventFieldType.DAYS],
|
|
842
827
|
)
|
|
843
828
|
start_time = datetime.datetime.now() - datetime.timedelta(
|
|
844
829
|
minutes=minutes, hours=hours, days=days
|
|
@@ -858,13 +843,79 @@ class BatchProcessor:
|
|
|
858
843
|
pair_list = pair.split(":")
|
|
859
844
|
self.batch_dict[pair_list[0]] = float(pair_list[1])
|
|
860
845
|
|
|
846
|
+
def _update_drift_in_input_stream(
|
|
847
|
+
self,
|
|
848
|
+
endpoint_id: str,
|
|
849
|
+
drift_status: DriftStatus,
|
|
850
|
+
drift_measure: float,
|
|
851
|
+
drift_result: Dict[str, Dict[str, Any]],
|
|
852
|
+
timestamp: pd._libs.tslibs.timestamps.Timestamp,
|
|
853
|
+
):
|
|
854
|
+
"""Update drift results in input stream.
|
|
855
|
+
|
|
856
|
+
:param endpoint_id: The unique id of the model endpoint.
|
|
857
|
+
:param drift_status: Drift status result. Possible values can be found under DriftStatus enum class.
|
|
858
|
+
:param drift_measure: The drift result (float) based on the mean of the Total Variance Distance and the
|
|
859
|
+
Hellinger distance.
|
|
860
|
+
:param drift_result: A dictionary that includes the drift results for each feature.
|
|
861
|
+
:param timestamp: Pandas Timestamp value.
|
|
862
|
+
|
|
863
|
+
"""
|
|
864
|
+
|
|
865
|
+
if (
|
|
866
|
+
drift_status == DriftStatus.POSSIBLE_DRIFT
|
|
867
|
+
or drift_status == DriftStatus.DRIFT_DETECTED
|
|
868
|
+
):
|
|
869
|
+
self.v3io.stream.put_records(
|
|
870
|
+
container=self.stream_container,
|
|
871
|
+
stream_path=self.stream_path,
|
|
872
|
+
records=[
|
|
873
|
+
{
|
|
874
|
+
"data": json.dumps(
|
|
875
|
+
{
|
|
876
|
+
"endpoint_id": endpoint_id,
|
|
877
|
+
"drift_status": drift_status.value,
|
|
878
|
+
"drift_measure": drift_measure,
|
|
879
|
+
"drift_per_feature": {**drift_result},
|
|
880
|
+
}
|
|
881
|
+
)
|
|
882
|
+
}
|
|
883
|
+
],
|
|
884
|
+
)
|
|
885
|
+
|
|
886
|
+
# Update the results in tsdb:
|
|
887
|
+
tsdb_drift_measures = {
|
|
888
|
+
"endpoint_id": endpoint_id,
|
|
889
|
+
"timestamp": pd.to_datetime(
|
|
890
|
+
timestamp,
|
|
891
|
+
format=mlrun.common.model_monitoring.EventFieldType.TIME_FORMAT,
|
|
892
|
+
),
|
|
893
|
+
"record_type": "drift_measures",
|
|
894
|
+
"tvd_mean": drift_result["tvd_mean"],
|
|
895
|
+
"kld_mean": drift_result["kld_mean"],
|
|
896
|
+
"hellinger_mean": drift_result["hellinger_mean"],
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
try:
|
|
900
|
+
self.frames.write(
|
|
901
|
+
backend="tsdb",
|
|
902
|
+
table=self.tsdb_path,
|
|
903
|
+
dfs=pd.DataFrame.from_dict([tsdb_drift_measures]),
|
|
904
|
+
index_cols=["timestamp", "endpoint_id", "record_type"],
|
|
905
|
+
)
|
|
906
|
+
except v3io_frames.errors.Error as err:
|
|
907
|
+
logger.warn(
|
|
908
|
+
"Could not write drift measures to TSDB",
|
|
909
|
+
err=err,
|
|
910
|
+
tsdb_path=self.tsdb_path,
|
|
911
|
+
endpoint=endpoint_id,
|
|
912
|
+
)
|
|
913
|
+
|
|
861
914
|
|
|
862
915
|
def handler(context: mlrun.run.MLClientCtx):
|
|
863
916
|
batch_processor = BatchProcessor(
|
|
864
917
|
context=context,
|
|
865
918
|
project=context.project,
|
|
866
|
-
model_monitoring_access_key=os.environ.get("MODEL_MONITORING_ACCESS_KEY"),
|
|
867
|
-
v3io_access_key=os.environ.get("V3IO_ACCESS_KEY"),
|
|
868
919
|
)
|
|
869
920
|
batch_processor.post_init()
|
|
870
921
|
batch_processor.run()
|