mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3.dist-info/RECORD +0 -381
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -19,23 +19,25 @@ import os
|
|
|
19
19
|
import typing
|
|
20
20
|
|
|
21
21
|
import pandas as pd
|
|
22
|
-
|
|
23
|
-
# Constants
|
|
24
22
|
import storey
|
|
25
|
-
import v3io
|
|
26
|
-
import v3io.dataplane
|
|
27
23
|
|
|
24
|
+
import mlrun
|
|
25
|
+
import mlrun.common.model_monitoring
|
|
28
26
|
import mlrun.config
|
|
29
27
|
import mlrun.datastore.targets
|
|
30
28
|
import mlrun.feature_store.steps
|
|
31
29
|
import mlrun.utils
|
|
32
30
|
import mlrun.utils.model_monitoring
|
|
33
31
|
import mlrun.utils.v3io_clients
|
|
34
|
-
from mlrun.model_monitoring
|
|
32
|
+
from mlrun.common.model_monitoring import (
|
|
35
33
|
EventFieldType,
|
|
36
34
|
EventKeyMetrics,
|
|
37
35
|
EventLiveStats,
|
|
36
|
+
FileTargetKind,
|
|
37
|
+
ModelEndpointTarget,
|
|
38
|
+
ProjectSecretKeys,
|
|
38
39
|
)
|
|
40
|
+
from mlrun.model_monitoring.stores import get_model_endpoint_store
|
|
39
41
|
from mlrun.utils import logger
|
|
40
42
|
|
|
41
43
|
|
|
@@ -45,81 +47,90 @@ class EventStreamProcessor:
|
|
|
45
47
|
self,
|
|
46
48
|
project: str,
|
|
47
49
|
parquet_batching_max_events: int,
|
|
50
|
+
parquet_target: str,
|
|
48
51
|
sample_window: int = 10,
|
|
49
|
-
tsdb_batching_max_events: int = 10,
|
|
50
|
-
tsdb_batching_timeout_secs: int = 60 * 5, # Default 5 minutes
|
|
51
52
|
parquet_batching_timeout_secs: int = 30 * 60, # Default 30 minutes
|
|
52
53
|
aggregate_count_windows: typing.Optional[typing.List[str]] = None,
|
|
53
54
|
aggregate_count_period: str = "30s",
|
|
54
55
|
aggregate_avg_windows: typing.Optional[typing.List[str]] = None,
|
|
55
56
|
aggregate_avg_period: str = "30s",
|
|
56
|
-
v3io_access_key: typing.Optional[str] = None,
|
|
57
|
-
v3io_framesd: typing.Optional[str] = None,
|
|
58
|
-
v3io_api: typing.Optional[str] = None,
|
|
59
57
|
model_monitoring_access_key: str = None,
|
|
60
58
|
):
|
|
59
|
+
# General configurations, mainly used for the storey steps in the future serving graph
|
|
61
60
|
self.project = project
|
|
62
61
|
self.sample_window = sample_window
|
|
63
|
-
self.tsdb_batching_max_events = tsdb_batching_max_events
|
|
64
|
-
self.tsdb_batching_timeout_secs = tsdb_batching_timeout_secs
|
|
65
|
-
self.parquet_batching_max_events = parquet_batching_max_events
|
|
66
|
-
self.parquet_batching_timeout_secs = parquet_batching_timeout_secs
|
|
67
62
|
self.aggregate_count_windows = aggregate_count_windows or ["5m", "1h"]
|
|
68
63
|
self.aggregate_count_period = aggregate_count_period
|
|
69
64
|
self.aggregate_avg_windows = aggregate_avg_windows or ["5m", "1h"]
|
|
70
65
|
self.aggregate_avg_period = aggregate_avg_period
|
|
71
66
|
|
|
67
|
+
# Parquet path and configurations
|
|
68
|
+
self.parquet_path = parquet_target
|
|
69
|
+
self.parquet_batching_max_events = parquet_batching_max_events
|
|
70
|
+
self.parquet_batching_timeout_secs = parquet_batching_timeout_secs
|
|
71
|
+
|
|
72
|
+
self.model_endpoint_store_target = (
|
|
73
|
+
mlrun.mlconf.model_endpoint_monitoring.store_type
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
logger.info(
|
|
77
|
+
"Initializing model monitoring event stream processor",
|
|
78
|
+
parquet_path=self.parquet_path,
|
|
79
|
+
parquet_batching_max_events=self.parquet_batching_max_events,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
self.storage_options = None
|
|
83
|
+
if not mlrun.mlconf.is_ce_mode():
|
|
84
|
+
self._initialize_v3io_configurations(
|
|
85
|
+
model_monitoring_access_key=model_monitoring_access_key
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
def _initialize_v3io_configurations(
|
|
89
|
+
self,
|
|
90
|
+
tsdb_batching_max_events: int = 10,
|
|
91
|
+
tsdb_batching_timeout_secs: int = 60 * 5, # Default 5 minutes
|
|
92
|
+
v3io_access_key: typing.Optional[str] = None,
|
|
93
|
+
v3io_framesd: typing.Optional[str] = None,
|
|
94
|
+
v3io_api: typing.Optional[str] = None,
|
|
95
|
+
model_monitoring_access_key: str = None,
|
|
96
|
+
):
|
|
97
|
+
# Get the V3IO configurations
|
|
72
98
|
self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
|
|
73
99
|
self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
|
|
74
100
|
|
|
75
101
|
self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
|
|
76
102
|
self.model_monitoring_access_key = (
|
|
77
103
|
model_monitoring_access_key
|
|
78
|
-
or os.environ.get(
|
|
104
|
+
or os.environ.get(ProjectSecretKeys.ACCESS_KEY)
|
|
79
105
|
or self.v3io_access_key
|
|
80
106
|
)
|
|
81
107
|
self.storage_options = dict(
|
|
82
108
|
v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
|
|
83
109
|
)
|
|
84
110
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
111
|
+
# KV path
|
|
112
|
+
kv_path = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
113
|
+
project=self.project, kind=FileTargetKind.ENDPOINTS
|
|
114
|
+
)
|
|
88
115
|
(
|
|
89
116
|
_,
|
|
90
117
|
self.kv_container,
|
|
91
118
|
self.kv_path,
|
|
92
119
|
) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(kv_path)
|
|
93
120
|
|
|
94
|
-
|
|
121
|
+
# TSDB path and configurations
|
|
122
|
+
tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
123
|
+
project=self.project, kind=FileTargetKind.EVENTS
|
|
124
|
+
)
|
|
95
125
|
(
|
|
96
126
|
_,
|
|
97
127
|
self.tsdb_container,
|
|
98
128
|
self.tsdb_path,
|
|
99
129
|
) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(tsdb_path)
|
|
100
|
-
self.tsdb_path = f"{self.tsdb_container}/{self.tsdb_path}"
|
|
101
130
|
|
|
102
|
-
self.
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
)
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
logger.info(
|
|
109
|
-
"Initializing model monitoring event stream processor",
|
|
110
|
-
parquet_batching_max_events=self.parquet_batching_max_events,
|
|
111
|
-
v3io_access_key=self.v3io_access_key,
|
|
112
|
-
model_monitoring_access_key=self.model_monitoring_access_key,
|
|
113
|
-
default_store_prefix=mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default,
|
|
114
|
-
user_space_store_prefix=mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space,
|
|
115
|
-
v3io_api=self.v3io_api,
|
|
116
|
-
v3io_framesd=self.v3io_framesd,
|
|
117
|
-
kv_container=self.kv_container,
|
|
118
|
-
kv_path=self.kv_path,
|
|
119
|
-
tsdb_container=self.tsdb_container,
|
|
120
|
-
tsdb_path=self.tsdb_path,
|
|
121
|
-
parquet_path=self.parquet_path,
|
|
122
|
-
)
|
|
131
|
+
self.tsdb_path = f"{self.tsdb_container}/{self.tsdb_path}"
|
|
132
|
+
self.tsdb_batching_max_events = tsdb_batching_max_events
|
|
133
|
+
self.tsdb_batching_timeout_secs = tsdb_batching_timeout_secs
|
|
123
134
|
|
|
124
135
|
def apply_monitoring_serving_graph(self, fn):
|
|
125
136
|
"""
|
|
@@ -127,20 +138,23 @@ class EventStreamProcessor:
|
|
|
127
138
|
of different operations that are executed on the events from the model server. Each event has
|
|
128
139
|
metadata (function_uri, timestamp, class, etc.) but also inputs and predictions from the model server.
|
|
129
140
|
Throughout the serving graph, the results are written to 3 different databases:
|
|
130
|
-
1. KV (steps 7-9): Stores metadata and stats about the average latency and the amount of predictions over
|
|
131
|
-
per endpoint. for example the amount of predictions of endpoint x in the last 5 min. This data is used
|
|
132
|
-
the monitoring dashboards in grafana.
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
141
|
+
1. KV/SQL (steps 7-9): Stores metadata and stats about the average latency and the amount of predictions over
|
|
142
|
+
time per endpoint. for example the amount of predictions of endpoint x in the last 5 min. This data is used
|
|
143
|
+
by the monitoring dashboards in grafana. The model endpoints table also contains data on the model endpoint
|
|
144
|
+
from other processes, such as current_stats that is being calculated by the monitoring batch job
|
|
145
|
+
process. If the target is from type KV, then the model endpoints table can be found under
|
|
146
|
+
v3io:///users/pipelines/project-name/model-endpoints/endpoints/. If the target is SQL, then the table
|
|
147
|
+
is stored within the database that was defined in the provided connection string and can be found
|
|
148
|
+
under mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection.
|
|
136
149
|
2. TSDB (steps 12-18): Stores live data of different key metric dictionaries in tsdb target. Results can be
|
|
137
150
|
found under v3io:///users/pipelines/project-name/model-endpoints/events/. At the moment, this part supports
|
|
138
151
|
3 different key metric dictionaries: base_metrics (average latency and predictions over time),
|
|
139
152
|
endpoint_features (Prediction and feature names and values), and custom_metrics (user-defined metrics).
|
|
140
153
|
This data is also being used by the monitoring dashboards in grafana.
|
|
141
154
|
3. Parquet (steps 19-20): This Parquet file includes the required data for the model monitoring batch job
|
|
142
|
-
that run every hour by default.
|
|
143
|
-
|
|
155
|
+
that run every hour by default. If defined, the parquet target path can be found under
|
|
156
|
+
mlrun.mlconf.model_endpoint_monitoring.offline. Otherwise, the default parquet path is under
|
|
157
|
+
mlrun.mlconf.model_endpoint_monitoring.user_space.
|
|
144
158
|
|
|
145
159
|
:param fn: A serving function.
|
|
146
160
|
"""
|
|
@@ -151,9 +165,6 @@ class EventStreamProcessor:
|
|
|
151
165
|
def apply_process_endpoint_event():
|
|
152
166
|
graph.add_step(
|
|
153
167
|
"ProcessEndpointEvent",
|
|
154
|
-
kv_container=self.kv_container,
|
|
155
|
-
kv_path=self.kv_path,
|
|
156
|
-
v3io_access_key=self.v3io_access_key,
|
|
157
168
|
full_event=True,
|
|
158
169
|
project=self.project,
|
|
159
170
|
)
|
|
@@ -182,10 +193,8 @@ class EventStreamProcessor:
|
|
|
182
193
|
graph.add_step(
|
|
183
194
|
"MapFeatureNames",
|
|
184
195
|
name="MapFeatureNames",
|
|
185
|
-
kv_container=self.kv_container,
|
|
186
|
-
kv_path=self.kv_path,
|
|
187
|
-
access_key=self.v3io_access_key,
|
|
188
196
|
infer_columns_from_data=True,
|
|
197
|
+
project=self.project,
|
|
189
198
|
after="flatten_events",
|
|
190
199
|
)
|
|
191
200
|
|
|
@@ -209,7 +218,6 @@ class EventStreamProcessor:
|
|
|
209
218
|
after="MapFeatureNames",
|
|
210
219
|
step_name="Aggregates",
|
|
211
220
|
table=".",
|
|
212
|
-
v3io_access_key=self.v3io_access_key,
|
|
213
221
|
)
|
|
214
222
|
# Step 5.2 - Calculate average latency time for each window (5 min and 1 hour by default)
|
|
215
223
|
graph.add_step(
|
|
@@ -226,7 +234,6 @@ class EventStreamProcessor:
|
|
|
226
234
|
name=EventFieldType.LATENCY,
|
|
227
235
|
after=EventFieldType.PREDICTIONS,
|
|
228
236
|
table=".",
|
|
229
|
-
v3io_access_key=self.v3io_access_key,
|
|
230
237
|
)
|
|
231
238
|
|
|
232
239
|
apply_storey_aggregations()
|
|
@@ -239,117 +246,122 @@ class EventStreamProcessor:
|
|
|
239
246
|
after=EventFieldType.LATENCY,
|
|
240
247
|
window_size=self.sample_window,
|
|
241
248
|
key=EventFieldType.ENDPOINT_ID,
|
|
242
|
-
v3io_access_key=self.v3io_access_key,
|
|
243
249
|
)
|
|
244
250
|
|
|
245
251
|
apply_storey_sample_window()
|
|
246
252
|
|
|
247
|
-
# Steps 7-9 - KV branch
|
|
248
|
-
# Step 7 - Filter relevant keys from the event before writing the data into
|
|
249
|
-
def
|
|
250
|
-
graph.add_step(
|
|
253
|
+
# Steps 7-9 - KV/SQL branch
|
|
254
|
+
# Step 7 - Filter relevant keys from the event before writing the data into the database table
|
|
255
|
+
def apply_process_before_endpoint_update():
|
|
256
|
+
graph.add_step(
|
|
257
|
+
"ProcessBeforeEndpointUpdate",
|
|
258
|
+
name="ProcessBeforeEndpointUpdate",
|
|
259
|
+
after="sample",
|
|
260
|
+
)
|
|
251
261
|
|
|
252
|
-
|
|
262
|
+
apply_process_before_endpoint_update()
|
|
253
263
|
|
|
254
|
-
# Step 8 - Write the filtered event to KV table. At this point, the serving graph updates the stats
|
|
264
|
+
# Step 8 - Write the filtered event to KV/SQL table. At this point, the serving graph updates the stats
|
|
255
265
|
# about average latency and the amount of predictions over time
|
|
256
|
-
def
|
|
266
|
+
def apply_update_endpoint():
|
|
257
267
|
graph.add_step(
|
|
258
|
-
"
|
|
259
|
-
name="
|
|
260
|
-
after="
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
v3io_access_key=self.v3io_access_key,
|
|
268
|
+
"UpdateEndpoint",
|
|
269
|
+
name="UpdateEndpoint",
|
|
270
|
+
after="ProcessBeforeEndpointUpdate",
|
|
271
|
+
project=self.project,
|
|
272
|
+
model_endpoint_store_target=self.model_endpoint_store_target,
|
|
264
273
|
)
|
|
265
274
|
|
|
266
|
-
|
|
275
|
+
apply_update_endpoint()
|
|
267
276
|
|
|
268
|
-
# Step 9 - Apply infer_schema on the
|
|
277
|
+
# Step 9 (only for KV target) - Apply infer_schema on the model endpoints table for generating schema file
|
|
269
278
|
# which will be used by Grafana monitoring dashboards
|
|
270
279
|
def apply_infer_schema():
|
|
271
280
|
graph.add_step(
|
|
272
281
|
"InferSchema",
|
|
273
282
|
name="InferSchema",
|
|
274
|
-
after="
|
|
275
|
-
v3io_access_key=self.v3io_access_key,
|
|
283
|
+
after="UpdateEndpoint",
|
|
276
284
|
v3io_framesd=self.v3io_framesd,
|
|
277
285
|
container=self.kv_container,
|
|
278
286
|
table=self.kv_path,
|
|
279
287
|
)
|
|
280
288
|
|
|
281
|
-
|
|
289
|
+
if self.model_endpoint_store_target == ModelEndpointTarget.V3IO_NOSQL:
|
|
290
|
+
apply_infer_schema()
|
|
282
291
|
|
|
283
|
-
# Steps 11-18 - TSDB branch
|
|
284
|
-
# Step 11 - Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
|
|
285
|
-
# stats and details about the events
|
|
286
|
-
def apply_process_before_tsdb():
|
|
287
|
-
graph.add_step(
|
|
288
|
-
"ProcessBeforeTSDB", name="ProcessBeforeTSDB", after="sample"
|
|
289
|
-
)
|
|
292
|
+
# Steps 11-18 - TSDB branch (not supported in CE environment at the moment)
|
|
290
293
|
|
|
291
|
-
|
|
294
|
+
if not mlrun.mlconf.is_ce_mode():
|
|
295
|
+
# Step 11 - Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
|
|
296
|
+
# stats and details about the events
|
|
297
|
+
def apply_process_before_tsdb():
|
|
298
|
+
graph.add_step(
|
|
299
|
+
"ProcessBeforeTSDB", name="ProcessBeforeTSDB", after="sample"
|
|
300
|
+
)
|
|
292
301
|
|
|
293
|
-
|
|
294
|
-
def apply_filter_and_unpacked_keys(name, keys):
|
|
295
|
-
graph.add_step(
|
|
296
|
-
"FilterAndUnpackKeys",
|
|
297
|
-
name=name,
|
|
298
|
-
after="ProcessBeforeTSDB",
|
|
299
|
-
keys=[keys],
|
|
300
|
-
)
|
|
302
|
+
apply_process_before_tsdb()
|
|
301
303
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
container=self.tsdb_container,
|
|
311
|
-
access_key=self.v3io_access_key,
|
|
312
|
-
v3io_frames=self.v3io_framesd,
|
|
313
|
-
infer_columns_from_data=True,
|
|
314
|
-
index_cols=[
|
|
315
|
-
EventFieldType.ENDPOINT_ID,
|
|
316
|
-
EventFieldType.RECORD_TYPE,
|
|
317
|
-
],
|
|
318
|
-
max_events=self.tsdb_batching_max_events,
|
|
319
|
-
flush_after_seconds=self.tsdb_batching_timeout_secs,
|
|
320
|
-
key=EventFieldType.ENDPOINT_ID,
|
|
321
|
-
)
|
|
304
|
+
# Steps 12-18: - Unpacked keys from each dictionary and write to TSDB target
|
|
305
|
+
def apply_filter_and_unpacked_keys(name, keys):
|
|
306
|
+
graph.add_step(
|
|
307
|
+
"FilterAndUnpackKeys",
|
|
308
|
+
name=name,
|
|
309
|
+
after="ProcessBeforeTSDB",
|
|
310
|
+
keys=[keys],
|
|
311
|
+
)
|
|
322
312
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
313
|
+
def apply_tsdb_target(name, after):
|
|
314
|
+
graph.add_step(
|
|
315
|
+
"storey.TSDBTarget",
|
|
316
|
+
name=name,
|
|
317
|
+
after=after,
|
|
318
|
+
path=self.tsdb_path,
|
|
319
|
+
rate="10/m",
|
|
320
|
+
time_col=EventFieldType.TIMESTAMP,
|
|
321
|
+
container=self.tsdb_container,
|
|
322
|
+
access_key=self.v3io_access_key,
|
|
323
|
+
v3io_frames=self.v3io_framesd,
|
|
324
|
+
infer_columns_from_data=True,
|
|
325
|
+
index_cols=[
|
|
326
|
+
EventFieldType.ENDPOINT_ID,
|
|
327
|
+
EventFieldType.RECORD_TYPE,
|
|
328
|
+
EventFieldType.ENDPOINT_TYPE,
|
|
329
|
+
],
|
|
330
|
+
max_events=self.tsdb_batching_max_events,
|
|
331
|
+
flush_after_seconds=self.tsdb_batching_timeout_secs,
|
|
332
|
+
key=EventFieldType.ENDPOINT_ID,
|
|
333
|
+
)
|
|
329
334
|
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
335
|
+
# Steps 12-13 - unpacked base_metrics dictionary
|
|
336
|
+
apply_filter_and_unpacked_keys(
|
|
337
|
+
name="FilterAndUnpackKeys1",
|
|
338
|
+
keys=EventKeyMetrics.BASE_METRICS,
|
|
339
|
+
)
|
|
340
|
+
apply_tsdb_target(name="tsdb1", after="FilterAndUnpackKeys1")
|
|
336
341
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
+
# Steps 14-15 - unpacked endpoint_features dictionary
|
|
343
|
+
apply_filter_and_unpacked_keys(
|
|
344
|
+
name="FilterAndUnpackKeys2",
|
|
345
|
+
keys=EventKeyMetrics.ENDPOINT_FEATURES,
|
|
346
|
+
)
|
|
347
|
+
apply_tsdb_target(name="tsdb2", after="FilterAndUnpackKeys2")
|
|
342
348
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
"
|
|
346
|
-
|
|
347
|
-
after="FilterAndUnpackKeys3",
|
|
348
|
-
_fn="(event is not None)",
|
|
349
|
+
# Steps 16-18 - unpacked custom_metrics dictionary. In addition, use storey.Filter remove none values
|
|
350
|
+
apply_filter_and_unpacked_keys(
|
|
351
|
+
name="FilterAndUnpackKeys3",
|
|
352
|
+
keys=EventKeyMetrics.CUSTOM_METRICS,
|
|
349
353
|
)
|
|
350
354
|
|
|
351
|
-
|
|
352
|
-
|
|
355
|
+
def apply_storey_filter():
|
|
356
|
+
graph.add_step(
|
|
357
|
+
"storey.Filter",
|
|
358
|
+
"FilterNotNone",
|
|
359
|
+
after="FilterAndUnpackKeys3",
|
|
360
|
+
_fn="(event is not None)",
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
apply_storey_filter()
|
|
364
|
+
apply_tsdb_target(name="tsdb3", after="FilterNotNone")
|
|
353
365
|
|
|
354
366
|
# Steps 19-20 - Parquet branch
|
|
355
367
|
# Step 19 - Filter and validate different keys before writing the data to Parquet target
|
|
@@ -384,19 +396,18 @@ class EventStreamProcessor:
|
|
|
384
396
|
apply_parquet_target()
|
|
385
397
|
|
|
386
398
|
|
|
387
|
-
class
|
|
399
|
+
class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
|
|
388
400
|
def __init__(self, **kwargs):
|
|
389
401
|
"""
|
|
390
|
-
Filter relevant keys from the event before writing the data to
|
|
391
|
-
we only keep metadata (function_uri, model_class, etc.) and stats about the
|
|
392
|
-
of predictions (per 5min and 1hour).
|
|
402
|
+
Filter relevant keys from the event before writing the data to database table (in EndpointUpdate step).
|
|
403
|
+
Note that in the endpoint table we only keep metadata (function_uri, model_class, etc.) and stats about the
|
|
404
|
+
average latency and the number of predictions (per 5min and 1hour).
|
|
393
405
|
|
|
394
|
-
:returns: A filtered event as a dictionary which will be written to
|
|
406
|
+
:returns: A filtered event as a dictionary which will be written to the endpoint table in the next step.
|
|
395
407
|
"""
|
|
396
408
|
super().__init__(**kwargs)
|
|
397
409
|
|
|
398
410
|
def do(self, event):
|
|
399
|
-
|
|
400
411
|
# Compute prediction per second
|
|
401
412
|
event[EventLiveStats.PREDICTIONS_PER_SECOND] = (
|
|
402
413
|
float(event[EventLiveStats.PREDICTIONS_COUNT_5M]) / 300
|
|
@@ -408,26 +419,31 @@ class ProcessBeforeKV(mlrun.feature_store.steps.MapClass):
|
|
|
408
419
|
EventFieldType.FUNCTION_URI,
|
|
409
420
|
EventFieldType.MODEL,
|
|
410
421
|
EventFieldType.MODEL_CLASS,
|
|
411
|
-
EventFieldType.TIMESTAMP,
|
|
412
422
|
EventFieldType.ENDPOINT_ID,
|
|
413
423
|
EventFieldType.LABELS,
|
|
414
|
-
EventFieldType.
|
|
424
|
+
EventFieldType.FIRST_REQUEST,
|
|
425
|
+
EventFieldType.LAST_REQUEST,
|
|
426
|
+
EventFieldType.ERROR_COUNT,
|
|
427
|
+
]
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
# Add generic metrics statistics
|
|
431
|
+
generic_metrics = {
|
|
432
|
+
k: event[k]
|
|
433
|
+
for k in [
|
|
415
434
|
EventLiveStats.LATENCY_AVG_5M,
|
|
416
435
|
EventLiveStats.LATENCY_AVG_1H,
|
|
417
436
|
EventLiveStats.PREDICTIONS_PER_SECOND,
|
|
418
437
|
EventLiveStats.PREDICTIONS_COUNT_5M,
|
|
419
438
|
EventLiveStats.PREDICTIONS_COUNT_1H,
|
|
420
|
-
EventFieldType.FIRST_REQUEST,
|
|
421
|
-
EventFieldType.LAST_REQUEST,
|
|
422
|
-
EventFieldType.ERROR_COUNT,
|
|
423
439
|
]
|
|
424
440
|
}
|
|
425
|
-
|
|
426
|
-
e =
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
# Write labels
|
|
441
|
+
|
|
442
|
+
e[EventFieldType.METRICS] = json.dumps(
|
|
443
|
+
{EventKeyMetrics.GENERIC: generic_metrics}
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
# Write labels as json string as required by the DB format
|
|
431
447
|
e[EventFieldType.LABELS] = json.dumps(e[EventFieldType.LABELS])
|
|
432
448
|
|
|
433
449
|
return e
|
|
@@ -449,7 +465,6 @@ class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
|
|
|
449
465
|
super().__init__(**kwargs)
|
|
450
466
|
|
|
451
467
|
def do(self, event):
|
|
452
|
-
|
|
453
468
|
# Compute prediction per second
|
|
454
469
|
event[EventLiveStats.PREDICTIONS_PER_SECOND] = (
|
|
455
470
|
float(event[EventLiveStats.PREDICTIONS_COUNT_5M]) / 300
|
|
@@ -457,6 +472,7 @@ class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
|
|
|
457
472
|
base_fields = [
|
|
458
473
|
EventFieldType.TIMESTAMP,
|
|
459
474
|
EventFieldType.ENDPOINT_ID,
|
|
475
|
+
EventFieldType.ENDPOINT_TYPE,
|
|
460
476
|
]
|
|
461
477
|
|
|
462
478
|
# Getting event timestamp and endpoint_id
|
|
@@ -519,11 +535,9 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
|
|
|
519
535
|
super().__init__(**kwargs)
|
|
520
536
|
|
|
521
537
|
def do(self, event):
|
|
522
|
-
|
|
523
538
|
logger.info("ProcessBeforeParquet1", event=event)
|
|
524
539
|
# Remove the following keys from the event
|
|
525
540
|
for key in [
|
|
526
|
-
EventFieldType.UNPACKED_LABELS,
|
|
527
541
|
EventFieldType.FEATURES,
|
|
528
542
|
EventFieldType.NAMED_FEATURES,
|
|
529
543
|
]:
|
|
@@ -549,32 +563,23 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
|
|
|
549
563
|
class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
550
564
|
def __init__(
|
|
551
565
|
self,
|
|
552
|
-
|
|
553
|
-
kv_path: str,
|
|
554
|
-
v3io_access_key: str,
|
|
566
|
+
project: str,
|
|
555
567
|
**kwargs,
|
|
556
568
|
):
|
|
557
569
|
"""
|
|
558
570
|
Process event or batch of events as part of the first step of the monitoring serving graph. It includes
|
|
559
|
-
Adding important details to the event such as endpoint_id, handling errors coming from the stream,
|
|
571
|
+
Adding important details to the event such as endpoint_id, handling errors coming from the stream, validation
|
|
560
572
|
of event data such as inputs and outputs, and splitting model event into sub-events.
|
|
561
573
|
|
|
562
|
-
:param
|
|
563
|
-
endpoints it is usually 'users'.
|
|
564
|
-
:param kv_path: KV table path that will be used to retrieve the endpoint id. For model endpoints
|
|
565
|
-
it is usually pipelines/project-name/model-endpoints/endpoints/
|
|
566
|
-
:param v3io_access_key: Access key with permission to read from a KV table.
|
|
567
|
-
:param project: Project name.
|
|
568
|
-
|
|
574
|
+
:param project: Project name.
|
|
569
575
|
|
|
570
576
|
:returns: A Storey event object which is the basic unit of data in Storey. Note that the next steps of
|
|
571
577
|
the monitoring serving graph are based on Storey operations.
|
|
572
578
|
|
|
573
579
|
"""
|
|
574
580
|
super().__init__(**kwargs)
|
|
575
|
-
|
|
576
|
-
self.
|
|
577
|
-
self.v3io_access_key: str = v3io_access_key
|
|
581
|
+
|
|
582
|
+
self.project: str = project
|
|
578
583
|
|
|
579
584
|
# First and last requests timestamps (value) of each endpoint (key)
|
|
580
585
|
self.first_request: typing.Dict[str, str] = dict()
|
|
@@ -602,7 +607,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
602
607
|
version = event.get(EventFieldType.VERSION)
|
|
603
608
|
versioned_model = f"{model}:{version}" if version else f"{model}:latest"
|
|
604
609
|
|
|
605
|
-
endpoint_id = mlrun.
|
|
610
|
+
endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
|
|
606
611
|
function_uri=function_uri,
|
|
607
612
|
versioned_model=versioned_model,
|
|
608
613
|
)
|
|
@@ -615,10 +620,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
615
620
|
# In case this process fails, resume state from existing record
|
|
616
621
|
self.resume_state(endpoint_id)
|
|
617
622
|
|
|
618
|
-
#
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
623
|
+
# If error key has been found in the current event,
|
|
624
|
+
# increase the error counter by 1 and raise the error description
|
|
625
|
+
error = event.get("error")
|
|
626
|
+
if error:
|
|
627
|
+
self.error_count[endpoint_id] += 1
|
|
628
|
+
raise mlrun.errors.MLRunInvalidArgumentError(str(error))
|
|
622
629
|
|
|
623
630
|
# Validate event fields
|
|
624
631
|
model_class = event.get("model_class") or event.get("class")
|
|
@@ -679,11 +686,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
679
686
|
):
|
|
680
687
|
return None
|
|
681
688
|
|
|
682
|
-
# Get labels from event (if exist)
|
|
683
|
-
unpacked_labels = {
|
|
684
|
-
f"_{k}": v for k, v in event.get(EventFieldType.LABELS, {}).items()
|
|
685
|
-
}
|
|
686
|
-
|
|
687
689
|
# Adjust timestamp format
|
|
688
690
|
timestamp = datetime.datetime.strptime(timestamp[:-6], "%Y-%m-%d %H:%M:%S.%f")
|
|
689
691
|
|
|
@@ -722,7 +724,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
722
724
|
EventFieldType.ENTITIES: event.get("request", {}).get(
|
|
723
725
|
EventFieldType.ENTITIES, {}
|
|
724
726
|
),
|
|
725
|
-
EventFieldType.UNPACKED_LABELS: unpacked_labels,
|
|
726
727
|
}
|
|
727
728
|
)
|
|
728
729
|
|
|
@@ -745,14 +746,13 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
745
746
|
endpoint_id in self.last_request
|
|
746
747
|
and self.last_request[endpoint_id] > timestamp
|
|
747
748
|
):
|
|
748
|
-
|
|
749
749
|
logger.error(
|
|
750
750
|
f"current event request time {timestamp} is earlier than the last request time "
|
|
751
751
|
f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
|
|
752
752
|
)
|
|
753
753
|
|
|
754
|
+
@staticmethod
|
|
754
755
|
def is_list_of_numerics(
|
|
755
|
-
self,
|
|
756
756
|
field: typing.List[typing.Union[int, float, dict, list]],
|
|
757
757
|
dict_path: typing.List[str],
|
|
758
758
|
):
|
|
@@ -769,10 +769,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
769
769
|
if endpoint_id not in self.endpoints:
|
|
770
770
|
logger.info("Trying to resume state", endpoint_id=endpoint_id)
|
|
771
771
|
endpoint_record = get_endpoint_record(
|
|
772
|
-
|
|
773
|
-
kv_path=self.kv_path,
|
|
772
|
+
project=self.project,
|
|
774
773
|
endpoint_id=endpoint_id,
|
|
775
|
-
access_key=self.v3io_access_key,
|
|
776
774
|
)
|
|
777
775
|
|
|
778
776
|
# If model endpoint found, get first_request, last_request and error_count values
|
|
@@ -784,13 +782,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
784
782
|
|
|
785
783
|
last_request = endpoint_record.get(EventFieldType.LAST_REQUEST)
|
|
786
784
|
if last_request:
|
|
787
|
-
|
|
788
785
|
self.last_request[endpoint_id] = last_request
|
|
789
786
|
|
|
790
787
|
error_count = endpoint_record.get(EventFieldType.ERROR_COUNT)
|
|
791
788
|
|
|
792
789
|
if error_count:
|
|
793
|
-
self.error_count[endpoint_id] = error_count
|
|
790
|
+
self.error_count[endpoint_id] = int(error_count)
|
|
794
791
|
|
|
795
792
|
# add endpoint to endpoints set
|
|
796
793
|
self.endpoints.add(endpoint_id)
|
|
@@ -807,13 +804,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
807
804
|
self.error_count[endpoint_id] += 1
|
|
808
805
|
return False
|
|
809
806
|
|
|
810
|
-
def handle_errors(self, endpoint_id, event) -> bool:
|
|
811
|
-
if "error" in event:
|
|
812
|
-
self.error_count[endpoint_id] += 1
|
|
813
|
-
return True
|
|
814
|
-
|
|
815
|
-
return False
|
|
816
|
-
|
|
817
807
|
|
|
818
808
|
def is_not_none(field: typing.Any, dict_path: typing.List[str]):
|
|
819
809
|
if field is not None:
|
|
@@ -857,9 +847,7 @@ class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
|
|
|
857
847
|
class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
858
848
|
def __init__(
|
|
859
849
|
self,
|
|
860
|
-
|
|
861
|
-
kv_path: str,
|
|
862
|
-
access_key: str,
|
|
850
|
+
project: str,
|
|
863
851
|
infer_columns_from_data: bool = False,
|
|
864
852
|
**kwargs,
|
|
865
853
|
):
|
|
@@ -867,11 +855,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
867
855
|
Validating feature names and label columns and map each feature to its value. In the end of this step,
|
|
868
856
|
the event should have key-value pairs of (feature name: feature value).
|
|
869
857
|
|
|
870
|
-
:param
|
|
871
|
-
endpoints it is usually 'users'.
|
|
872
|
-
:param kv_path: KV table path that will be used to retrieve the endpoint id. For model endpoints
|
|
873
|
-
it is usually pipelines/project-name/model-endpoints/endpoints/
|
|
874
|
-
:param v3io_access_key: Access key with permission to read from a KV table.
|
|
858
|
+
:param project: Project name.
|
|
875
859
|
:param infer_columns_from_data: If true and features or labels names were not found, then try to
|
|
876
860
|
retrieve them from data that was stored in the previous events of
|
|
877
861
|
the current process. This data can be found under self.feature_names and
|
|
@@ -882,16 +866,18 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
882
866
|
feature names and values (as well as the prediction results).
|
|
883
867
|
"""
|
|
884
868
|
super().__init__(**kwargs)
|
|
885
|
-
|
|
886
|
-
self.kv_path = kv_path
|
|
887
|
-
self.access_key = access_key
|
|
869
|
+
|
|
888
870
|
self._infer_columns_from_data = infer_columns_from_data
|
|
871
|
+
self.project = project
|
|
889
872
|
|
|
890
873
|
# Dictionaries that will be used in case features names
|
|
891
874
|
# and labels columns were not found in the current event
|
|
892
875
|
self.feature_names = {}
|
|
893
876
|
self.label_columns = {}
|
|
894
877
|
|
|
878
|
+
# Dictionary to manage the model endpoint types - important for the V3IO TSDB
|
|
879
|
+
self.endpoint_type = {}
|
|
880
|
+
|
|
895
881
|
def _infer_feature_names_from_data(self, event):
|
|
896
882
|
for endpoint_id in self.feature_names:
|
|
897
883
|
if len(self.feature_names[endpoint_id]) >= len(
|
|
@@ -914,10 +900,8 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
914
900
|
# Get feature names and label columns
|
|
915
901
|
if endpoint_id not in self.feature_names:
|
|
916
902
|
endpoint_record = get_endpoint_record(
|
|
917
|
-
|
|
918
|
-
kv_path=self.kv_path,
|
|
903
|
+
project=self.project,
|
|
919
904
|
endpoint_id=endpoint_id,
|
|
920
|
-
access_key=self.access_key,
|
|
921
905
|
)
|
|
922
906
|
feature_names = endpoint_record.get(EventFieldType.FEATURE_NAMES)
|
|
923
907
|
feature_names = json.loads(feature_names) if feature_names else None
|
|
@@ -925,7 +909,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
925
909
|
label_columns = endpoint_record.get(EventFieldType.LABEL_NAMES)
|
|
926
910
|
label_columns = json.loads(label_columns) if label_columns else None
|
|
927
911
|
|
|
928
|
-
#
|
|
912
|
+
# If feature names were not found,
|
|
929
913
|
# try to retrieve them from the previous events of the current process
|
|
930
914
|
if not feature_names and self._infer_columns_from_data:
|
|
931
915
|
feature_names = self._infer_feature_names_from_data(event)
|
|
@@ -940,15 +924,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
940
924
|
]
|
|
941
925
|
|
|
942
926
|
# Update the endpoint record with the generated features
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
access_key=self.access_key,
|
|
947
|
-
key=event[EventFieldType.ENDPOINT_ID],
|
|
927
|
+
update_endpoint_record(
|
|
928
|
+
project=self.project,
|
|
929
|
+
endpoint_id=endpoint_id,
|
|
948
930
|
attributes={
|
|
949
931
|
EventFieldType.FEATURE_NAMES: json.dumps(feature_names)
|
|
950
932
|
},
|
|
951
|
-
raise_for_status=v3io.dataplane.RaiseForStatus.always,
|
|
952
933
|
)
|
|
953
934
|
|
|
954
935
|
# Similar process with label columns
|
|
@@ -963,15 +944,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
963
944
|
label_columns = [
|
|
964
945
|
f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
|
|
965
946
|
]
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
attributes={
|
|
972
|
-
EventFieldType.LABEL_COLUMNS: json.dumps(label_columns)
|
|
973
|
-
},
|
|
974
|
-
raise_for_status=v3io.dataplane.RaiseForStatus.always,
|
|
947
|
+
|
|
948
|
+
update_endpoint_record(
|
|
949
|
+
project=self.project,
|
|
950
|
+
endpoint_id=endpoint_id,
|
|
951
|
+
attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
|
|
975
952
|
)
|
|
976
953
|
|
|
977
954
|
self.label_columns[endpoint_id] = label_columns
|
|
@@ -984,6 +961,10 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
984
961
|
"Feature names", endpoint_id=endpoint_id, feature_names=feature_names
|
|
985
962
|
)
|
|
986
963
|
|
|
964
|
+
# Update the endpoint type within the endpoint types dictionary
|
|
965
|
+
endpoint_type = int(endpoint_record.get(EventFieldType.ENDPOINT_TYPE))
|
|
966
|
+
self.endpoint_type[endpoint_id] = endpoint_type
|
|
967
|
+
|
|
987
968
|
# Add feature_name:value pairs along with a mapping dictionary of all of these pairs
|
|
988
969
|
feature_names = self.feature_names[endpoint_id]
|
|
989
970
|
feature_values = event[EventFieldType.FEATURES]
|
|
@@ -1004,6 +985,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
1004
985
|
mapping_dictionary=EventFieldType.NAMED_PREDICTIONS,
|
|
1005
986
|
)
|
|
1006
987
|
|
|
988
|
+
# Add endpoint type to the event
|
|
989
|
+
event[EventFieldType.ENDPOINT_TYPE] = self.endpoint_type[endpoint_id]
|
|
990
|
+
|
|
1007
991
|
logger.info("Mapped event", event=event)
|
|
1008
992
|
return event
|
|
1009
993
|
|
|
@@ -1033,33 +1017,24 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
1033
1017
|
event[mapping_dictionary][name] = value
|
|
1034
1018
|
|
|
1035
1019
|
|
|
1036
|
-
class
|
|
1037
|
-
def __init__(self,
|
|
1020
|
+
class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
|
|
1021
|
+
def __init__(self, project: str, model_endpoint_store_target: str, **kwargs):
|
|
1038
1022
|
"""
|
|
1039
|
-
|
|
1040
|
-
average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
|
|
1023
|
+
Update the model endpoint record in the DB. Note that the event at this point includes metadata and stats about
|
|
1024
|
+
the average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
|
|
1041
1025
|
such as "Model Monitoring - Performance" which can be found in Grafana.
|
|
1042
1026
|
|
|
1043
|
-
:param kv_container: Name of the container that will be used to retrieve the endpoint id. For model
|
|
1044
|
-
endpoints it is usually 'users'.
|
|
1045
|
-
:param table: KV table path that will be used to retrieve the endpoint id. For model endpoints
|
|
1046
|
-
it is usually pipelines/project-name/model-endpoints/endpoints/.
|
|
1047
|
-
:param v3io_access_key: Access key with permission to read from a KV table.
|
|
1048
|
-
|
|
1049
1027
|
:returns: Event as a dictionary (without any changes) for the next step (InferSchema).
|
|
1050
1028
|
"""
|
|
1051
1029
|
super().__init__(**kwargs)
|
|
1052
|
-
self.
|
|
1053
|
-
self.
|
|
1054
|
-
self.v3io_access_key = v3io_access_key
|
|
1030
|
+
self.project = project
|
|
1031
|
+
self.model_endpoint_store_target = model_endpoint_store_target
|
|
1055
1032
|
|
|
1056
1033
|
def do(self, event: typing.Dict):
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
key=event[EventFieldType.ENDPOINT_ID],
|
|
1034
|
+
update_endpoint_record(
|
|
1035
|
+
project=self.project,
|
|
1036
|
+
endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
|
|
1061
1037
|
attributes=event,
|
|
1062
|
-
access_key=self.v3io_access_key,
|
|
1063
1038
|
)
|
|
1064
1039
|
return event
|
|
1065
1040
|
|
|
@@ -1067,7 +1042,6 @@ class WriteToKV(mlrun.feature_store.steps.MapClass):
|
|
|
1067
1042
|
class InferSchema(mlrun.feature_store.steps.MapClass):
|
|
1068
1043
|
def __init__(
|
|
1069
1044
|
self,
|
|
1070
|
-
v3io_access_key: str,
|
|
1071
1045
|
v3io_framesd: str,
|
|
1072
1046
|
container: str,
|
|
1073
1047
|
table: str,
|
|
@@ -1087,7 +1061,6 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
|
|
|
1087
1061
|
"""
|
|
1088
1062
|
super().__init__(**kwargs)
|
|
1089
1063
|
self.container = container
|
|
1090
|
-
self.v3io_access_key = v3io_access_key
|
|
1091
1064
|
self.v3io_framesd = v3io_framesd
|
|
1092
1065
|
self.table = table
|
|
1093
1066
|
self.keys = set()
|
|
@@ -1098,34 +1071,29 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
|
|
|
1098
1071
|
self.keys.update(key_set)
|
|
1099
1072
|
# Apply infer_schema on the kv table for generating the schema file
|
|
1100
1073
|
mlrun.utils.v3io_clients.get_frames_client(
|
|
1101
|
-
token=self.v3io_access_key,
|
|
1102
1074
|
container=self.container,
|
|
1103
1075
|
address=self.v3io_framesd,
|
|
1104
1076
|
).execute(backend="kv", table=self.table, command="infer_schema")
|
|
1077
|
+
|
|
1105
1078
|
return event
|
|
1106
1079
|
|
|
1107
1080
|
|
|
1108
|
-
def
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
key=endpoint_id,
|
|
1081
|
+
def update_endpoint_record(
|
|
1082
|
+
project: str,
|
|
1083
|
+
endpoint_id: str,
|
|
1084
|
+
attributes: dict,
|
|
1085
|
+
):
|
|
1086
|
+
model_endpoint_store = get_model_endpoint_store(
|
|
1087
|
+
project=project,
|
|
1116
1088
|
)
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
)
|
|
1129
|
-
return endpoint_record
|
|
1130
|
-
except Exception:
|
|
1131
|
-
return None
|
|
1089
|
+
|
|
1090
|
+
model_endpoint_store.update_model_endpoint(
|
|
1091
|
+
endpoint_id=endpoint_id, attributes=attributes
|
|
1092
|
+
)
|
|
1093
|
+
|
|
1094
|
+
|
|
1095
|
+
def get_endpoint_record(project: str, endpoint_id: str):
|
|
1096
|
+
model_endpoint_store = get_model_endpoint_store(
|
|
1097
|
+
project=project,
|
|
1098
|
+
)
|
|
1099
|
+
return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
|