mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3.dist-info/RECORD +0 -381
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
|
+
|
|
17
|
+
import enum
|
|
18
|
+
import typing
|
|
19
|
+
|
|
20
|
+
import mlrun
|
|
21
|
+
|
|
22
|
+
from .model_endpoint_store import ModelEndpointStore
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ModelEndpointStoreType(enum.Enum):
    """Enum class to handle the different store type values for saving a model endpoint record."""

    v3io_nosql = "v3io-nosql"
    SQL = "sql"

    def to_endpoint_store(
        self,
        project: str,
        access_key: str = None,
        endpoint_store_connection: str = None,
    ) -> ModelEndpointStore:
        """
        Return a ModelEndpointStore object based on the provided enum value.

        :param project:                   The name of the project.
        :param access_key:                Access key with permission to the DB table. Only used for the KV
                                          (`v3io_nosql`) store type; when it is empty, the key is resolved
                                          through `mlrun.mlconf.get_v3io_access_key()`.
        :param endpoint_store_connection: A valid connection string for model endpoint target. Only used for
                                          the SQL store type; when it is empty, the connection string is
                                          resolved through `get_connection_string(project=project)`.
                                          e.g. A root user with password 1234, tries to connect a schema called
                                          mlrun within a local MySQL DB instance:
                                          'mysql+pymysql://root:1234@localhost:3306/mlrun'.

        :return: `ModelEndpointStore` object.
        """

        if self is ModelEndpointStoreType.v3io_nosql:
            # Imported lazily, only when the KV store is actually requested.
            from .kv_model_endpoint_store import KVModelEndpointStore

            # Fall back to the configured V3IO access key when none was provided
            access_key = access_key or mlrun.mlconf.get_v3io_access_key()

            return KVModelEndpointStore(project=project, access_key=access_key)

        # Assuming SQL store target if store type is not KV.
        # Update these lines once there are more than two store target types.
        from mlrun.utils.model_monitoring import get_connection_string

        sql_connection_string = endpoint_store_connection or get_connection_string(
            project=project
        )
        from .sql_model_endpoint_store import SQLModelEndpointStore

        return SQLModelEndpointStore(
            project=project, sql_connection_string=sql_connection_string
        )

    @classmethod
    def _missing_(cls, value: typing.Any):
        """A lookup function to handle an invalid value.

        Invoked by the `enum` machinery when `cls(value)` does not match any member value.

        :param value: Provided enum (invalid) value.

        :raise MLRunInvalidArgumentError: Always, listing the values that are accepted.
        """
        # Members are looked up by value, so report the values (not the member names) as the valid choices.
        valid_values = [member.value for member in cls]
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"{value} is not a valid endpoint store, please choose a valid value: {valid_values}."
        )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def get_model_endpoint_store(
    project: str, access_key: str = None
) -> ModelEndpointStore:
    """
    Build the model endpoint store that matches `mlrun.mlconf.model_endpoint_monitoring.store_type`.

    :param project:    The name of the project.
    :param access_key: Access key with permission to the DB table.

    :return: `ModelEndpointStore` object. Using this object, the user can apply different operations on the
             model endpoint record such as write, update, get and delete.
    """

    # Resolve the configured store type into its ModelEndpointStoreType member
    configured_store_type = mlrun.mlconf.model_endpoint_monitoring.store_type
    store_type = ModelEndpointStoreType(configured_store_type)

    # Let the enum member construct the matching store target object
    return store_type.to_endpoint_store(project, access_key)
|
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import typing
|
|
18
|
+
|
|
19
|
+
import v3io.dataplane
|
|
20
|
+
import v3io_frames
|
|
21
|
+
|
|
22
|
+
import mlrun
|
|
23
|
+
import mlrun.common.model_monitoring as model_monitoring_constants
|
|
24
|
+
import mlrun.utils.model_monitoring
|
|
25
|
+
import mlrun.utils.v3io_clients
|
|
26
|
+
from mlrun.utils import logger
|
|
27
|
+
|
|
28
|
+
from .model_endpoint_store import ModelEndpointStore
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class KVModelEndpointStore(ModelEndpointStore):
|
|
32
|
+
"""
|
|
33
|
+
Handles the DB operations when the DB target is from type KV. For the KV operations, we use an instance of V3IO
|
|
34
|
+
client and usually the KV table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def __init__(self, project: str, access_key: str):
    """
    Set up the V3IO client and the KV table location for the given project.

    :param project:    The name of the project.
    :param access_key: V3IO access key. When empty, the `V3IO_ACCESS_KEY` environment variable is used.
    """
    super().__init__(project=project)

    # Prefer the explicit key, otherwise take it from the environment
    resolved_access_key = access_key or os.environ.get("V3IO_ACCESS_KEY")
    self.access_key = resolved_access_key

    # V3IO client instance used for all KV operations of this store
    self.client = mlrun.utils.v3io_clients.get_v3io_client(
        endpoint=mlrun.mlconf.v3io_api, access_key=resolved_access_key
    )

    # KV table path and container
    self.path, self.container = self._get_path_and_container()
|
|
46
|
+
|
|
47
|
+
def write_model_endpoint(self, endpoint: typing.Dict[str, typing.Any]):
    """
    Put a new endpoint record into the KV table, keyed by the endpoint's uid field.

    :param endpoint: model endpoint dictionary that will be written into the DB.
    """

    # The record key is the value stored under the uid field of the endpoint dictionary
    record_key = endpoint[model_monitoring_constants.EventFieldType.UID]

    self.client.kv.put(
        container=self.container,
        table_path=self.path,
        key=record_key,
        attributes=endpoint,
    )
|
|
60
|
+
|
|
61
|
+
def update_model_endpoint(
    self, endpoint_id: str, attributes: typing.Dict[str, typing.Any]
):
    """
    Apply the given attributes to an existing model endpoint record.

    :param endpoint_id: The unique id of the model endpoint.
    :param attributes:  Dictionary of attributes used to update the model endpoint. Note that the keys
                        of the attributes dictionary should exist in the KV table.
    """

    kv_client = self.client.kv
    kv_client.update(
        container=self.container,
        table_path=self.path,
        key=endpoint_id,
        attributes=attributes,
    )
|
|
79
|
+
|
|
80
|
+
def delete_model_endpoint(
    self,
    endpoint_id: str,
):
    """
    Remove the KV record that belongs to the given model endpoint id.

    :param endpoint_id: The unique id of the model endpoint.
    """

    kv_client = self.client.kv
    kv_client.delete(
        container=self.container,
        table_path=self.path,
        key=endpoint_id,
    )
|
|
95
|
+
|
|
96
|
+
def get_model_endpoint(
    self,
    endpoint_id: str,
) -> typing.Dict[str, typing.Any]:
    """
    Fetch a single model endpoint record from the KV table.

    :param endpoint_id: The unique id of the model endpoint.

    :return: A model endpoint record as a dictionary.

    :raise MLRunNotFoundError: If the endpoint was not found.
    """

    # Read the raw record; status errors are not raised by the client here, an empty item is handled below
    response = self.client.kv.get(
        container=self.container,
        table_path=self.path,
        key=endpoint_id,
        raise_for_status=v3io.dataplane.RaiseForStatus.never,
        access_key=self.access_key,
    )
    record = response.output.item

    if record:
        # For backwards compatibility: replace null values for `error_count` and `metrics`
        # (the helper's return value is not used, so it presumably adjusts the record in place)
        mlrun.utils.model_monitoring.validate_old_schema_fields(endpoint=record)
        return record

    raise mlrun.errors.MLRunNotFoundError(f"Endpoint {endpoint_id} not found")
|
|
127
|
+
|
|
128
|
+
def _get_path_and_container(self):
    """
    Resolve the KV table path and container from the model monitoring configurations.

    :return: A `(path, container)` tuple.
    """
    # Fill the configured default store prefix with this project and the endpoints kind
    store_prefix = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
        project=self.project,
        kind=model_monitoring_constants.ModelMonitoringStoreKinds.ENDPOINTS,
    )

    # The parser yields three parts; the first one is not needed here
    parsed_prefix = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(
        store_prefix
    )
    _, container, table_path = parsed_prefix

    return table_path, container
|
|
140
|
+
|
|
141
|
+
def list_model_endpoints(
    self,
    model: str = None,
    function: str = None,
    labels: typing.List[str] = None,
    top_level: bool = None,
    uids: typing.List = None,
) -> typing.List[typing.Dict[str, typing.Any]]:
    """
    List model endpoint records of the project, optionally filtered by model, function, labels or top level.
    Without any filter, every model endpoint of the project is listed.

    The KV table is always queried first; if that query fails, a warning is logged and an empty list is
    returned. When `uids` is provided, the ids found by the query are ignored and `uids` is used instead.

    :param model:     The name of the model to filter by.
    :param function:  The name of the function to filter by.
    :param labels:    A list of labels to filter by. Label filters work by either filtering a specific value
                      of a label (i.e. list("key=value")) or by looking for the existence of a given
                      key (i.e. "key").
    :param top_level: If True will return only routers and endpoint that are NOT children of any router.
    :param uids:      List of model endpoint unique ids to include in the result.

    :return: A list of model endpoint dictionaries.
    """

    # Retrieve the raw data from the KV table
    try:
        items = self.client.kv.new_cursor(
            container=self.container,
            table_path=self.path,
            filter_expression=self._build_kv_cursor_filter_expression(
                self.project,
                function,
                model,
                labels,
                top_level,
            ),
            raise_for_status=v3io.dataplane.RaiseForStatus.never,
        ).all()
    except Exception as exc:
        logger.warning("Failed retrieving raw data from kv table", exc=exc)
        return []

    # Collect the unique ids from the query result unless the caller supplied them
    if uids is None:
        uid_field = model_monitoring_constants.EventFieldType.UID
        # The fallback is kept for backwards compatibility - in old versions the key column named endpoint_id
        uids = [
            item[uid_field]
            if uid_field in item
            else item[model_monitoring_constants.EventFieldType.ENDPOINT_ID]
            for item in items
        ]

    # Resolve every id into its full model endpoint record
    return [self.get_model_endpoint(endpoint_id=uid) for uid in uids]
|
|
209
|
+
|
|
210
|
+
def delete_model_endpoints_resources(
    self, endpoints: typing.List[typing.Dict[str, typing.Any]]
):
    """
    Delete all model endpoints resources in both KV and the time series DB.

    The cleanup runs in this order: the given endpoints are deleted one by one, every record still present
    in the KV table is deleted, the TSDB table is deleted through the frames client, and finally the TSDB
    directory is removed recursively through the store manager.

    :param endpoints: A list of model endpoints flattened dictionaries.
    """

    # Delete model endpoint record from KV table
    for endpoint_dict in endpoints:
        if model_monitoring_constants.EventFieldType.UID not in endpoint_dict:
            # This is kept for backwards compatibility - in old versions the key column named endpoint_id
            endpoint_id = endpoint_dict[
                model_monitoring_constants.EventFieldType.ENDPOINT_ID
            ]
        else:
            endpoint_id = endpoint_dict[
                model_monitoring_constants.EventFieldType.UID
            ]
        self.delete_model_endpoint(
            endpoint_id,
        )

    # Delete remain records in the KV
    all_records = self.client.kv.new_cursor(
        container=self.container,
        table_path=self.path,
        raise_for_status=v3io.dataplane.RaiseForStatus.never,
    ).all()

    all_records = [r["__name"] for r in all_records]

    # Cleanup KV
    for record in all_records:
        self.client.kv.delete(
            container=self.container,
            table_path=self.path,
            key=record,
            raise_for_status=v3io.dataplane.RaiseForStatus.never,
        )

    # Cleanup TSDB
    frames = mlrun.utils.v3io_clients.get_frames_client(
        token=self.access_key,
        address=mlrun.mlconf.v3io_framesd,
        container=self.container,
    )

    # Generate the required tsdb paths
    tsdb_path, filtered_path = self._generate_tsdb_paths()

    # Delete time series DB resources
    try:
        frames.delete(
            backend=model_monitoring_constants.TimeSeriesTarget.TSDB,
            table=filtered_path,
        )
    except (v3io_frames.errors.DeleteError, v3io_frames.errors.CreateError) as e:
        # Frames might raise an exception if schema file does not exist.
        # This is best-effort: log it and continue with the directory cleanup.
        logger.warning("Failed to delete TSDB schema file:", err=e)

    # Final cleanup of tsdb path.
    # `str.replace` returns a new string, so the result must be assigned back; otherwise the
    # normalisation is silently dropped and the un-normalised path is handed to the store.
    tsdb_path = tsdb_path.replace("://u", ":///u")
    store, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
    store.rm(tsdb_path, recursive=True)
|
|
277
|
+
|
|
278
|
+
def get_endpoint_real_time_metrics(
    self,
    endpoint_id: str,
    metrics: typing.List[str],
    start: str = "now-1h",
    end: str = "now",
    access_key: str = None,
) -> typing.Dict[str, typing.List[typing.Tuple[str, float]]]:
    """
    Getting metrics from the time series DB. There are pre-defined metrics for model endpoints such as
    `predictions_per_second` and `latency_avg_5m` but also custom metrics defined by the user.

    Requested metrics that are missing from the query result are omitted from the returned dictionary.
    If reading from the time series DB fails with a `ReadError`, a warning is logged and the (possibly
    empty) mapping collected so far is returned.

    :param endpoint_id:      The unique id of the model endpoint.
    :param metrics:          A list of real-time metrics to return for the model endpoint.
    :param start:            The start time of the metrics. Can be represented by a string containing an RFC 3339
                             time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                             `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                             earliest time.
    :param end:              The end time of the metrics. Can be represented by a string containing an RFC 3339
                             time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                             `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                             earliest time.
    :param access_key:       V3IO access key that will be used for generating Frames client object. If not
                             provided, `mlrun.mlconf.get_v3io_access_key()` is used.

    :return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
             includes timestamps and the values.

    :raise MLRunInvalidArgumentError: If `metrics` is empty.
    """

    # Initialize access key
    access_key = access_key or mlrun.mlconf.get_v3io_access_key()

    if not metrics:
        raise mlrun.errors.MLRunInvalidArgumentError(
            "Metric names must be provided"
        )

    # Initialize metrics mapping dictionary
    metrics_mapping = {}

    # Getting the path for the time series DB
    events_path = (
        mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
            project=self.project,
            kind=model_monitoring_constants.ModelMonitoringStoreKinds.EVENTS,
        )
    )
    (
        _,
        container,
        events_path,
    ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(events_path)

    # Retrieve the raw data from the time series DB based on the provided metrics and time ranges
    frames_client = mlrun.utils.v3io_clients.get_frames_client(
        token=access_key,
        address=mlrun.mlconf.v3io_framesd,
        container=container,
    )

    try:
        data = frames_client.read(
            backend=model_monitoring_constants.TimeSeriesTarget.TSDB,
            table=events_path,
            columns=["endpoint_id", *metrics],
            filter=f"endpoint_id=='{endpoint_id}'",
            start=start,
            end=end,
        )

        # Fill the metrics mapping dictionary with the metric name and values
        data_dict = data.to_dict()
        for metric in metrics:
            metric_data = data_dict.get(metric)
            if metric_data is None:
                # The requested metric is not part of the result - leave it out of the mapping
                continue

            values = [
                (str(timestamp), value) for timestamp, value in metric_data.items()
            ]
            metrics_mapping[metric] = values

    except v3io_frames.errors.ReadError:
        # `warning` is used (rather than `warn`) to match the other methods of this store
        logger.warning("Failed to read tsdb", endpoint=endpoint_id)

    return metrics_mapping
|
|
364
|
+
|
|
365
|
+
def _generate_tsdb_paths(self) -> typing.Tuple[str, str]:
    """Build the two TSDB paths used when cleaning up the time series resources.

    :return: A tuple of:
         [0] = Short path to the TSDB resources
         [1] = Filtered path to TSDB events without schema and container
    """
    # Full path for the time series DB events
    events_prefix = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
        project=self.project,
        kind=model_monitoring_constants.ModelMonitoringStoreKinds.EVENTS,
    )

    # Main directory with the TSDB resources
    resources_dir = mlrun.utils.model_monitoring.parse_model_endpoint_project_prefix(
        events_prefix, self.project
    )

    # Only the third parsed component is needed: the path without schema and container,
    # as required by the frames object
    _, _, frames_table = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(
        events_prefix
    )
    return resources_dir, frames_table
|
|
391
|
+
|
|
392
|
+
@staticmethod
def _build_kv_cursor_filter_expression(
    project: str,
    function: str = None,
    model: str = None,
    labels: typing.List[str] = None,
    top_level: bool = False,
) -> str:
    """
    Convert the provided filters into a valid filter expression. The expected filter expression includes different
    conditions, divided by ' AND '.

    :param project:   The name of the project.
    :param model:     The name of the model to filter by.
    :param function:  The name of the function to filter by.
    :param labels:    A list of labels to filter by. Label filters work by either filtering a specific value of
                      a label (i.e. list("key=value")) or by looking for the existence of a given
                      key (i.e. "key"). Only the first '=' separates key from value, so the value itself
                      may contain '='. A leading '_' is added to the label when it is missing.
    :param top_level: If True will return only routers and endpoint that are NOT children of any router.

    :return: A valid filter expression as a string.

    :raise MLRunInvalidArgumentError: If project value is None or empty.
    """

    if not project:
        raise mlrun.errors.MLRunInvalidArgumentError("project can't be empty")

    # Add project filter
    filter_expression = [f"project=='{project}'"]

    # Add function and model filters
    if function:
        filter_expression.append(f"function=='{function}'")
    if model:
        filter_expression.append(f"model=='{model}'")

    # Add labels filters
    for label in labels or []:
        if not label.startswith("_"):
            label = f"_{label}"

        # Split on the first '=' only: a plain `split("=")` yields more than two parts when the value
        # contains '=' and the two-name unpacking then fails with ValueError.
        lbl, separator, value = label.partition("=")
        if separator:
            filter_expression.append(f"{lbl.strip()}=='{value.strip()}'")
        else:
            filter_expression.append(f"exists({label})")

    # Apply top_level filter (remove endpoints that considered a child of a router)
    if top_level:
        filter_expression.append(
            f"(endpoint_type=='{str(model_monitoring_constants.EndpointType.NODE_EP.value)}' "
            f"OR endpoint_type=='{str(model_monitoring_constants.EndpointType.ROUTER.value)}')"
        )

    return " AND ".join(filter_expression)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
|
|
16
|
+
import typing
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ModelEndpointStore(ABC):
    """
    Abstract interface for storing and querying model endpoint records in a DB target.
    """

    def __init__(self, project: str):
        """
        Set up a model endpoint store bound to one project.

        :param project: The name of the project.
        """
        self.project = project

    @abstractmethod
    def write_model_endpoint(self, endpoint: typing.Dict[str, typing.Any]):
        """
        Add a new endpoint record to the DB table.

        :param endpoint: model endpoint dictionary that will be written into the DB.
        """

    @abstractmethod
    def update_model_endpoint(
        self, endpoint_id: str, attributes: typing.Dict[str, typing.Any]
    ):
        """
        Apply the given attributes to an existing model endpoint record.

        :param endpoint_id: The unique id of the model endpoint.
        :param attributes: Dictionary of attributes used to update the model endpoint. Note that the keys
                           of the attributes dictionary should exist in the DB table.
        """

    @abstractmethod
    def delete_model_endpoint(self, endpoint_id: str):
        """
        Remove the record that belongs to the given model endpoint id.

        :param endpoint_id: The unique id of the model endpoint.
        """

    @abstractmethod
    def delete_model_endpoints_resources(
        self, endpoints: typing.List[typing.Dict[str, typing.Any]]
    ):
        """
        Remove every resource that belongs to the given model endpoints.

        :param endpoints: A list of model endpoints flattened dictionaries.
        """

    @abstractmethod
    def get_model_endpoint(
        self,
        endpoint_id: str,
    ) -> typing.Dict[str, typing.Any]:
        """
        Fetch the record of a single model endpoint.

        :param endpoint_id: The unique id of the model endpoint.

        :return: A model endpoint record as a dictionary.
        """

    @abstractmethod
    def list_model_endpoints(
        self,
        model: str = None,
        function: str = None,
        labels: typing.List[str] = None,
        top_level: bool = None,
        uids: typing.List = None,
    ) -> typing.List[typing.Dict[str, typing.Any]]:
        """
        List model endpoint dictionaries, optionally filtered by model, function, labels or top level.
        When no filter is applied, all available model endpoints of the project are listed.

        :param model:     The name of the model to filter by.
        :param function:  The name of the function to filter by.
        :param labels:    A list of labels to filter by. Label filters work by either filtering a specific value
                          of a label (i.e. list("key=value")) or by looking for the existence of a given
                          key (i.e. "key").
        :param top_level: If True will return only routers and endpoint that are NOT children of any router.
        :param uids:      List of model endpoint unique ids to include in the result.

        :return: A list of model endpoint dictionaries.
        """

    @abstractmethod
    def get_endpoint_real_time_metrics(
        self,
        endpoint_id: str,
        metrics: typing.List[str],
        start: str = "now-1h",
        end: str = "now",
        access_key: str = None,
    ) -> typing.Dict[str, typing.List[typing.Tuple[str, float]]]:
        """
        Read real-time metrics of a model endpoint from the time series DB. Besides pre-defined metrics such as
        `predictions_per_second` and `latency_avg_5m`, user-defined custom metrics can be requested as well.

        :param endpoint_id:      The unique id of the model endpoint.
        :param metrics:          A list of real-time metrics to return for the model endpoint.
        :param start:            The start time of the metrics. Can be represented by a string containing an RFC 3339
                                 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                                 `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for
                                 the earliest time.
        :param end:              The end time of the metrics. Can be represented by a string containing an RFC 3339
                                 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                                 `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for
                                 the earliest time.
        :param access_key:       V3IO access key that will be used for generating Frames client object. If not
                                 provided, the access key will be retrieved from the environment variables.

        :return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
                 includes timestamps and the values.
        """
|