mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3.dist-info/RECORD +0 -381
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/k8s_utils.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -11,772 +11,27 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
import base64
|
|
15
|
-
import hashlib
|
|
16
|
-
import time
|
|
17
14
|
import typing
|
|
18
|
-
from datetime import datetime
|
|
19
|
-
from sys import stdout
|
|
20
15
|
|
|
21
16
|
import kubernetes.client
|
|
22
|
-
from kubernetes import client, config
|
|
23
|
-
from kubernetes.client.rest import ApiException
|
|
24
17
|
|
|
25
|
-
import mlrun.
|
|
18
|
+
import mlrun.common.schemas
|
|
26
19
|
import mlrun.errors
|
|
27
20
|
|
|
28
21
|
from .config import config as mlconfig
|
|
29
|
-
from .errors import err_to_str
|
|
30
|
-
from .platforms.iguazio import v3io_to_vol
|
|
31
|
-
from .utils import logger
|
|
32
22
|
|
|
33
|
-
|
|
23
|
+
_running_inside_kubernetes_cluster = None
|
|
34
24
|
|
|
35
25
|
|
|
36
|
-
def
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
k8s cluster)
|
|
40
|
-
:param log: sometimes we want to avoid logging when executing init_k8s_config
|
|
41
|
-
"""
|
|
42
|
-
global _k8s
|
|
43
|
-
if not _k8s:
|
|
44
|
-
_k8s = K8sHelper(namespace, silent=silent, log=log)
|
|
45
|
-
return _k8s
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class SecretTypes:
|
|
49
|
-
opaque = "Opaque"
|
|
50
|
-
v3io_fuse = "v3io/fuse"
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
class K8sHelper:
|
|
54
|
-
def __init__(self, namespace=None, config_file=None, silent=False, log=True):
|
|
55
|
-
self.namespace = namespace or mlconfig.namespace
|
|
56
|
-
self.config_file = config_file
|
|
57
|
-
self.running_inside_kubernetes_cluster = False
|
|
58
|
-
try:
|
|
59
|
-
self._init_k8s_config(log)
|
|
60
|
-
self.v1api = client.CoreV1Api()
|
|
61
|
-
self.crdapi = client.CustomObjectsApi()
|
|
62
|
-
except Exception:
|
|
63
|
-
if not silent:
|
|
64
|
-
raise
|
|
65
|
-
|
|
66
|
-
def resolve_namespace(self, namespace=None):
|
|
67
|
-
return namespace or self.namespace
|
|
68
|
-
|
|
69
|
-
def _init_k8s_config(self, log=True):
|
|
70
|
-
try:
|
|
71
|
-
config.load_incluster_config()
|
|
72
|
-
self.running_inside_kubernetes_cluster = True
|
|
73
|
-
if log:
|
|
74
|
-
logger.info("using in-cluster config.")
|
|
75
|
-
except Exception:
|
|
76
|
-
try:
|
|
77
|
-
config.load_kube_config(self.config_file)
|
|
78
|
-
if log:
|
|
79
|
-
logger.info("using local kubernetes config.")
|
|
80
|
-
except Exception:
|
|
81
|
-
raise RuntimeError(
|
|
82
|
-
"cannot find local kubernetes config file,"
|
|
83
|
-
" place it in ~/.kube/config or specify it in "
|
|
84
|
-
"KUBECONFIG env var"
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
def is_running_inside_kubernetes_cluster(self):
|
|
88
|
-
return self.running_inside_kubernetes_cluster
|
|
89
|
-
|
|
90
|
-
def list_pods(self, namespace=None, selector="", states=None):
|
|
91
|
-
try:
|
|
92
|
-
resp = self.v1api.list_namespaced_pod(
|
|
93
|
-
self.resolve_namespace(namespace), label_selector=selector
|
|
94
|
-
)
|
|
95
|
-
except ApiException as exc:
|
|
96
|
-
logger.error(f"failed to list pods: {err_to_str(exc)}")
|
|
97
|
-
raise exc
|
|
98
|
-
|
|
99
|
-
items = []
|
|
100
|
-
for i in resp.items:
|
|
101
|
-
if not states or i.status.phase in states:
|
|
102
|
-
items.append(i)
|
|
103
|
-
return items
|
|
104
|
-
|
|
105
|
-
def clean_pods(self, namespace=None, selector="", states=None):
|
|
106
|
-
if not selector and not states:
|
|
107
|
-
raise ValueError("labels selector or states list must be specified")
|
|
108
|
-
items = self.list_pods(namespace, selector, states)
|
|
109
|
-
for item in items:
|
|
110
|
-
self.delete_pod(item.metadata.name, item.metadata.namespace)
|
|
111
|
-
|
|
112
|
-
def create_pod(self, pod, max_retry=3, retry_interval=3):
|
|
113
|
-
if "pod" in dir(pod):
|
|
114
|
-
pod = pod.pod
|
|
115
|
-
pod.metadata.namespace = self.resolve_namespace(pod.metadata.namespace)
|
|
116
|
-
|
|
117
|
-
retry_count = 0
|
|
118
|
-
while True:
|
|
119
|
-
try:
|
|
120
|
-
resp = self.v1api.create_namespaced_pod(pod.metadata.namespace, pod)
|
|
121
|
-
except ApiException as exc:
|
|
122
|
-
|
|
123
|
-
if retry_count > max_retry:
|
|
124
|
-
logger.error(
|
|
125
|
-
"failed to create pod after max retries",
|
|
126
|
-
retry_count=retry_count,
|
|
127
|
-
exc=err_to_str(exc),
|
|
128
|
-
pod=pod,
|
|
129
|
-
)
|
|
130
|
-
raise exc
|
|
131
|
-
|
|
132
|
-
logger.error("failed to create pod", exc=err_to_str(exc), pod=pod)
|
|
133
|
-
|
|
134
|
-
# known k8s issue, see https://github.com/kubernetes/kubernetes/issues/67761
|
|
135
|
-
if "gke-resource-quotas" in err_to_str(exc):
|
|
136
|
-
logger.warning(
|
|
137
|
-
"failed to create pod due to gke resource error, "
|
|
138
|
-
f"sleeping {retry_interval} seconds and retrying"
|
|
139
|
-
)
|
|
140
|
-
retry_count += 1
|
|
141
|
-
time.sleep(retry_interval)
|
|
142
|
-
continue
|
|
143
|
-
|
|
144
|
-
raise exc
|
|
145
|
-
else:
|
|
146
|
-
logger.info(f"Pod {resp.metadata.name} created")
|
|
147
|
-
return resp.metadata.name, resp.metadata.namespace
|
|
148
|
-
|
|
149
|
-
def delete_pod(self, name, namespace=None):
|
|
150
|
-
try:
|
|
151
|
-
api_response = self.v1api.delete_namespaced_pod(
|
|
152
|
-
name,
|
|
153
|
-
self.resolve_namespace(namespace),
|
|
154
|
-
grace_period_seconds=0,
|
|
155
|
-
propagation_policy="Background",
|
|
156
|
-
)
|
|
157
|
-
return api_response
|
|
158
|
-
except ApiException as exc:
|
|
159
|
-
# ignore error if pod is already removed
|
|
160
|
-
if exc.status != 404:
|
|
161
|
-
logger.error(f"failed to delete pod: {err_to_str(exc)}", pod_name=name)
|
|
162
|
-
raise exc
|
|
163
|
-
|
|
164
|
-
def get_pod(self, name, namespace=None, raise_on_not_found=False):
|
|
165
|
-
try:
|
|
166
|
-
api_response = self.v1api.read_namespaced_pod(
|
|
167
|
-
name=name, namespace=self.resolve_namespace(namespace)
|
|
168
|
-
)
|
|
169
|
-
return api_response
|
|
170
|
-
except ApiException as exc:
|
|
171
|
-
if exc.status != 404:
|
|
172
|
-
logger.error(f"failed to get pod: {err_to_str(exc)}")
|
|
173
|
-
raise exc
|
|
174
|
-
else:
|
|
175
|
-
if raise_on_not_found:
|
|
176
|
-
raise mlrun.errors.MLRunNotFoundError(f"Pod not found: {name}")
|
|
177
|
-
return None
|
|
178
|
-
|
|
179
|
-
def get_pod_status(self, name, namespace=None):
|
|
180
|
-
return self.get_pod(
|
|
181
|
-
name, namespace, raise_on_not_found=True
|
|
182
|
-
).status.phase.lower()
|
|
183
|
-
|
|
184
|
-
def delete_crd(self, name, crd_group, crd_version, crd_plural, namespace=None):
|
|
185
|
-
try:
|
|
186
|
-
namespace = self.resolve_namespace(namespace)
|
|
187
|
-
self.crdapi.delete_namespaced_custom_object(
|
|
188
|
-
crd_group,
|
|
189
|
-
crd_version,
|
|
190
|
-
namespace,
|
|
191
|
-
crd_plural,
|
|
192
|
-
name,
|
|
193
|
-
)
|
|
194
|
-
logger.info(
|
|
195
|
-
"Deleted crd object",
|
|
196
|
-
crd_name=name,
|
|
197
|
-
namespace=namespace,
|
|
198
|
-
)
|
|
199
|
-
except ApiException as exc:
|
|
200
|
-
|
|
201
|
-
# ignore error if crd is already removed
|
|
202
|
-
if exc.status != 404:
|
|
203
|
-
logger.error(
|
|
204
|
-
f"failed to delete crd: {err_to_str(exc)}",
|
|
205
|
-
crd_name=name,
|
|
206
|
-
crd_group=crd_group,
|
|
207
|
-
crd_version=crd_version,
|
|
208
|
-
crd_plural=crd_plural,
|
|
209
|
-
)
|
|
210
|
-
raise exc
|
|
211
|
-
|
|
212
|
-
def logs(self, name, namespace=None):
|
|
213
|
-
try:
|
|
214
|
-
resp = self.v1api.read_namespaced_pod_log(
|
|
215
|
-
name=name, namespace=self.resolve_namespace(namespace)
|
|
216
|
-
)
|
|
217
|
-
except ApiException as exc:
|
|
218
|
-
logger.error(f"failed to get pod logs: {err_to_str(exc)}")
|
|
219
|
-
raise exc
|
|
220
|
-
|
|
221
|
-
return resp
|
|
222
|
-
|
|
223
|
-
def run_job(self, pod, timeout=600):
|
|
224
|
-
pod_name, namespace = self.create_pod(pod)
|
|
225
|
-
if not pod_name:
|
|
226
|
-
logger.error("failed to create pod")
|
|
227
|
-
return "error"
|
|
228
|
-
return self.watch(pod_name, namespace, timeout)
|
|
229
|
-
|
|
230
|
-
def watch(self, pod_name, namespace=None, timeout=600, writer=None):
|
|
231
|
-
namespace = self.resolve_namespace(namespace)
|
|
232
|
-
start_time = datetime.now()
|
|
233
|
-
while True:
|
|
234
|
-
try:
|
|
235
|
-
pod = self.get_pod(pod_name, namespace)
|
|
236
|
-
if not pod:
|
|
237
|
-
return "error"
|
|
238
|
-
status = pod.status.phase.lower()
|
|
239
|
-
if status in ["running", "completed", "succeeded"]:
|
|
240
|
-
print("")
|
|
241
|
-
break
|
|
242
|
-
if status == "failed":
|
|
243
|
-
return "failed"
|
|
244
|
-
elapsed_time = (datetime.now() - start_time).seconds
|
|
245
|
-
if elapsed_time > timeout:
|
|
246
|
-
return "timeout"
|
|
247
|
-
time.sleep(2)
|
|
248
|
-
stdout.write(".")
|
|
249
|
-
if status != "pending":
|
|
250
|
-
logger.warning(f"pod state in loop is {status}")
|
|
251
|
-
except ApiException as exc:
|
|
252
|
-
logger.error(f"failed waiting for pod: {err_to_str(exc)}\n")
|
|
253
|
-
return "error"
|
|
254
|
-
outputs = self.v1api.read_namespaced_pod_log(
|
|
255
|
-
name=pod_name, namespace=namespace, follow=True, _preload_content=False
|
|
256
|
-
)
|
|
257
|
-
for out in outputs:
|
|
258
|
-
print(out.decode("utf-8"), end="")
|
|
259
|
-
if writer:
|
|
260
|
-
writer.write(out)
|
|
261
|
-
|
|
262
|
-
for i in range(5):
|
|
263
|
-
pod_state = self.get_pod(pod_name, namespace).status.phase.lower()
|
|
264
|
-
if pod_state != "running":
|
|
265
|
-
break
|
|
266
|
-
logger.warning("pod still running, waiting 2 sec")
|
|
267
|
-
time.sleep(2)
|
|
268
|
-
|
|
269
|
-
if pod_state == "failed":
|
|
270
|
-
logger.error("pod exited with error")
|
|
271
|
-
if writer:
|
|
272
|
-
writer.flush()
|
|
273
|
-
return pod_state
|
|
274
|
-
|
|
275
|
-
def create_cfgmap(self, name, data, namespace="", labels=None):
|
|
276
|
-
body = client.api_client.V1ConfigMap()
|
|
277
|
-
namespace = self.resolve_namespace(namespace)
|
|
278
|
-
body.data = data
|
|
279
|
-
if name.endswith("*"):
|
|
280
|
-
body.metadata = client.V1ObjectMeta(
|
|
281
|
-
generate_name=name[:-1], namespace=namespace, labels=labels
|
|
282
|
-
)
|
|
283
|
-
else:
|
|
284
|
-
body.metadata = client.V1ObjectMeta(
|
|
285
|
-
name=name, namespace=namespace, labels=labels
|
|
286
|
-
)
|
|
287
|
-
try:
|
|
288
|
-
resp = self.v1api.create_namespaced_config_map(namespace, body)
|
|
289
|
-
except ApiException as exc:
|
|
290
|
-
logger.error(f"failed to create configmap: {err_to_str(exc)}")
|
|
291
|
-
raise exc
|
|
292
|
-
|
|
293
|
-
logger.info(f"ConfigMap {resp.metadata.name} created")
|
|
294
|
-
return resp.metadata.name
|
|
295
|
-
|
|
296
|
-
def del_cfgmap(self, name, namespace=None):
|
|
297
|
-
try:
|
|
298
|
-
api_response = self.v1api.delete_namespaced_config_map(
|
|
299
|
-
name,
|
|
300
|
-
self.resolve_namespace(namespace),
|
|
301
|
-
grace_period_seconds=0,
|
|
302
|
-
propagation_policy="Background",
|
|
303
|
-
)
|
|
304
|
-
|
|
305
|
-
return api_response
|
|
306
|
-
except ApiException as exc:
|
|
307
|
-
# ignore error if ConfigMap is already removed
|
|
308
|
-
if exc.status != 404:
|
|
309
|
-
logger.error(f"failed to delete ConfigMap: {err_to_str(exc)}")
|
|
310
|
-
raise exc
|
|
311
|
-
|
|
312
|
-
def list_cfgmap(self, namespace=None, selector=""):
|
|
313
|
-
try:
|
|
314
|
-
resp = self.v1api.list_namespaced_config_map(
|
|
315
|
-
self.resolve_namespace(namespace), watch=False, label_selector=selector
|
|
316
|
-
)
|
|
317
|
-
except ApiException as exc:
|
|
318
|
-
logger.error(f"failed to list ConfigMaps: {err_to_str(exc)}")
|
|
319
|
-
raise exc
|
|
320
|
-
|
|
321
|
-
items = []
|
|
322
|
-
for i in resp.items:
|
|
323
|
-
items.append(i)
|
|
324
|
-
return items
|
|
325
|
-
|
|
326
|
-
def get_logger_pods(self, project, uid, run_kind, namespace=""):
|
|
327
|
-
|
|
328
|
-
# As this file is imported in mlrun.runtimes, we sadly cannot have this import in the top level imports
|
|
329
|
-
# as that will create an import loop.
|
|
330
|
-
# TODO: Fix the import loops already!
|
|
331
|
-
import mlrun.runtimes
|
|
332
|
-
|
|
333
|
-
namespace = self.resolve_namespace(namespace)
|
|
334
|
-
mpijob_crd_version = mlrun.runtimes.utils.resolve_mpijob_crd_version(
|
|
335
|
-
api_context=True
|
|
336
|
-
)
|
|
337
|
-
mpijob_role_label = (
|
|
338
|
-
mlrun.runtimes.constants.MPIJobCRDVersions.role_label_by_version(
|
|
339
|
-
mpijob_crd_version
|
|
340
|
-
)
|
|
341
|
-
)
|
|
342
|
-
extra_selectors = {
|
|
343
|
-
"spark": "spark-role=driver",
|
|
344
|
-
"mpijob": f"{mpijob_role_label}=launcher",
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
# TODO: all mlrun labels are sprinkled in a lot of places - they need to all be defined in a central,
|
|
348
|
-
# inclusive place.
|
|
349
|
-
selectors = [
|
|
350
|
-
"mlrun/class",
|
|
351
|
-
f"mlrun/project={project}",
|
|
352
|
-
f"mlrun/uid={uid}",
|
|
353
|
-
]
|
|
354
|
-
|
|
355
|
-
# In order to make the `list_pods` request return a lighter and quicker result, we narrow the search for
|
|
356
|
-
# the relevant pods using the proper label selector according to the run kind
|
|
357
|
-
if run_kind in extra_selectors:
|
|
358
|
-
selectors.append(extra_selectors[run_kind])
|
|
359
|
-
|
|
360
|
-
selector = ",".join(selectors)
|
|
361
|
-
pods = self.list_pods(namespace, selector=selector)
|
|
362
|
-
if not pods:
|
|
363
|
-
logger.error("no pod matches that uid", uid=uid)
|
|
364
|
-
return
|
|
365
|
-
|
|
366
|
-
return {p.metadata.name: p.status.phase for p in pods}
|
|
367
|
-
|
|
368
|
-
def create_project_service_account(self, project, service_account, namespace=""):
|
|
369
|
-
namespace = self.resolve_namespace(namespace)
|
|
370
|
-
k8s_service_account = client.V1ServiceAccount()
|
|
371
|
-
labels = {"mlrun/project": project}
|
|
372
|
-
k8s_service_account.metadata = client.V1ObjectMeta(
|
|
373
|
-
name=service_account, namespace=namespace, labels=labels
|
|
374
|
-
)
|
|
375
|
-
try:
|
|
376
|
-
api_response = self.v1api.create_namespaced_service_account(
|
|
377
|
-
namespace,
|
|
378
|
-
k8s_service_account,
|
|
379
|
-
)
|
|
380
|
-
return api_response
|
|
381
|
-
except ApiException as exc:
|
|
382
|
-
logger.error(f"failed to create service account: {err_to_str(exc)}")
|
|
383
|
-
raise exc
|
|
384
|
-
|
|
385
|
-
def get_project_vault_secret_name(
|
|
386
|
-
self, project, service_account_name, namespace=""
|
|
387
|
-
):
|
|
388
|
-
namespace = self.resolve_namespace(namespace)
|
|
389
|
-
|
|
26
|
+
def is_running_inside_kubernetes_cluster():
|
|
27
|
+
global _running_inside_kubernetes_cluster
|
|
28
|
+
if _running_inside_kubernetes_cluster is None:
|
|
390
29
|
try:
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
if exc.status != 404:
|
|
397
|
-
logger.error(f"failed to retrieve service accounts: {err_to_str(exc)}")
|
|
398
|
-
raise exc
|
|
399
|
-
return None
|
|
400
|
-
|
|
401
|
-
if len(service_account.secrets) > 1:
|
|
402
|
-
raise ValueError(
|
|
403
|
-
f"Service account {service_account_name} has more than one secret"
|
|
404
|
-
)
|
|
405
|
-
|
|
406
|
-
return service_account.secrets[0].name
|
|
407
|
-
|
|
408
|
-
def get_project_secret_name(self, project) -> str:
|
|
409
|
-
return mlconfig.secret_stores.kubernetes.project_secret_name.format(
|
|
410
|
-
project=project
|
|
411
|
-
)
|
|
412
|
-
|
|
413
|
-
def get_auth_secret_name(self, access_key: str) -> str:
|
|
414
|
-
hashed_access_key = self._hash_access_key(access_key)
|
|
415
|
-
return mlconfig.secret_stores.kubernetes.auth_secret_name.format(
|
|
416
|
-
hashed_access_key=hashed_access_key
|
|
417
|
-
)
|
|
418
|
-
|
|
419
|
-
@staticmethod
|
|
420
|
-
def _hash_access_key(access_key: str):
|
|
421
|
-
return hashlib.sha224(access_key.encode()).hexdigest()
|
|
422
|
-
|
|
423
|
-
def store_project_secrets(self, project, secrets, namespace=""):
|
|
424
|
-
secret_name = self.get_project_secret_name(project)
|
|
425
|
-
self.store_secrets(secret_name, secrets, namespace)
|
|
426
|
-
|
|
427
|
-
def read_auth_secret(self, secret_name, namespace="", raise_on_not_found=False):
|
|
428
|
-
namespace = self.resolve_namespace(namespace)
|
|
429
|
-
|
|
430
|
-
try:
|
|
431
|
-
secret_data = self.v1api.read_namespaced_secret(secret_name, namespace).data
|
|
432
|
-
except ApiException as exc:
|
|
433
|
-
logger.error(
|
|
434
|
-
"Failed to read secret",
|
|
435
|
-
secret_name=secret_name,
|
|
436
|
-
namespace=namespace,
|
|
437
|
-
exc=err_to_str(exc),
|
|
438
|
-
)
|
|
439
|
-
if exc.status != 404:
|
|
440
|
-
raise exc
|
|
441
|
-
elif raise_on_not_found:
|
|
442
|
-
raise mlrun.errors.MLRunNotFoundError(
|
|
443
|
-
f"Secret '{secret_name}' was not found in namespace '{namespace}'"
|
|
444
|
-
) from exc
|
|
445
|
-
|
|
446
|
-
return None, None
|
|
447
|
-
|
|
448
|
-
def _get_secret_value(key):
|
|
449
|
-
if secret_data.get(key):
|
|
450
|
-
return base64.b64decode(secret_data[key]).decode("utf-8")
|
|
451
|
-
else:
|
|
452
|
-
return None
|
|
453
|
-
|
|
454
|
-
username = _get_secret_value(
|
|
455
|
-
mlrun.api.schemas.AuthSecretData.get_field_secret_key("username")
|
|
456
|
-
)
|
|
457
|
-
access_key = _get_secret_value(
|
|
458
|
-
mlrun.api.schemas.AuthSecretData.get_field_secret_key("access_key")
|
|
459
|
-
)
|
|
460
|
-
|
|
461
|
-
return username, access_key
|
|
462
|
-
|
|
463
|
-
def store_auth_secret(self, username: str, access_key: str, namespace="") -> str:
|
|
464
|
-
secret_name = self.get_auth_secret_name(access_key)
|
|
465
|
-
secret_data = {
|
|
466
|
-
mlrun.api.schemas.AuthSecretData.get_field_secret_key("username"): username,
|
|
467
|
-
mlrun.api.schemas.AuthSecretData.get_field_secret_key(
|
|
468
|
-
"access_key"
|
|
469
|
-
): access_key,
|
|
470
|
-
}
|
|
471
|
-
self.store_secrets(
|
|
472
|
-
secret_name,
|
|
473
|
-
secret_data,
|
|
474
|
-
namespace,
|
|
475
|
-
type_=SecretTypes.v3io_fuse,
|
|
476
|
-
labels={"mlrun/username": username},
|
|
477
|
-
)
|
|
478
|
-
return secret_name
|
|
479
|
-
|
|
480
|
-
def store_secrets(
|
|
481
|
-
self,
|
|
482
|
-
secret_name,
|
|
483
|
-
secrets,
|
|
484
|
-
namespace="",
|
|
485
|
-
type_=SecretTypes.opaque,
|
|
486
|
-
labels: typing.Optional[dict] = None,
|
|
487
|
-
):
|
|
488
|
-
namespace = self.resolve_namespace(namespace)
|
|
489
|
-
try:
|
|
490
|
-
k8s_secret = self.v1api.read_namespaced_secret(secret_name, namespace)
|
|
491
|
-
except ApiException as exc:
|
|
492
|
-
# If secret doesn't exist, we'll simply create it
|
|
493
|
-
if exc.status != 404:
|
|
494
|
-
logger.error(f"failed to retrieve k8s secret: {err_to_str(exc)}")
|
|
495
|
-
raise exc
|
|
496
|
-
k8s_secret = client.V1Secret(type=type_)
|
|
497
|
-
k8s_secret.metadata = client.V1ObjectMeta(
|
|
498
|
-
name=secret_name, namespace=namespace, labels=labels
|
|
499
|
-
)
|
|
500
|
-
k8s_secret.string_data = secrets
|
|
501
|
-
self.v1api.create_namespaced_secret(namespace, k8s_secret)
|
|
502
|
-
return
|
|
503
|
-
|
|
504
|
-
secret_data = k8s_secret.data.copy()
|
|
505
|
-
for key, value in secrets.items():
|
|
506
|
-
secret_data[key] = base64.b64encode(value.encode()).decode("utf-8")
|
|
507
|
-
|
|
508
|
-
k8s_secret.data = secret_data
|
|
509
|
-
self.v1api.replace_namespaced_secret(secret_name, namespace, k8s_secret)
|
|
510
|
-
|
|
511
|
-
def delete_project_secrets(self, project, secrets, namespace=""):
|
|
512
|
-
secret_name = self.get_project_secret_name(project)
|
|
513
|
-
self.delete_secrets(secret_name, secrets, namespace)
|
|
514
|
-
|
|
515
|
-
def delete_auth_secret(self, secret_ref: str, namespace=""):
|
|
516
|
-
self.delete_secrets(secret_ref, {}, namespace)
|
|
517
|
-
|
|
518
|
-
def delete_secrets(self, secret_name, secrets, namespace=""):
|
|
519
|
-
namespace = self.resolve_namespace(namespace)
|
|
520
|
-
|
|
521
|
-
try:
|
|
522
|
-
k8s_secret = self.v1api.read_namespaced_secret(secret_name, namespace)
|
|
523
|
-
except ApiException as exc:
|
|
524
|
-
# If secret does not exist, return as if the deletion was successfully
|
|
525
|
-
if exc.status == 404:
|
|
526
|
-
return
|
|
527
|
-
else:
|
|
528
|
-
logger.error(f"failed to retrieve k8s secret: {err_to_str(exc)}")
|
|
529
|
-
raise exc
|
|
530
|
-
|
|
531
|
-
if not secrets:
|
|
532
|
-
secret_data = {}
|
|
533
|
-
else:
|
|
534
|
-
secret_data = k8s_secret.data.copy()
|
|
535
|
-
for secret in secrets:
|
|
536
|
-
secret_data.pop(secret, None)
|
|
537
|
-
|
|
538
|
-
if not secret_data:
|
|
539
|
-
self.v1api.delete_namespaced_secret(secret_name, namespace)
|
|
540
|
-
else:
|
|
541
|
-
k8s_secret.data = secret_data
|
|
542
|
-
self.v1api.replace_namespaced_secret(secret_name, namespace, k8s_secret)
|
|
543
|
-
|
|
544
|
-
def _get_project_secrets_raw_data(self, project, namespace=""):
|
|
545
|
-
secret_name = self.get_project_secret_name(project)
|
|
546
|
-
return self._get_secret_raw_data(secret_name, namespace)
|
|
547
|
-
|
|
548
|
-
def _get_secret_raw_data(self, secret_name, namespace=""):
|
|
549
|
-
namespace = self.resolve_namespace(namespace)
|
|
550
|
-
|
|
551
|
-
try:
|
|
552
|
-
k8s_secret = self.v1api.read_namespaced_secret(secret_name, namespace)
|
|
553
|
-
except ApiException:
|
|
554
|
-
return None
|
|
555
|
-
|
|
556
|
-
return k8s_secret.data
|
|
557
|
-
|
|
558
|
-
def get_project_secret_keys(self, project, namespace="", filter_internal=False):
|
|
559
|
-
secrets_data = self._get_project_secrets_raw_data(project, namespace)
|
|
560
|
-
if not secrets_data:
|
|
561
|
-
return []
|
|
562
|
-
|
|
563
|
-
secret_keys = list(secrets_data.keys())
|
|
564
|
-
if filter_internal:
|
|
565
|
-
secret_keys = list(
|
|
566
|
-
filter(lambda key: not key.startswith("mlrun."), secret_keys)
|
|
567
|
-
)
|
|
568
|
-
return secret_keys
|
|
569
|
-
|
|
570
|
-
def get_project_secret_data(self, project, secret_keys=None, namespace=""):
|
|
571
|
-
secrets_data = self._get_project_secrets_raw_data(project, namespace)
|
|
572
|
-
return self._decode_secret_data(secrets_data, secret_keys)
|
|
573
|
-
|
|
574
|
-
def get_secret_data(self, secret_name, namespace=""):
|
|
575
|
-
secrets_data = self._get_secret_raw_data(secret_name, namespace)
|
|
576
|
-
return self._decode_secret_data(secrets_data)
|
|
577
|
-
|
|
578
|
-
def _decode_secret_data(self, secrets_data, secret_keys=None):
|
|
579
|
-
results = {}
|
|
580
|
-
if not secrets_data:
|
|
581
|
-
return results
|
|
582
|
-
|
|
583
|
-
# If not asking for specific keys, return all
|
|
584
|
-
secret_keys = secret_keys or secrets_data.keys()
|
|
585
|
-
|
|
586
|
-
for key in secret_keys:
|
|
587
|
-
encoded_value = secrets_data.get(key)
|
|
588
|
-
if encoded_value:
|
|
589
|
-
results[key] = base64.b64decode(secrets_data[key]).decode("utf-8")
|
|
590
|
-
return results
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
class BasePod:
|
|
594
|
-
def __init__(
|
|
595
|
-
self,
|
|
596
|
-
task_name="",
|
|
597
|
-
image=None,
|
|
598
|
-
command=None,
|
|
599
|
-
args=None,
|
|
600
|
-
namespace="",
|
|
601
|
-
kind="job",
|
|
602
|
-
project=None,
|
|
603
|
-
default_pod_spec_attributes=None,
|
|
604
|
-
resources=None,
|
|
605
|
-
):
|
|
606
|
-
self.namespace = namespace
|
|
607
|
-
self.name = ""
|
|
608
|
-
self.task_name = task_name
|
|
609
|
-
self.image = image
|
|
610
|
-
self.command = command
|
|
611
|
-
self.args = args
|
|
612
|
-
self._volumes = []
|
|
613
|
-
self._mounts = []
|
|
614
|
-
self.env = None
|
|
615
|
-
self.node_selector = None
|
|
616
|
-
self.project = project or mlrun.mlconf.default_project
|
|
617
|
-
self._labels = {
|
|
618
|
-
"mlrun/task-name": task_name,
|
|
619
|
-
"mlrun/class": kind,
|
|
620
|
-
"mlrun/project": self.project,
|
|
621
|
-
}
|
|
622
|
-
self._annotations = {}
|
|
623
|
-
self._init_containers = []
|
|
624
|
-
# will be applied on the pod spec only when calling .pod(), allows to override spec attributes
|
|
625
|
-
self.default_pod_spec_attributes = default_pod_spec_attributes
|
|
626
|
-
self.resources = resources
|
|
627
|
-
|
|
628
|
-
@property
|
|
629
|
-
def pod(self):
|
|
630
|
-
return self._get_spec()
|
|
631
|
-
|
|
632
|
-
@property
|
|
633
|
-
def init_containers(self):
|
|
634
|
-
return self._init_containers
|
|
635
|
-
|
|
636
|
-
@init_containers.setter
|
|
637
|
-
def init_containers(self, containers):
|
|
638
|
-
self._init_containers = containers
|
|
639
|
-
|
|
640
|
-
def append_init_container(
|
|
641
|
-
self,
|
|
642
|
-
image,
|
|
643
|
-
command=None,
|
|
644
|
-
args=None,
|
|
645
|
-
env=None,
|
|
646
|
-
image_pull_policy="IfNotPresent",
|
|
647
|
-
name="init",
|
|
648
|
-
):
|
|
649
|
-
if isinstance(env, dict):
|
|
650
|
-
env = [client.V1EnvVar(name=k, value=v) for k, v in env.items()]
|
|
651
|
-
self._init_containers.append(
|
|
652
|
-
client.V1Container(
|
|
653
|
-
name=name,
|
|
654
|
-
image=image,
|
|
655
|
-
env=env,
|
|
656
|
-
command=command,
|
|
657
|
-
args=args,
|
|
658
|
-
image_pull_policy=image_pull_policy,
|
|
659
|
-
)
|
|
660
|
-
)
|
|
661
|
-
|
|
662
|
-
def add_label(self, key, value):
|
|
663
|
-
self._labels[key] = str(value)
|
|
664
|
-
|
|
665
|
-
def add_annotation(self, key, value):
|
|
666
|
-
self._annotations[key] = str(value)
|
|
667
|
-
|
|
668
|
-
def add_volume(self, volume: client.V1Volume, mount_path, name=None, sub_path=None):
|
|
669
|
-
self._mounts.append(
|
|
670
|
-
client.V1VolumeMount(
|
|
671
|
-
name=name or volume.name, mount_path=mount_path, sub_path=sub_path
|
|
672
|
-
)
|
|
673
|
-
)
|
|
674
|
-
self._volumes.append(volume)
|
|
675
|
-
|
|
676
|
-
def mount_empty(self, name="empty", mount_path="/empty"):
|
|
677
|
-
self.add_volume(
|
|
678
|
-
client.V1Volume(name=name, empty_dir=client.V1EmptyDirVolumeSource()),
|
|
679
|
-
mount_path=mount_path,
|
|
680
|
-
)
|
|
681
|
-
|
|
682
|
-
def mount_v3io(
|
|
683
|
-
self, name="v3io", remote="~/", mount_path="/User", access_key="", user=""
|
|
684
|
-
):
|
|
685
|
-
self.add_volume(
|
|
686
|
-
v3io_to_vol(name, remote, access_key, user),
|
|
687
|
-
mount_path=mount_path,
|
|
688
|
-
name=name,
|
|
689
|
-
)
|
|
690
|
-
|
|
691
|
-
def mount_cfgmap(self, name, path="/config"):
|
|
692
|
-
self.add_volume(
|
|
693
|
-
client.V1Volume(
|
|
694
|
-
name=name, config_map=client.V1ConfigMapVolumeSource(name=name)
|
|
695
|
-
),
|
|
696
|
-
mount_path=path,
|
|
697
|
-
)
|
|
698
|
-
|
|
699
|
-
def mount_secret(self, name, path="/secret", items=None, sub_path=None):
|
|
700
|
-
self.add_volume(
|
|
701
|
-
client.V1Volume(
|
|
702
|
-
name=name,
|
|
703
|
-
secret=client.V1SecretVolumeSource(
|
|
704
|
-
secret_name=name,
|
|
705
|
-
items=items,
|
|
706
|
-
),
|
|
707
|
-
),
|
|
708
|
-
mount_path=path,
|
|
709
|
-
sub_path=sub_path,
|
|
710
|
-
)
|
|
711
|
-
|
|
712
|
-
def set_node_selector(self, node_selector: typing.Optional[typing.Dict[str, str]]):
|
|
713
|
-
self.node_selector = node_selector
|
|
714
|
-
|
|
715
|
-
def _get_spec(self, template=False):
|
|
716
|
-
|
|
717
|
-
pod_obj = client.V1PodTemplate if template else client.V1Pod
|
|
718
|
-
|
|
719
|
-
if self.env and isinstance(self.env, dict):
|
|
720
|
-
env = [client.V1EnvVar(name=k, value=v) for k, v in self.env.items()]
|
|
721
|
-
else:
|
|
722
|
-
env = self.env
|
|
723
|
-
container = client.V1Container(
|
|
724
|
-
name="base",
|
|
725
|
-
image=self.image,
|
|
726
|
-
env=env,
|
|
727
|
-
command=self.command,
|
|
728
|
-
args=self.args,
|
|
729
|
-
volume_mounts=self._mounts,
|
|
730
|
-
resources=self.resources,
|
|
731
|
-
)
|
|
732
|
-
|
|
733
|
-
pod_spec = client.V1PodSpec(
|
|
734
|
-
containers=[container],
|
|
735
|
-
restart_policy="Never",
|
|
736
|
-
volumes=self._volumes,
|
|
737
|
-
node_selector=self.node_selector,
|
|
738
|
-
)
|
|
739
|
-
|
|
740
|
-
# if attribute isn't defined use default pod spec attributes
|
|
741
|
-
for key, val in self.default_pod_spec_attributes.items():
|
|
742
|
-
if not getattr(pod_spec, key, None):
|
|
743
|
-
setattr(pod_spec, key, val)
|
|
744
|
-
|
|
745
|
-
for init_containers in self._init_containers:
|
|
746
|
-
init_containers.volume_mounts = self._mounts
|
|
747
|
-
pod_spec.init_containers = self._init_containers
|
|
748
|
-
|
|
749
|
-
pod = pod_obj(
|
|
750
|
-
metadata=client.V1ObjectMeta(
|
|
751
|
-
generate_name=f"{self.task_name}-",
|
|
752
|
-
namespace=self.namespace,
|
|
753
|
-
labels=self._labels,
|
|
754
|
-
annotations=self._annotations,
|
|
755
|
-
),
|
|
756
|
-
spec=pod_spec,
|
|
757
|
-
)
|
|
758
|
-
return pod
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
def format_labels(labels):
|
|
762
|
-
"""Convert a dictionary of labels into a comma separated string"""
|
|
763
|
-
if labels:
|
|
764
|
-
return ",".join([f"{k}={v}" for k, v in labels.items()])
|
|
765
|
-
else:
|
|
766
|
-
return ""
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
def verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str = None):
|
|
770
|
-
# https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
|
|
771
|
-
if requests_gpu and not limits_gpu:
|
|
772
|
-
raise mlrun.errors.MLRunConflictError(
|
|
773
|
-
"You cannot specify GPU requests without specifying limits"
|
|
774
|
-
)
|
|
775
|
-
if requests_gpu and limits_gpu and requests_gpu != limits_gpu:
|
|
776
|
-
raise mlrun.errors.MLRunConflictError(
|
|
777
|
-
f"When specifying both GPU requests and limits these two values must be equal, "
|
|
778
|
-
f"requests_gpu={requests_gpu}, limits_gpu={limits_gpu}"
|
|
779
|
-
)
|
|
30
|
+
kubernetes.config.load_incluster_config()
|
|
31
|
+
_running_inside_kubernetes_cluster = True
|
|
32
|
+
except kubernetes.config.ConfigException:
|
|
33
|
+
_running_inside_kubernetes_cluster = False
|
|
34
|
+
return _running_inside_kubernetes_cluster
|
|
780
35
|
|
|
781
36
|
|
|
782
37
|
def generate_preemptible_node_selector_requirements(
|
|
@@ -785,7 +40,7 @@ def generate_preemptible_node_selector_requirements(
|
|
|
785
40
|
"""
|
|
786
41
|
Generate node selector requirements based on the pre-configured node selector of the preemptible nodes.
|
|
787
42
|
node selector operator represents a key's relationship to a set of values.
|
|
788
|
-
Valid operators are listed in :py:class:`~mlrun.
|
|
43
|
+
Valid operators are listed in :py:class:`~mlrun.common.schemas.NodeSelectorOperator`
|
|
789
44
|
:param node_selector_operator: The operator of V1NodeSelectorRequirement
|
|
790
45
|
:return: List[V1NodeSelectorRequirement]
|
|
791
46
|
"""
|
|
@@ -815,12 +70,9 @@ def generate_preemptible_nodes_anti_affinity_terms() -> typing.List[
|
|
|
815
70
|
https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity
|
|
816
71
|
:return: List contains one nodeSelectorTerm with multiple expressions.
|
|
817
72
|
"""
|
|
818
|
-
# import here to avoid circular imports
|
|
819
|
-
from mlrun.api.schemas import NodeSelectorOperator
|
|
820
|
-
|
|
821
73
|
# compile affinities with operator NotIn to make sure pods are not running on preemptible nodes.
|
|
822
74
|
node_selector_requirements = generate_preemptible_node_selector_requirements(
|
|
823
|
-
NodeSelectorOperator.node_selector_op_not_in.value
|
|
75
|
+
mlrun.common.schemas.NodeSelectorOperator.node_selector_op_not_in.value
|
|
824
76
|
)
|
|
825
77
|
return [
|
|
826
78
|
kubernetes.client.V1NodeSelectorTerm(
|
|
@@ -838,14 +90,11 @@ def generate_preemptible_nodes_affinity_terms() -> typing.List[
|
|
|
838
90
|
then the pod can be scheduled onto a node if at least one of the nodeSelectorTerms can be satisfied.
|
|
839
91
|
:return: List of nodeSelectorTerms associated with the preemptible nodes.
|
|
840
92
|
"""
|
|
841
|
-
# import here to avoid circular imports
|
|
842
|
-
from mlrun.api.schemas import NodeSelectorOperator
|
|
843
|
-
|
|
844
93
|
node_selector_terms = []
|
|
845
94
|
|
|
846
95
|
# compile affinities with operator In so pods could schedule on at least one of the preemptible nodes.
|
|
847
96
|
node_selector_requirements = generate_preemptible_node_selector_requirements(
|
|
848
|
-
NodeSelectorOperator.node_selector_op_in.value
|
|
97
|
+
mlrun.common.schemas.NodeSelectorOperator.node_selector_op_in.value
|
|
849
98
|
)
|
|
850
99
|
for expression in node_selector_requirements:
|
|
851
100
|
node_selector_terms.append(
|