mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3.dist-info/RECORD +0 -381
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/runtimes/base.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -14,69 +14,54 @@
|
|
|
14
14
|
import enum
|
|
15
15
|
import getpass
|
|
16
16
|
import http
|
|
17
|
-
import
|
|
18
|
-
import shlex
|
|
17
|
+
import re
|
|
19
18
|
import traceback
|
|
20
|
-
import
|
|
21
|
-
import uuid
|
|
19
|
+
import warnings
|
|
22
20
|
from abc import ABC, abstractmethod
|
|
23
|
-
from ast import literal_eval
|
|
24
21
|
from base64 import b64encode
|
|
25
|
-
from copy import deepcopy
|
|
26
22
|
from datetime import datetime, timedelta, timezone
|
|
27
23
|
from os import environ
|
|
28
|
-
from typing import Dict, List, Optional, Tuple, Union
|
|
24
|
+
from typing import Callable, Dict, List, Optional, Tuple, Union
|
|
29
25
|
|
|
30
|
-
import IPython
|
|
31
26
|
import requests.exceptions
|
|
27
|
+
from deprecated import deprecated
|
|
32
28
|
from kubernetes.client.rest import ApiException
|
|
33
29
|
from nuclio.build import mlrun_footer
|
|
34
30
|
from sqlalchemy.orm import Session
|
|
35
31
|
|
|
32
|
+
import mlrun.api.db.sqldb.session
|
|
33
|
+
import mlrun.api.utils.singletons.db
|
|
34
|
+
import mlrun.common.schemas
|
|
36
35
|
import mlrun.errors
|
|
36
|
+
import mlrun.launcher.factory
|
|
37
37
|
import mlrun.utils.helpers
|
|
38
|
+
import mlrun.utils.notifications
|
|
38
39
|
import mlrun.utils.regex
|
|
39
|
-
from mlrun.api import schemas
|
|
40
40
|
from mlrun.api.constants import LogSources
|
|
41
41
|
from mlrun.api.db.base import DBInterface
|
|
42
42
|
from mlrun.utils.helpers import generate_object_uri, verify_field_regex
|
|
43
43
|
|
|
44
|
-
from ..config import config
|
|
44
|
+
from ..config import config
|
|
45
45
|
from ..datastore import store_manager
|
|
46
46
|
from ..db import RunDBError, get_or_set_dburl, get_run_db
|
|
47
47
|
from ..errors import err_to_str
|
|
48
|
-
from ..
|
|
49
|
-
from ..k8s_utils import get_k8s_helper
|
|
50
|
-
from ..kfpops import mlrun_op, write_kfpmeta
|
|
48
|
+
from ..kfpops import mlrun_op
|
|
51
49
|
from ..lists import RunList
|
|
52
|
-
from ..model import
|
|
53
|
-
BaseMetadata,
|
|
54
|
-
HyperParamOptions,
|
|
55
|
-
ImageBuilder,
|
|
56
|
-
ModelObj,
|
|
57
|
-
RunObject,
|
|
58
|
-
RunTemplate,
|
|
59
|
-
)
|
|
60
|
-
from ..secrets import SecretsStore
|
|
50
|
+
from ..model import BaseMetadata, HyperParamOptions, ImageBuilder, ModelObj, RunObject
|
|
61
51
|
from ..utils import (
|
|
62
52
|
dict_to_json,
|
|
63
53
|
dict_to_yaml,
|
|
64
54
|
enrich_image_url,
|
|
65
55
|
get_in,
|
|
66
56
|
get_parsed_docker_registry,
|
|
67
|
-
get_ui_url,
|
|
68
|
-
is_ipython,
|
|
69
57
|
logger,
|
|
70
|
-
normalize_name,
|
|
71
58
|
now_date,
|
|
72
59
|
update_in,
|
|
73
60
|
)
|
|
74
61
|
from .constants import PodPhases, RunStates
|
|
75
62
|
from .funcdoc import update_function_entry_points
|
|
76
|
-
from .
|
|
77
|
-
from .utils import RunError, calc_hash, results_to_iter
|
|
63
|
+
from .utils import RunError, calc_hash, get_k8s
|
|
78
64
|
|
|
79
|
-
run_modes = ["pass"]
|
|
80
65
|
spec_fields = [
|
|
81
66
|
"command",
|
|
82
67
|
"args",
|
|
@@ -90,6 +75,7 @@ spec_fields = [
|
|
|
90
75
|
"pythonpath",
|
|
91
76
|
"disable_auto_mount",
|
|
92
77
|
"allow_empty_resources",
|
|
78
|
+
"clone_target_dir",
|
|
93
79
|
]
|
|
94
80
|
|
|
95
81
|
|
|
@@ -130,6 +116,7 @@ class FunctionSpec(ModelObj):
|
|
|
130
116
|
default_handler=None,
|
|
131
117
|
pythonpath=None,
|
|
132
118
|
disable_auto_mount=False,
|
|
119
|
+
clone_target_dir=None,
|
|
133
120
|
):
|
|
134
121
|
|
|
135
122
|
self.command = command or ""
|
|
@@ -148,6 +135,9 @@ class FunctionSpec(ModelObj):
|
|
|
148
135
|
self.entry_points = entry_points or {}
|
|
149
136
|
self.disable_auto_mount = disable_auto_mount
|
|
150
137
|
self.allow_empty_resources = None
|
|
138
|
+
# the build.source is cloned/extracted to the specified clone_target_dir
|
|
139
|
+
# if a relative path is specified, it will be enriched with a temp dir path
|
|
140
|
+
self.clone_target_dir = clone_target_dir or ""
|
|
151
141
|
|
|
152
142
|
@property
|
|
153
143
|
def build(self) -> ImageBuilder:
|
|
@@ -183,14 +173,12 @@ class BaseRuntime(ModelObj):
|
|
|
183
173
|
self.is_child = False
|
|
184
174
|
self._status = None
|
|
185
175
|
self.status = None
|
|
186
|
-
self._is_api_server = False
|
|
187
176
|
self.verbose = False
|
|
188
177
|
self._enriched_image = False
|
|
189
178
|
|
|
190
179
|
def set_db_connection(self, conn):
|
|
191
180
|
if not self._db_conn:
|
|
192
181
|
self._db_conn = conn
|
|
193
|
-
self._is_api_server = mlrun.config.is_running_as_api()
|
|
194
182
|
|
|
195
183
|
@property
|
|
196
184
|
def metadata(self) -> BaseMetadata:
|
|
@@ -216,9 +204,6 @@ class BaseRuntime(ModelObj):
|
|
|
216
204
|
def status(self, status):
|
|
217
205
|
self._status = self._verify_dict(status, "status", FunctionStatus)
|
|
218
206
|
|
|
219
|
-
def _get_k8s(self):
|
|
220
|
-
return get_k8s_helper()
|
|
221
|
-
|
|
222
207
|
def set_label(self, key, value):
|
|
223
208
|
self.metadata.labels[key] = str(value)
|
|
224
209
|
return self
|
|
@@ -236,39 +221,6 @@ class BaseRuntime(ModelObj):
|
|
|
236
221
|
return True
|
|
237
222
|
return False
|
|
238
223
|
|
|
239
|
-
def _use_remote_api(self):
|
|
240
|
-
if (
|
|
241
|
-
self._is_remote
|
|
242
|
-
and not self._is_api_server
|
|
243
|
-
and self._get_db()
|
|
244
|
-
and self._get_db().kind == "http"
|
|
245
|
-
):
|
|
246
|
-
return True
|
|
247
|
-
return False
|
|
248
|
-
|
|
249
|
-
def _enrich_on_client_side(self):
|
|
250
|
-
self.try_auto_mount_based_on_config()
|
|
251
|
-
self._fill_credentials()
|
|
252
|
-
|
|
253
|
-
def _enrich_on_server_side(self):
|
|
254
|
-
pass
|
|
255
|
-
|
|
256
|
-
def _enrich_on_server_and_client_sides(self):
|
|
257
|
-
"""
|
|
258
|
-
enrich function also in client side and also on server side
|
|
259
|
-
"""
|
|
260
|
-
pass
|
|
261
|
-
|
|
262
|
-
def _enrich_function(self):
|
|
263
|
-
"""
|
|
264
|
-
enriches the function based on the flow state we run in (sdk or server)
|
|
265
|
-
"""
|
|
266
|
-
if self._use_remote_api():
|
|
267
|
-
self._enrich_on_client_side()
|
|
268
|
-
else:
|
|
269
|
-
self._enrich_on_server_side()
|
|
270
|
-
self._enrich_on_server_and_client_sides()
|
|
271
|
-
|
|
272
224
|
def _function_uri(self, tag=None, hash_key=None):
|
|
273
225
|
return generate_object_uri(
|
|
274
226
|
self.metadata.project,
|
|
@@ -281,11 +233,11 @@ class BaseRuntime(ModelObj):
|
|
|
281
233
|
self.spec.rundb = self.spec.rundb or get_or_set_dburl()
|
|
282
234
|
|
|
283
235
|
def _get_db(self):
|
|
236
|
+
# TODO: remove this function and use the launcher db instead
|
|
284
237
|
self._ensure_run_db()
|
|
285
238
|
if not self._db_conn:
|
|
286
239
|
if self.spec.rundb:
|
|
287
240
|
self._db_conn = get_run_db(self.spec.rundb, secrets=self._secrets)
|
|
288
|
-
self._is_api_server = mlrun.config.is_running_as_api()
|
|
289
241
|
return self._db_conn
|
|
290
242
|
|
|
291
243
|
# This function is different than the auto_mount function, as it mounts to runtimes based on the configuration.
|
|
@@ -321,59 +273,62 @@ class BaseRuntime(ModelObj):
|
|
|
321
273
|
|
|
322
274
|
def run(
|
|
323
275
|
self,
|
|
324
|
-
runspec:
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
276
|
+
runspec: Optional[
|
|
277
|
+
Union["mlrun.run.RunTemplate", "mlrun.run.RunObject", dict]
|
|
278
|
+
] = None,
|
|
279
|
+
handler: Optional[Union[str, Callable]] = None,
|
|
280
|
+
name: Optional[str] = "",
|
|
281
|
+
project: Optional[str] = "",
|
|
282
|
+
params: Optional[dict] = None,
|
|
283
|
+
inputs: Optional[Dict[str, str]] = None,
|
|
284
|
+
out_path: Optional[str] = "",
|
|
285
|
+
workdir: Optional[str] = "",
|
|
286
|
+
artifact_path: Optional[str] = "",
|
|
287
|
+
watch: Optional[bool] = True,
|
|
288
|
+
schedule: Optional[Union[str, mlrun.common.schemas.ScheduleCronTrigger]] = None,
|
|
289
|
+
hyperparams: Optional[Dict[str, list]] = None,
|
|
290
|
+
hyper_param_options: Optional[HyperParamOptions] = None,
|
|
291
|
+
verbose: Optional[bool] = None,
|
|
292
|
+
scrape_metrics: Optional[bool] = None,
|
|
293
|
+
local: Optional[bool] = False,
|
|
294
|
+
local_code_path: Optional[str] = None,
|
|
295
|
+
auto_build: Optional[bool] = None,
|
|
296
|
+
param_file_secrets: Optional[Dict[str, str]] = None,
|
|
297
|
+
notifications: Optional[List[mlrun.model.Notification]] = None,
|
|
343
298
|
returns: Optional[List[Union[str, Dict[str, str]]]] = None,
|
|
344
299
|
) -> RunObject:
|
|
345
300
|
"""
|
|
346
301
|
Run a local or remote task.
|
|
347
302
|
|
|
348
|
-
:param runspec: run
|
|
349
|
-
:param handler:
|
|
350
|
-
:param name:
|
|
351
|
-
:param project:
|
|
352
|
-
:param params:
|
|
303
|
+
:param runspec: The run spec to generate the RunObject from. Can be RunTemplate | RunObject | dict.
|
|
304
|
+
:param handler: Pointer or name of a function handler.
|
|
305
|
+
:param name: Execution name.
|
|
306
|
+
:param project: Project name.
|
|
307
|
+
:param params: Input parameters (dict).
|
|
353
308
|
:param inputs: Input objects to pass to the handler. Type hints can be given so the input will be parsed
|
|
354
309
|
during runtime from `mlrun.DataItem` to the given type hint. The type hint can be given
|
|
355
310
|
in the key field of the dictionary after a colon, e.g: "<key> : <type_hint>".
|
|
356
|
-
:param out_path:
|
|
357
|
-
:param artifact_path:
|
|
358
|
-
:param workdir:
|
|
359
|
-
:param watch:
|
|
311
|
+
:param out_path: Default artifact output path.
|
|
312
|
+
:param artifact_path: Default artifact output path (will replace out_path).
|
|
313
|
+
:param workdir: Default input artifacts path.
|
|
314
|
+
:param watch: Watch/follow run log.
|
|
360
315
|
:param schedule: ScheduleCronTrigger class instance or a standard crontab expression string
|
|
361
316
|
(which will be converted to the class using its `from_crontab` constructor),
|
|
362
317
|
see this link for help:
|
|
363
318
|
https://apscheduler.readthedocs.io/en/3.x/modules/triggers/cron.html#module-apscheduler.triggers.cron
|
|
364
|
-
:param hyperparams:
|
|
319
|
+
:param hyperparams: Dict of param name and list of values to be enumerated e.g. {"p1": [1,2,3]}
|
|
365
320
|
the default strategy is grid search, can specify strategy (grid, list, random)
|
|
366
|
-
and other options in the hyper_param_options parameter
|
|
367
|
-
:param hyper_param_options:
|
|
368
|
-
|
|
369
|
-
:param
|
|
370
|
-
:param
|
|
371
|
-
:param
|
|
372
|
-
:param
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
321
|
+
and other options in the hyper_param_options parameter.
|
|
322
|
+
:param hyper_param_options: Dict or :py:class:`~mlrun.model.HyperParamOptions` struct of hyperparameter options.
|
|
323
|
+
:param verbose: Add verbose prints/logs.
|
|
324
|
+
:param scrape_metrics: Whether to add the `mlrun/scrape-metrics` label to this run's resources.
|
|
325
|
+
:param local: Run the function locally vs on the runtime/cluster.
|
|
326
|
+
:param local_code_path: Path of the code for local runs & debug.
|
|
327
|
+
:param auto_build: When set to True and the function require build it will be built on the first
|
|
328
|
+
function run, use only if you don't plan on changing the build config between runs.
|
|
329
|
+
:param param_file_secrets: Dictionary of secrets to be used only for accessing the hyper-param parameter file.
|
|
330
|
+
These secrets are only used locally and will not be stored anywhere
|
|
331
|
+
:param notifications: List of notifications to push when the run is completed
|
|
377
332
|
:param returns: List of log hints - configurations for how to log the returning values from the handler's run
|
|
378
333
|
(as artifacts or results). The list's length must be equal to the amount of returning objects. A
|
|
379
334
|
log hint may be given as:
|
|
@@ -385,206 +340,34 @@ class BaseRuntime(ModelObj):
|
|
|
385
340
|
* A dictionary of configurations to use when logging. Further info per object type and artifact
|
|
386
341
|
type can be given there. The artifact key must appear in the dictionary as "key": "the_key".
|
|
387
342
|
|
|
388
|
-
:return:
|
|
343
|
+
:return: Run context object (RunObject) with run metadata, results and status
|
|
389
344
|
"""
|
|
390
|
-
mlrun.
|
|
391
|
-
|
|
392
|
-
if self.spec.mode and self.spec.mode not in run_modes:
|
|
393
|
-
raise ValueError(f'run mode can only be {",".join(run_modes)}')
|
|
394
|
-
|
|
395
|
-
self._enrich_function()
|
|
396
|
-
|
|
397
|
-
run = self._create_run_object(runspec)
|
|
398
|
-
|
|
399
|
-
if local:
|
|
400
|
-
|
|
401
|
-
# do not allow local function to be scheduled
|
|
402
|
-
if schedule is not None:
|
|
403
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
404
|
-
"local and schedule cannot be used together"
|
|
405
|
-
)
|
|
406
|
-
return self._run_local(
|
|
407
|
-
run,
|
|
408
|
-
local_code_path,
|
|
409
|
-
project,
|
|
410
|
-
name,
|
|
411
|
-
workdir,
|
|
412
|
-
handler,
|
|
413
|
-
params,
|
|
414
|
-
inputs,
|
|
415
|
-
returns,
|
|
416
|
-
artifact_path,
|
|
417
|
-
)
|
|
418
|
-
|
|
419
|
-
run = self._enrich_run(
|
|
420
|
-
run,
|
|
421
|
-
handler,
|
|
422
|
-
project,
|
|
423
|
-
name,
|
|
424
|
-
params,
|
|
425
|
-
inputs,
|
|
426
|
-
returns,
|
|
427
|
-
hyperparams,
|
|
428
|
-
hyper_param_options,
|
|
429
|
-
verbose,
|
|
430
|
-
scrape_metrics,
|
|
431
|
-
out_path,
|
|
432
|
-
artifact_path,
|
|
433
|
-
workdir,
|
|
345
|
+
launcher = mlrun.launcher.factory.LauncherFactory.create_launcher(
|
|
346
|
+
self._is_remote, local
|
|
434
347
|
)
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
logger.info(
|
|
458
|
-
"Storing function",
|
|
459
|
-
name=run.metadata.name,
|
|
460
|
-
uid=run.metadata.uid,
|
|
461
|
-
db=db_str,
|
|
462
|
-
)
|
|
463
|
-
self._store_function(run, run.metadata, db)
|
|
464
|
-
|
|
465
|
-
# execute the job remotely (to a k8s cluster via the API service)
|
|
466
|
-
if self._use_remote_api():
|
|
467
|
-
return self._submit_job(run, schedule, db, watch)
|
|
468
|
-
|
|
469
|
-
elif self._is_remote and not self._is_api_server and not self.kfp:
|
|
470
|
-
logger.warning(
|
|
471
|
-
"warning!, Api url not set, " "trying to exec remote runtime locally"
|
|
472
|
-
)
|
|
473
|
-
|
|
474
|
-
execution = MLClientCtx.from_dict(
|
|
475
|
-
run.to_dict(),
|
|
476
|
-
db,
|
|
477
|
-
autocommit=False,
|
|
478
|
-
is_api=self._is_api_server,
|
|
479
|
-
store_run=False,
|
|
480
|
-
)
|
|
481
|
-
|
|
482
|
-
self._verify_run_params(run.spec.parameters)
|
|
483
|
-
|
|
484
|
-
# create task generator (for child runs) from spec
|
|
485
|
-
task_generator = get_generator(
|
|
486
|
-
run.spec, execution, param_file_secrets=param_file_secrets
|
|
348
|
+
return launcher.launch(
|
|
349
|
+
runtime=self,
|
|
350
|
+
task=runspec,
|
|
351
|
+
handler=handler,
|
|
352
|
+
name=name,
|
|
353
|
+
project=project,
|
|
354
|
+
params=params,
|
|
355
|
+
inputs=inputs,
|
|
356
|
+
out_path=out_path,
|
|
357
|
+
workdir=workdir,
|
|
358
|
+
artifact_path=artifact_path,
|
|
359
|
+
watch=watch,
|
|
360
|
+
schedule=schedule,
|
|
361
|
+
hyperparams=hyperparams,
|
|
362
|
+
hyper_param_options=hyper_param_options,
|
|
363
|
+
verbose=verbose,
|
|
364
|
+
scrape_metrics=scrape_metrics,
|
|
365
|
+
local_code_path=local_code_path,
|
|
366
|
+
auto_build=auto_build,
|
|
367
|
+
param_file_secrets=param_file_secrets,
|
|
368
|
+
notifications=notifications,
|
|
369
|
+
returns=returns,
|
|
487
370
|
)
|
|
488
|
-
if task_generator:
|
|
489
|
-
# verify valid task parameters
|
|
490
|
-
tasks = task_generator.generate(run)
|
|
491
|
-
for task in tasks:
|
|
492
|
-
self._verify_run_params(task.spec.parameters)
|
|
493
|
-
|
|
494
|
-
# post verifications, store execution in db and run pre run hooks
|
|
495
|
-
execution.store_run()
|
|
496
|
-
self._pre_run(run, execution) # hook for runtime specific prep
|
|
497
|
-
|
|
498
|
-
last_err = None
|
|
499
|
-
# If the runtime is nested, it means the hyper-run will run within a single instance of the run.
|
|
500
|
-
# So while in the API, we consider the hyper-run as a single run, and then in the runtime itself when the
|
|
501
|
-
# runtime is now a local runtime and therefore `self._is_nested == False`, we run each task as a separate run by
|
|
502
|
-
# using the task generator
|
|
503
|
-
if task_generator and not self._is_nested:
|
|
504
|
-
# multiple runs (based on hyper params or params file)
|
|
505
|
-
runner = self._run_many
|
|
506
|
-
if hasattr(self, "_parallel_run_many") and task_generator.use_parallel():
|
|
507
|
-
runner = self._parallel_run_many
|
|
508
|
-
results = runner(task_generator, execution, run)
|
|
509
|
-
results_to_iter(results, run, execution)
|
|
510
|
-
result = execution.to_dict()
|
|
511
|
-
result = self._update_run_state(result, task=run)
|
|
512
|
-
|
|
513
|
-
else:
|
|
514
|
-
# single run
|
|
515
|
-
try:
|
|
516
|
-
resp = self._run(run, execution)
|
|
517
|
-
if (
|
|
518
|
-
watch
|
|
519
|
-
and mlrun.runtimes.RuntimeKinds.is_watchable(self.kind)
|
|
520
|
-
# API shouldn't watch logs, its the client job to query the run logs
|
|
521
|
-
and not mlrun.config.is_running_as_api()
|
|
522
|
-
):
|
|
523
|
-
state, _ = run.logs(True, self._get_db())
|
|
524
|
-
if state not in ["succeeded", "completed"]:
|
|
525
|
-
logger.warning(f"run ended with state {state}")
|
|
526
|
-
result = self._update_run_state(resp, task=run)
|
|
527
|
-
except RunError as err:
|
|
528
|
-
last_err = err
|
|
529
|
-
result = self._update_run_state(task=run, err=err)
|
|
530
|
-
|
|
531
|
-
self._post_run(result, execution) # hook for runtime specific cleanup
|
|
532
|
-
|
|
533
|
-
return self._wrap_run_result(result, run, schedule=schedule, err=last_err)
|
|
534
|
-
|
|
535
|
-
def _wrap_run_result(
|
|
536
|
-
self, result: dict, runspec: RunObject, schedule=None, err=None
|
|
537
|
-
):
|
|
538
|
-
# if the purpose was to schedule (and not to run) nothing to wrap
|
|
539
|
-
if schedule:
|
|
540
|
-
return
|
|
541
|
-
|
|
542
|
-
if result and self.kfp and err is None:
|
|
543
|
-
write_kfpmeta(result)
|
|
544
|
-
|
|
545
|
-
# show ipython/jupyter result table widget
|
|
546
|
-
results_tbl = RunList()
|
|
547
|
-
if result:
|
|
548
|
-
results_tbl.append(result)
|
|
549
|
-
else:
|
|
550
|
-
logger.info("no returned result (job may still be in progress)")
|
|
551
|
-
results_tbl.append(runspec.to_dict())
|
|
552
|
-
|
|
553
|
-
uid = runspec.metadata.uid
|
|
554
|
-
project = runspec.metadata.project
|
|
555
|
-
if is_ipython and config.ipython_widget:
|
|
556
|
-
results_tbl.show()
|
|
557
|
-
print()
|
|
558
|
-
ui_url = get_ui_url(project, uid)
|
|
559
|
-
if ui_url:
|
|
560
|
-
ui_url = f' or <a href="{ui_url}" target="_blank">click here</a> to open in UI'
|
|
561
|
-
IPython.display.display(
|
|
562
|
-
IPython.display.HTML(
|
|
563
|
-
f"<b> > to track results use the .show() or .logs() methods {ui_url}</b>"
|
|
564
|
-
)
|
|
565
|
-
)
|
|
566
|
-
elif not (self.is_child and is_running_as_api()):
|
|
567
|
-
project_flag = f"-p {project}" if project else ""
|
|
568
|
-
info_cmd = f"mlrun get run {uid} {project_flag}"
|
|
569
|
-
logs_cmd = f"mlrun logs {uid} {project_flag}"
|
|
570
|
-
logger.info(
|
|
571
|
-
"To track results use the CLI", info_cmd=info_cmd, logs_cmd=logs_cmd
|
|
572
|
-
)
|
|
573
|
-
ui_url = get_ui_url(project, uid)
|
|
574
|
-
if ui_url:
|
|
575
|
-
logger.info("Or click for UI", ui_url=ui_url)
|
|
576
|
-
if result:
|
|
577
|
-
run = RunObject.from_dict(result)
|
|
578
|
-
logger.info(
|
|
579
|
-
f"run executed, status={run.status.state}", name=run.metadata.name
|
|
580
|
-
)
|
|
581
|
-
if run.status.state == "error":
|
|
582
|
-
if self._is_remote and not self.is_child:
|
|
583
|
-
logger.error(f"runtime error: {run.status.error}")
|
|
584
|
-
raise RunError(run.status.error)
|
|
585
|
-
return run
|
|
586
|
-
|
|
587
|
-
return None
|
|
588
371
|
|
|
589
372
|
def _get_db_run(self, task: RunObject = None):
|
|
590
373
|
if self._get_db() and task:
|
|
@@ -613,237 +396,6 @@ class BaseRuntime(ModelObj):
|
|
|
613
396
|
runtime_env["MLRUN_NAMESPACE"] = self.metadata.namespace or config.namespace
|
|
614
397
|
return runtime_env
|
|
615
398
|
|
|
616
|
-
def _run_local(
|
|
617
|
-
self,
|
|
618
|
-
runspec,
|
|
619
|
-
local_code_path,
|
|
620
|
-
project,
|
|
621
|
-
name,
|
|
622
|
-
workdir,
|
|
623
|
-
handler,
|
|
624
|
-
params,
|
|
625
|
-
inputs,
|
|
626
|
-
returns,
|
|
627
|
-
artifact_path,
|
|
628
|
-
):
|
|
629
|
-
# allow local run simulation with a flip of a flag
|
|
630
|
-
command = self
|
|
631
|
-
if local_code_path:
|
|
632
|
-
project = project or self.metadata.project
|
|
633
|
-
name = name or self.metadata.name
|
|
634
|
-
command = local_code_path
|
|
635
|
-
return mlrun.run_local(
|
|
636
|
-
runspec,
|
|
637
|
-
command,
|
|
638
|
-
name,
|
|
639
|
-
self.spec.args,
|
|
640
|
-
workdir=workdir,
|
|
641
|
-
project=project,
|
|
642
|
-
handler=handler,
|
|
643
|
-
params=params,
|
|
644
|
-
inputs=inputs,
|
|
645
|
-
artifact_path=artifact_path,
|
|
646
|
-
mode=self.spec.mode,
|
|
647
|
-
allow_empty_resources=self.spec.allow_empty_resources,
|
|
648
|
-
returns=returns,
|
|
649
|
-
)
|
|
650
|
-
|
|
651
|
-
def _create_run_object(self, runspec):
|
|
652
|
-
# TODO: Once implemented the `Runtime` handlers configurations (doc strings, params type hints and returning
|
|
653
|
-
# log hints, possible parameter values, etc), the configured type hints and log hints should be set into
|
|
654
|
-
# the `RunObject` from the `Runtime`.
|
|
655
|
-
if runspec:
|
|
656
|
-
runspec = deepcopy(runspec)
|
|
657
|
-
if isinstance(runspec, str):
|
|
658
|
-
runspec = literal_eval(runspec)
|
|
659
|
-
if not isinstance(runspec, (dict, RunTemplate, RunObject)):
|
|
660
|
-
raise ValueError(
|
|
661
|
-
"task/runspec is not a valid task object," f" type={type(runspec)}"
|
|
662
|
-
)
|
|
663
|
-
|
|
664
|
-
if isinstance(runspec, RunTemplate):
|
|
665
|
-
runspec = RunObject.from_template(runspec)
|
|
666
|
-
if isinstance(runspec, dict) or runspec is None:
|
|
667
|
-
runspec = RunObject.from_dict(runspec)
|
|
668
|
-
return runspec
|
|
669
|
-
|
|
670
|
-
def _enrich_run(
|
|
671
|
-
self,
|
|
672
|
-
runspec,
|
|
673
|
-
handler,
|
|
674
|
-
project_name,
|
|
675
|
-
name,
|
|
676
|
-
params,
|
|
677
|
-
inputs,
|
|
678
|
-
returns,
|
|
679
|
-
hyperparams,
|
|
680
|
-
hyper_param_options,
|
|
681
|
-
verbose,
|
|
682
|
-
scrape_metrics,
|
|
683
|
-
out_path,
|
|
684
|
-
artifact_path,
|
|
685
|
-
workdir,
|
|
686
|
-
):
|
|
687
|
-
runspec.spec.handler = (
|
|
688
|
-
handler or runspec.spec.handler or self.spec.default_handler or ""
|
|
689
|
-
)
|
|
690
|
-
if runspec.spec.handler and self.kind not in ["handler", "dask"]:
|
|
691
|
-
runspec.spec.handler = runspec.spec.handler_name
|
|
692
|
-
|
|
693
|
-
def_name = self.metadata.name
|
|
694
|
-
if runspec.spec.handler_name:
|
|
695
|
-
short_name = runspec.spec.handler_name
|
|
696
|
-
for separator in ["#", "::", "."]:
|
|
697
|
-
# drop paths, module or class name from short name
|
|
698
|
-
if separator in short_name:
|
|
699
|
-
short_name = short_name.split(separator)[-1]
|
|
700
|
-
def_name += "-" + short_name
|
|
701
|
-
|
|
702
|
-
runspec.metadata.name = normalize_name(
|
|
703
|
-
name=name or runspec.metadata.name or def_name,
|
|
704
|
-
# if name or runspec.metadata.name are set then it means that is user defined name and we want to warn the
|
|
705
|
-
# user that the passed name needs to be set without underscore, if its not user defined but rather enriched
|
|
706
|
-
# from the handler(function) name then we replace the underscore without warning the user.
|
|
707
|
-
# most of the time handlers will have `_` in the handler name (python convention is to separate function
|
|
708
|
-
# words with `_`), therefore we don't want to be noisy when normalizing the run name
|
|
709
|
-
verbose=bool(name or runspec.metadata.name),
|
|
710
|
-
)
|
|
711
|
-
verify_field_regex(
|
|
712
|
-
"run.metadata.name", runspec.metadata.name, mlrun.utils.regex.run_name
|
|
713
|
-
)
|
|
714
|
-
runspec.metadata.project = (
|
|
715
|
-
project_name
|
|
716
|
-
or runspec.metadata.project
|
|
717
|
-
or self.metadata.project
|
|
718
|
-
or config.default_project
|
|
719
|
-
)
|
|
720
|
-
runspec.spec.parameters = params or runspec.spec.parameters
|
|
721
|
-
runspec.spec.inputs = inputs or runspec.spec.inputs
|
|
722
|
-
runspec.spec.returns = returns or runspec.spec.returns
|
|
723
|
-
runspec.spec.hyperparams = hyperparams or runspec.spec.hyperparams
|
|
724
|
-
runspec.spec.hyper_param_options = (
|
|
725
|
-
hyper_param_options or runspec.spec.hyper_param_options
|
|
726
|
-
)
|
|
727
|
-
runspec.spec.verbose = verbose or runspec.spec.verbose
|
|
728
|
-
if scrape_metrics is None:
|
|
729
|
-
if runspec.spec.scrape_metrics is None:
|
|
730
|
-
scrape_metrics = config.scrape_metrics
|
|
731
|
-
else:
|
|
732
|
-
scrape_metrics = runspec.spec.scrape_metrics
|
|
733
|
-
runspec.spec.scrape_metrics = scrape_metrics
|
|
734
|
-
runspec.spec.input_path = (
|
|
735
|
-
workdir or runspec.spec.input_path or self.spec.workdir
|
|
736
|
-
)
|
|
737
|
-
if self.spec.allow_empty_resources:
|
|
738
|
-
runspec.spec.allow_empty_resources = self.spec.allow_empty_resources
|
|
739
|
-
|
|
740
|
-
spec = runspec.spec
|
|
741
|
-
if spec.secret_sources:
|
|
742
|
-
self._secrets = SecretsStore.from_list(spec.secret_sources)
|
|
743
|
-
|
|
744
|
-
# update run metadata (uid, labels) and store in DB
|
|
745
|
-
meta = runspec.metadata
|
|
746
|
-
meta.uid = meta.uid or uuid.uuid4().hex
|
|
747
|
-
|
|
748
|
-
runspec.spec.output_path = out_path or artifact_path or runspec.spec.output_path
|
|
749
|
-
|
|
750
|
-
if not runspec.spec.output_path:
|
|
751
|
-
if runspec.metadata.project:
|
|
752
|
-
if (
|
|
753
|
-
mlrun.pipeline_context.project
|
|
754
|
-
and runspec.metadata.project
|
|
755
|
-
== mlrun.pipeline_context.project.metadata.name
|
|
756
|
-
):
|
|
757
|
-
runspec.spec.output_path = (
|
|
758
|
-
mlrun.pipeline_context.project.spec.artifact_path
|
|
759
|
-
or mlrun.pipeline_context.workflow_artifact_path
|
|
760
|
-
)
|
|
761
|
-
|
|
762
|
-
if not runspec.spec.output_path and self._get_db():
|
|
763
|
-
try:
|
|
764
|
-
# not passing or loading the DB before the enrichment on purpose, because we want to enrich the
|
|
765
|
-
# spec first as get_db() depends on it
|
|
766
|
-
project = self._get_db().get_project(runspec.metadata.project)
|
|
767
|
-
# this is mainly for tests, so we won't need to mock get_project for so many tests
|
|
768
|
-
# in normal use cases if no project is found we will get an error
|
|
769
|
-
if project:
|
|
770
|
-
runspec.spec.output_path = project.spec.artifact_path
|
|
771
|
-
except mlrun.errors.MLRunNotFoundError:
|
|
772
|
-
logger.warning(
|
|
773
|
-
f"project {project_name} is not saved in DB yet, "
|
|
774
|
-
f"enriching output path with default artifact path: {config.artifact_path}"
|
|
775
|
-
)
|
|
776
|
-
|
|
777
|
-
if not runspec.spec.output_path:
|
|
778
|
-
runspec.spec.output_path = config.artifact_path
|
|
779
|
-
|
|
780
|
-
if runspec.spec.output_path:
|
|
781
|
-
runspec.spec.output_path = runspec.spec.output_path.replace(
|
|
782
|
-
"{{run.uid}}", meta.uid
|
|
783
|
-
)
|
|
784
|
-
runspec.spec.output_path = mlrun.utils.helpers.fill_artifact_path_template(
|
|
785
|
-
runspec.spec.output_path, runspec.metadata.project
|
|
786
|
-
)
|
|
787
|
-
return runspec
|
|
788
|
-
|
|
789
|
-
def _submit_job(self, run: RunObject, schedule, db, watch):
|
|
790
|
-
if self._secrets:
|
|
791
|
-
run.spec.secret_sources = self._secrets.to_serial()
|
|
792
|
-
try:
|
|
793
|
-
resp = db.submit_job(run, schedule=schedule)
|
|
794
|
-
if schedule:
|
|
795
|
-
action = resp.pop("action", "created")
|
|
796
|
-
logger.info(f"task schedule {action}", **resp)
|
|
797
|
-
return
|
|
798
|
-
|
|
799
|
-
except (requests.HTTPError, Exception) as err:
|
|
800
|
-
logger.error(f"got remote run err, {err_to_str(err)}")
|
|
801
|
-
|
|
802
|
-
if isinstance(err, requests.HTTPError):
|
|
803
|
-
self._handle_submit_job_http_error(err)
|
|
804
|
-
|
|
805
|
-
result = None
|
|
806
|
-
# if we got a schedule no reason to do post_run stuff (it purposed to update the run status with error,
|
|
807
|
-
# but there's no run in case of schedule)
|
|
808
|
-
if not schedule:
|
|
809
|
-
result = self._update_run_state(task=run, err=err_to_str(err))
|
|
810
|
-
return self._wrap_run_result(result, run, schedule=schedule, err=err)
|
|
811
|
-
|
|
812
|
-
if resp:
|
|
813
|
-
txt = get_in(resp, "status.status_text")
|
|
814
|
-
if txt:
|
|
815
|
-
logger.info(txt)
|
|
816
|
-
# watch is None only in scenario where we run from pipeline step, in this case we don't want to watch the run
|
|
817
|
-
# logs too frequently but rather just pull the state of the run from the DB and pull the logs every x seconds
|
|
818
|
-
# which ideally greater than the pull state interval, this reduces unnecessary load on the API server, as
|
|
819
|
-
# running a pipeline is mostly not an interactive process which means the logs pulling doesn't need to be pulled
|
|
820
|
-
# in real time
|
|
821
|
-
if (
|
|
822
|
-
watch is None
|
|
823
|
-
and self.kfp
|
|
824
|
-
and config.httpdb.logs.pipelines.pull_state.mode == "enabled"
|
|
825
|
-
):
|
|
826
|
-
state_interval = int(
|
|
827
|
-
config.httpdb.logs.pipelines.pull_state.pull_state_interval
|
|
828
|
-
)
|
|
829
|
-
logs_interval = int(
|
|
830
|
-
config.httpdb.logs.pipelines.pull_state.pull_logs_interval
|
|
831
|
-
)
|
|
832
|
-
|
|
833
|
-
run.wait_for_completion(
|
|
834
|
-
show_logs=True,
|
|
835
|
-
sleep=state_interval,
|
|
836
|
-
logs_interval=logs_interval,
|
|
837
|
-
raise_on_failure=False,
|
|
838
|
-
)
|
|
839
|
-
resp = self._get_db_run(run)
|
|
840
|
-
|
|
841
|
-
elif watch or self.kfp:
|
|
842
|
-
run.logs(True, self._get_db())
|
|
843
|
-
resp = self._get_db_run(run)
|
|
844
|
-
|
|
845
|
-
return self._wrap_run_result(resp, run, schedule=schedule)
|
|
846
|
-
|
|
847
399
|
@staticmethod
|
|
848
400
|
def _handle_submit_job_http_error(error: requests.HTTPError):
|
|
849
401
|
# if we receive a 400 status code, this means the request was invalid and the run wasn't created in the DB.
|
|
@@ -1048,6 +600,12 @@ class BaseRuntime(ModelObj):
|
|
|
1048
600
|
if not handler:
|
|
1049
601
|
raise RunError(f"handler must be provided for {self.kind} runtime")
|
|
1050
602
|
|
|
603
|
+
def _has_pipeline_param(self) -> bool:
|
|
604
|
+
# check if the runtime has pipeline parameters
|
|
605
|
+
# https://www.kubeflow.org/docs/components/pipelines/v1/sdk/parameters/
|
|
606
|
+
matches = re.findall(mlrun.utils.regex.pipeline_param[0], self.to_json())
|
|
607
|
+
return bool(matches)
|
|
608
|
+
|
|
1051
609
|
def full_image_path(
|
|
1052
610
|
self, image=None, client_version: str = None, client_python_version: str = None
|
|
1053
611
|
):
|
|
@@ -1127,10 +685,9 @@ class BaseRuntime(ModelObj):
|
|
|
1127
685
|
:return: KubeFlow containerOp
|
|
1128
686
|
"""
|
|
1129
687
|
|
|
1130
|
-
# if
|
|
1131
|
-
#
|
|
1132
|
-
|
|
1133
|
-
if use_db:
|
|
688
|
+
# if the function contain KFP PipelineParams (futures) pass the full spec to the
|
|
689
|
+
# ContainerOp this way KFP will substitute the params with previous step outputs
|
|
690
|
+
if use_db and not self._has_pipeline_param():
|
|
1134
691
|
# if the same function is built as part of the pipeline we do not use the versioned function
|
|
1135
692
|
# rather the latest function w the same tag so we can pick up the updated image/status
|
|
1136
693
|
versioned = False if hasattr(self, "_build_in_pipeline") else True
|
|
@@ -1206,104 +763,93 @@ class BaseRuntime(ModelObj):
|
|
|
1206
763
|
self,
|
|
1207
764
|
requirements: Union[str, List[str]],
|
|
1208
765
|
overwrite: bool = False,
|
|
1209
|
-
verify_base_image: bool =
|
|
766
|
+
verify_base_image: bool = False,
|
|
767
|
+
prepare_image_for_deploy: bool = True,
|
|
768
|
+
requirements_file: str = "",
|
|
1210
769
|
):
|
|
1211
770
|
"""add package requirements from file or list to build spec.
|
|
1212
771
|
|
|
1213
|
-
:param requirements:
|
|
1214
|
-
:param
|
|
1215
|
-
:param
|
|
772
|
+
:param requirements: a list of python packages
|
|
773
|
+
:param requirements_file: a local python requirements file path
|
|
774
|
+
:param overwrite: overwrite existing requirements
|
|
775
|
+
:param verify_base_image: verify that the base image is configured
|
|
776
|
+
(deprecated, use prepare_image_for_deploy)
|
|
777
|
+
:param prepare_image_for_deploy: prepare the image/base_image spec for deployment
|
|
1216
778
|
:return: function object
|
|
1217
779
|
"""
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
780
|
+
self.spec.build.with_requirements(requirements, requirements_file, overwrite)
|
|
781
|
+
|
|
782
|
+
if verify_base_image or prepare_image_for_deploy:
|
|
783
|
+
# TODO: remove verify_base_image in 1.6.0
|
|
784
|
+
if verify_base_image:
|
|
785
|
+
warnings.warn(
|
|
786
|
+
"verify_base_image is deprecated in 1.4.0 and will be removed in 1.6.0, "
|
|
787
|
+
"use prepare_image_for_deploy",
|
|
788
|
+
category=FutureWarning,
|
|
789
|
+
)
|
|
790
|
+
self.prepare_image_for_deploy()
|
|
791
|
+
|
|
1227
792
|
return self
|
|
1228
793
|
|
|
1229
794
|
def with_commands(
|
|
1230
795
|
self,
|
|
1231
796
|
commands: List[str],
|
|
1232
797
|
overwrite: bool = False,
|
|
1233
|
-
verify_base_image: bool =
|
|
798
|
+
verify_base_image: bool = False,
|
|
799
|
+
prepare_image_for_deploy: bool = True,
|
|
1234
800
|
):
|
|
1235
801
|
"""add commands to build spec.
|
|
1236
802
|
|
|
1237
|
-
:param commands:
|
|
803
|
+
:param commands: list of commands to run during build
|
|
804
|
+
:param overwrite: overwrite existing commands
|
|
805
|
+
:param verify_base_image: verify that the base image is configured
|
|
806
|
+
(deprecated, use prepare_image_for_deploy)
|
|
807
|
+
:param prepare_image_for_deploy: prepare the image/base_image spec for deployment
|
|
1238
808
|
|
|
1239
809
|
:return: function object
|
|
1240
810
|
"""
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
if
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
self.
|
|
1253
|
-
if verify_base_image:
|
|
1254
|
-
self.verify_base_image()
|
|
811
|
+
self.spec.build.with_commands(commands, overwrite)
|
|
812
|
+
|
|
813
|
+
if verify_base_image or prepare_image_for_deploy:
|
|
814
|
+
# TODO: remove verify_base_image in 1.6.0
|
|
815
|
+
if verify_base_image:
|
|
816
|
+
warnings.warn(
|
|
817
|
+
"verify_base_image is deprecated in 1.4.0 and will be removed in 1.6.0, "
|
|
818
|
+
"use prepare_image_for_deploy",
|
|
819
|
+
category=FutureWarning,
|
|
820
|
+
)
|
|
821
|
+
|
|
822
|
+
self.prepare_image_for_deploy()
|
|
1255
823
|
return self
|
|
1256
824
|
|
|
1257
825
|
def clean_build_params(self):
|
|
1258
|
-
# when using `with_requirements` we also execute `
|
|
1259
|
-
# spec.image, so we need to restore the image back
|
|
826
|
+
# when using `with_requirements` we also execute `prepare_image_for_deploy` which adds the base image
|
|
827
|
+
# and cleans the spec.image, so we need to restore the image back
|
|
1260
828
|
if self.spec.build.base_image and not self.spec.image:
|
|
1261
829
|
self.spec.image = self.spec.build.base_image
|
|
1262
830
|
|
|
1263
831
|
self.spec.build = {}
|
|
1264
832
|
return self
|
|
1265
833
|
|
|
834
|
+
# TODO: remove in 1.6.0
|
|
835
|
+
@deprecated(
|
|
836
|
+
version="1.4.0",
|
|
837
|
+
reason="'verify_base_image' will be removed in 1.6.0, use 'prepare_image_for_deploy' instead",
|
|
838
|
+
category=FutureWarning,
|
|
839
|
+
)
|
|
1266
840
|
def verify_base_image(self):
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
841
|
+
self.prepare_image_for_deploy()
|
|
842
|
+
|
|
843
|
+
def prepare_image_for_deploy(self):
|
|
844
|
+
"""
|
|
845
|
+
if a function has a 'spec.image' it is considered to be deployed,
|
|
846
|
+
but because we allow the user to set 'spec.image' for usability purposes,
|
|
847
|
+
we need to check whether this is a built image or it requires to be built on top.
|
|
848
|
+
"""
|
|
849
|
+
launcher = mlrun.launcher.factory.LauncherFactory.create_launcher(
|
|
850
|
+
is_remote=self._is_remote
|
|
1270
851
|
)
|
|
1271
|
-
|
|
1272
|
-
# we allow users to not set an image, in that case we'll use the default
|
|
1273
|
-
if (
|
|
1274
|
-
not image
|
|
1275
|
-
and self.kind in mlrun.mlconf.function_defaults.image_by_kind.to_dict()
|
|
1276
|
-
):
|
|
1277
|
-
image = mlrun.mlconf.function_defaults.image_by_kind.to_dict()[self.kind]
|
|
1278
|
-
|
|
1279
|
-
if (
|
|
1280
|
-
self.kind not in mlrun.runtimes.RuntimeKinds.nuclio_runtimes()
|
|
1281
|
-
# TODO: need a better way to decide whether a function requires a build
|
|
1282
|
-
and require_build
|
|
1283
|
-
and image
|
|
1284
|
-
and not self.spec.build.base_image
|
|
1285
|
-
# when submitting a run we are loading the function from the db, and using new_function for it,
|
|
1286
|
-
# this results reaching here, but we are already after deploy of the image, meaning we don't need to prepare
|
|
1287
|
-
# the base image for deployment
|
|
1288
|
-
and self._is_remote_api()
|
|
1289
|
-
):
|
|
1290
|
-
# when the function require build use the image as the base_image for the build
|
|
1291
|
-
self.spec.build.base_image = image
|
|
1292
|
-
self.spec.image = ""
|
|
1293
|
-
|
|
1294
|
-
def _verify_run_params(self, parameters: typing.Dict[str, typing.Any]):
|
|
1295
|
-
for param_name, param_value in parameters.items():
|
|
1296
|
-
|
|
1297
|
-
if isinstance(param_value, dict):
|
|
1298
|
-
# if the parameter is a dict, we might have some nested parameters,
|
|
1299
|
-
# in this case we need to verify them as well recursively
|
|
1300
|
-
self._verify_run_params(param_value)
|
|
1301
|
-
|
|
1302
|
-
# verify that integer parameters don't exceed a int64
|
|
1303
|
-
if isinstance(param_value, int) and abs(param_value) >= 2**63:
|
|
1304
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1305
|
-
f"parameter {param_name} value {param_value} exceeds int64"
|
|
1306
|
-
)
|
|
852
|
+
launcher.prepare_image_for_deploy(self)
|
|
1307
853
|
|
|
1308
854
|
def export(self, target="", format=".yaml", secrets=None, strip=True):
|
|
1309
855
|
"""save function spec to a local/remote path (default to./function.yaml)
|
|
@@ -1334,35 +880,12 @@ class BaseRuntime(ModelObj):
|
|
|
1334
880
|
return self
|
|
1335
881
|
|
|
1336
882
|
def save(self, tag="", versioned=False, refresh=False) -> str:
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
if refresh and self._is_remote_api():
|
|
1343
|
-
try:
|
|
1344
|
-
meta = self.metadata
|
|
1345
|
-
db_func = db.get_function(meta.name, meta.project, meta.tag)
|
|
1346
|
-
if db_func and "status" in db_func:
|
|
1347
|
-
self.status = db_func["status"]
|
|
1348
|
-
if (
|
|
1349
|
-
self.status.state
|
|
1350
|
-
and self.status.state == "ready"
|
|
1351
|
-
and not hasattr(self.status, "nuclio_name")
|
|
1352
|
-
):
|
|
1353
|
-
self.spec.image = get_in(db_func, "spec.image", self.spec.image)
|
|
1354
|
-
except mlrun.errors.MLRunNotFoundError:
|
|
1355
|
-
pass
|
|
1356
|
-
|
|
1357
|
-
tag = tag or self.metadata.tag
|
|
1358
|
-
|
|
1359
|
-
obj = self.to_dict()
|
|
1360
|
-
logger.debug(f"saving function: {self.metadata.name}, tag: {tag}")
|
|
1361
|
-
hash_key = db.store_function(
|
|
1362
|
-
obj, self.metadata.name, self.metadata.project, tag, versioned
|
|
883
|
+
launcher = mlrun.launcher.factory.LauncherFactory.create_launcher(
|
|
884
|
+
is_remote=self._is_remote
|
|
885
|
+
)
|
|
886
|
+
return launcher.save_function(
|
|
887
|
+
self, tag=tag, versioned=versioned, refresh=refresh
|
|
1363
888
|
)
|
|
1364
|
-
hash_key = hash_key if versioned else None
|
|
1365
|
-
return "db://" + self._function_uri(hash_key=hash_key, tag=tag)
|
|
1366
889
|
|
|
1367
890
|
def to_dict(self, fields=None, exclude=None, strip=False):
|
|
1368
891
|
struct = super().to_dict(fields, exclude=exclude)
|
|
@@ -1391,76 +914,11 @@ class BaseRuntime(ModelObj):
|
|
|
1391
914
|
line += f", default={p['default']}"
|
|
1392
915
|
print(" " + line)
|
|
1393
916
|
|
|
1394
|
-
def _encode_requirements(self, requirements_to_encode):
|
|
1395
|
-
|
|
1396
|
-
# if a string, read the file then encode
|
|
1397
|
-
if isinstance(requirements_to_encode, str):
|
|
1398
|
-
with open(requirements_to_encode, "r") as fp:
|
|
1399
|
-
requirements_to_encode = fp.read().splitlines()
|
|
1400
|
-
|
|
1401
|
-
requirements = []
|
|
1402
|
-
for requirement in requirements_to_encode:
|
|
1403
|
-
requirement = requirement.strip()
|
|
1404
|
-
|
|
1405
|
-
# ignore empty lines
|
|
1406
|
-
# ignore comments
|
|
1407
|
-
if not requirement or requirement.startswith("#"):
|
|
1408
|
-
continue
|
|
1409
|
-
|
|
1410
|
-
# ignore inline comments as well
|
|
1411
|
-
inline_comment = requirement.split(" #")
|
|
1412
|
-
if len(inline_comment) > 1:
|
|
1413
|
-
requirement = inline_comment[0].strip()
|
|
1414
|
-
|
|
1415
|
-
# -r / --requirement are flags and should not be escaped
|
|
1416
|
-
# we allow such flags (could be passed within the requirements.txt file) and do not
|
|
1417
|
-
# try to open the file and include its content since it might be a remote file
|
|
1418
|
-
# given on the base image.
|
|
1419
|
-
for req_flag in ["-r", "--requirement"]:
|
|
1420
|
-
if requirement.startswith(req_flag):
|
|
1421
|
-
requirement = requirement[len(req_flag) :].strip()
|
|
1422
|
-
requirements.append(req_flag)
|
|
1423
|
-
break
|
|
1424
|
-
|
|
1425
|
-
# wrap in single quote to ensure that the requirement is treated as a single string
|
|
1426
|
-
# quote the requirement to avoid issues with special characters, double quotes, etc.
|
|
1427
|
-
requirements.append(shlex.quote(requirement))
|
|
1428
|
-
|
|
1429
|
-
return " ".join(requirements)
|
|
1430
|
-
|
|
1431
|
-
def _validate_output_path(self, run):
|
|
1432
|
-
if is_local(run.spec.output_path):
|
|
1433
|
-
message = ""
|
|
1434
|
-
if not os.path.isabs(run.spec.output_path):
|
|
1435
|
-
message = (
|
|
1436
|
-
"artifact/output path is not defined or is local and relative,"
|
|
1437
|
-
" artifacts will not be visible in the UI"
|
|
1438
|
-
)
|
|
1439
|
-
if mlrun.runtimes.RuntimeKinds.requires_absolute_artifacts_path(
|
|
1440
|
-
self.kind
|
|
1441
|
-
):
|
|
1442
|
-
raise mlrun.errors.MLRunPreconditionFailedError(
|
|
1443
|
-
"artifact path (`artifact_path`) must be absolute for remote tasks"
|
|
1444
|
-
)
|
|
1445
|
-
elif hasattr(self.spec, "volume_mounts") and not self.spec.volume_mounts:
|
|
1446
|
-
message = (
|
|
1447
|
-
"artifact output path is local while no volume mount is specified. "
|
|
1448
|
-
"artifacts would not be visible via UI."
|
|
1449
|
-
)
|
|
1450
|
-
if message:
|
|
1451
|
-
logger.warning(message, output_path=run.spec.output_path)
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
def is_local(url):
|
|
1455
|
-
if not url:
|
|
1456
|
-
return True
|
|
1457
|
-
return "://" not in url
|
|
1458
|
-
|
|
1459
917
|
|
|
1460
918
|
class BaseRuntimeHandler(ABC):
|
|
1461
919
|
# setting here to allow tests to override
|
|
1462
920
|
kind = "base"
|
|
1463
|
-
class_modes:
|
|
921
|
+
class_modes: Dict[RuntimeClassMode, str] = {}
|
|
1464
922
|
wait_for_deletion_interval = 10
|
|
1465
923
|
|
|
1466
924
|
@staticmethod
|
|
@@ -1474,12 +932,12 @@ class BaseRuntimeHandler(ABC):
|
|
|
1474
932
|
def _should_collect_logs(self) -> bool:
|
|
1475
933
|
"""
|
|
1476
934
|
There are some runtimes which we don't collect logs for using the log collector
|
|
1477
|
-
:return: whether should collect log for it
|
|
935
|
+
:return: whether it should collect log for it
|
|
1478
936
|
"""
|
|
1479
937
|
return True
|
|
1480
938
|
|
|
1481
939
|
def _get_possible_mlrun_class_label_values(
|
|
1482
|
-
self, class_mode:
|
|
940
|
+
self, class_mode: Union[RuntimeClassMode, str] = None
|
|
1483
941
|
) -> List[str]:
|
|
1484
942
|
"""
|
|
1485
943
|
Should return the possible values of the mlrun/class label for runtime resources that are of this runtime
|
|
@@ -1493,21 +951,20 @@ class BaseRuntimeHandler(ABC):
|
|
|
1493
951
|
def list_resources(
|
|
1494
952
|
self,
|
|
1495
953
|
project: str,
|
|
1496
|
-
object_id:
|
|
954
|
+
object_id: Optional[str] = None,
|
|
1497
955
|
label_selector: str = None,
|
|
1498
|
-
group_by: Optional[
|
|
956
|
+
group_by: Optional[
|
|
957
|
+
mlrun.common.schemas.ListRuntimeResourcesGroupByField
|
|
958
|
+
] = None,
|
|
1499
959
|
) -> Union[
|
|
1500
|
-
mlrun.
|
|
1501
|
-
mlrun.
|
|
1502
|
-
mlrun.
|
|
960
|
+
mlrun.common.schemas.RuntimeResources,
|
|
961
|
+
mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
|
|
962
|
+
mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
|
|
1503
963
|
]:
|
|
1504
964
|
# We currently don't support removing runtime resources in non k8s env
|
|
1505
|
-
if not
|
|
1506
|
-
silent=True
|
|
1507
|
-
).is_running_inside_kubernetes_cluster():
|
|
965
|
+
if not get_k8s().is_running_inside_kubernetes_cluster():
|
|
1508
966
|
return {}
|
|
1509
|
-
|
|
1510
|
-
namespace = k8s_helper.resolve_namespace()
|
|
967
|
+
namespace = get_k8s().resolve_namespace()
|
|
1511
968
|
label_selector = self.resolve_label_selector(project, object_id, label_selector)
|
|
1512
969
|
pods = self._list_pods(namespace, label_selector)
|
|
1513
970
|
pod_resources = self._build_pod_resources(pods)
|
|
@@ -1523,8 +980,10 @@ class BaseRuntimeHandler(ABC):
|
|
|
1523
980
|
|
|
1524
981
|
def build_output_from_runtime_resources(
|
|
1525
982
|
self,
|
|
1526
|
-
runtime_resources_list: List[mlrun.
|
|
1527
|
-
group_by: Optional[
|
|
983
|
+
runtime_resources_list: List[mlrun.common.schemas.RuntimeResources],
|
|
984
|
+
group_by: Optional[
|
|
985
|
+
mlrun.common.schemas.ListRuntimeResourcesGroupByField
|
|
986
|
+
] = None,
|
|
1528
987
|
):
|
|
1529
988
|
pod_resources = []
|
|
1530
989
|
crd_resources = []
|
|
@@ -1550,12 +1009,9 @@ class BaseRuntimeHandler(ABC):
|
|
|
1550
1009
|
if grace_period is None:
|
|
1551
1010
|
grace_period = config.runtime_resources_deletion_grace_period
|
|
1552
1011
|
# We currently don't support removing runtime resources in non k8s env
|
|
1553
|
-
if not
|
|
1554
|
-
silent=True
|
|
1555
|
-
).is_running_inside_kubernetes_cluster():
|
|
1012
|
+
if not get_k8s().is_running_inside_kubernetes_cluster():
|
|
1556
1013
|
return
|
|
1557
|
-
|
|
1558
|
-
namespace = k8s_helper.resolve_namespace()
|
|
1014
|
+
namespace = get_k8s().resolve_namespace()
|
|
1559
1015
|
label_selector = self.resolve_label_selector("*", label_selector=label_selector)
|
|
1560
1016
|
crd_group, crd_version, crd_plural = self._get_crd_info()
|
|
1561
1017
|
if crd_group and crd_version and crd_plural:
|
|
@@ -1603,8 +1059,7 @@ class BaseRuntimeHandler(ABC):
|
|
|
1603
1059
|
self.delete_resources(db, db_session, label_selector, force, grace_period)
|
|
1604
1060
|
|
|
1605
1061
|
def monitor_runs(self, db: DBInterface, db_session: Session):
|
|
1606
|
-
|
|
1607
|
-
namespace = k8s_helper.resolve_namespace()
|
|
1062
|
+
namespace = get_k8s().resolve_namespace()
|
|
1608
1063
|
label_selector = self._get_default_label_selector()
|
|
1609
1064
|
crd_group, crd_version, crd_plural = self._get_crd_info()
|
|
1610
1065
|
runtime_resource_is_crd = False
|
|
@@ -1753,8 +1208,8 @@ class BaseRuntimeHandler(ABC):
|
|
|
1753
1208
|
|
|
1754
1209
|
def _add_object_label_selector_if_needed(
|
|
1755
1210
|
self,
|
|
1756
|
-
object_id:
|
|
1757
|
-
label_selector:
|
|
1211
|
+
object_id: Optional[str] = None,
|
|
1212
|
+
label_selector: Optional[str] = None,
|
|
1758
1213
|
):
|
|
1759
1214
|
if object_id:
|
|
1760
1215
|
object_label_selector = self._get_object_label_selector(object_id)
|
|
@@ -1776,17 +1231,19 @@ class BaseRuntimeHandler(ABC):
|
|
|
1776
1231
|
def _enrich_list_resources_response(
|
|
1777
1232
|
self,
|
|
1778
1233
|
response: Union[
|
|
1779
|
-
mlrun.
|
|
1780
|
-
mlrun.
|
|
1781
|
-
mlrun.
|
|
1234
|
+
mlrun.common.schemas.RuntimeResources,
|
|
1235
|
+
mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
|
|
1236
|
+
mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
|
|
1782
1237
|
],
|
|
1783
1238
|
namespace: str,
|
|
1784
1239
|
label_selector: str = None,
|
|
1785
|
-
group_by: Optional[
|
|
1240
|
+
group_by: Optional[
|
|
1241
|
+
mlrun.common.schemas.ListRuntimeResourcesGroupByField
|
|
1242
|
+
] = None,
|
|
1786
1243
|
) -> Union[
|
|
1787
|
-
mlrun.
|
|
1788
|
-
mlrun.
|
|
1789
|
-
mlrun.
|
|
1244
|
+
mlrun.common.schemas.RuntimeResources,
|
|
1245
|
+
mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
|
|
1246
|
+
mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
|
|
1790
1247
|
]:
|
|
1791
1248
|
"""
|
|
1792
1249
|
Override this to list resources other then pods or CRDs (which are handled by the base class)
|
|
@@ -1796,12 +1253,14 @@ class BaseRuntimeHandler(ABC):
|
|
|
1796
1253
|
def _build_output_from_runtime_resources(
|
|
1797
1254
|
self,
|
|
1798
1255
|
response: Union[
|
|
1799
|
-
mlrun.
|
|
1800
|
-
mlrun.
|
|
1801
|
-
mlrun.
|
|
1256
|
+
mlrun.common.schemas.RuntimeResources,
|
|
1257
|
+
mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
|
|
1258
|
+
mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
|
|
1802
1259
|
],
|
|
1803
|
-
runtime_resources_list: List[mlrun.
|
|
1804
|
-
group_by: Optional[
|
|
1260
|
+
runtime_resources_list: List[mlrun.common.schemas.RuntimeResources],
|
|
1261
|
+
group_by: Optional[
|
|
1262
|
+
mlrun.common.schemas.ListRuntimeResourcesGroupByField
|
|
1263
|
+
] = None,
|
|
1805
1264
|
):
|
|
1806
1265
|
"""
|
|
1807
1266
|
Override this to add runtime resources other than pods or CRDs (which are handled by the base class) to the
|
|
@@ -1883,7 +1342,7 @@ class BaseRuntimeHandler(ABC):
|
|
|
1883
1342
|
return in_terminal_state, last_container_completion_time, run_state
|
|
1884
1343
|
|
|
1885
1344
|
def _get_default_label_selector(
|
|
1886
|
-
self, class_mode:
|
|
1345
|
+
self, class_mode: Union[RuntimeClassMode, str] = None
|
|
1887
1346
|
) -> str:
|
|
1888
1347
|
"""
|
|
1889
1348
|
Override this to add a default label selector
|
|
@@ -1932,20 +1391,18 @@ class BaseRuntimeHandler(ABC):
|
|
|
1932
1391
|
return False
|
|
1933
1392
|
|
|
1934
1393
|
def _list_pods(self, namespace: str, label_selector: str = None) -> List:
|
|
1935
|
-
|
|
1936
|
-
pods = k8s_helper.list_pods(namespace, selector=label_selector)
|
|
1394
|
+
pods = get_k8s().list_pods(namespace, selector=label_selector)
|
|
1937
1395
|
# when we work with custom objects (list_namespaced_custom_object) it's always a dict, to be able to generalize
|
|
1938
1396
|
# code working on runtime resource (either a custom object or a pod) we're transforming to dicts
|
|
1939
1397
|
pods = [pod.to_dict() for pod in pods]
|
|
1940
1398
|
return pods
|
|
1941
1399
|
|
|
1942
1400
|
def _list_crd_objects(self, namespace: str, label_selector: str = None) -> List:
|
|
1943
|
-
k8s_helper = get_k8s_helper()
|
|
1944
1401
|
crd_group, crd_version, crd_plural = self._get_crd_info()
|
|
1945
1402
|
crd_objects = []
|
|
1946
1403
|
if crd_group and crd_version and crd_plural:
|
|
1947
1404
|
try:
|
|
1948
|
-
crd_objects =
|
|
1405
|
+
crd_objects = get_k8s().crdapi.list_namespaced_custom_object(
|
|
1949
1406
|
crd_group,
|
|
1950
1407
|
crd_version,
|
|
1951
1408
|
namespace,
|
|
@@ -1963,9 +1420,9 @@ class BaseRuntimeHandler(ABC):
|
|
|
1963
1420
|
def resolve_label_selector(
|
|
1964
1421
|
self,
|
|
1965
1422
|
project: str,
|
|
1966
|
-
object_id:
|
|
1967
|
-
label_selector:
|
|
1968
|
-
class_mode:
|
|
1423
|
+
object_id: Optional[str] = None,
|
|
1424
|
+
label_selector: Optional[str] = None,
|
|
1425
|
+
class_mode: Union[RuntimeClassMode, str] = None,
|
|
1969
1426
|
with_main_runtime_resource_label_selector: bool = False,
|
|
1970
1427
|
) -> str:
|
|
1971
1428
|
default_label_selector = self._get_default_label_selector(class_mode=class_mode)
|
|
@@ -1996,7 +1453,7 @@ class BaseRuntimeHandler(ABC):
|
|
|
1996
1453
|
@staticmethod
|
|
1997
1454
|
def resolve_object_id(
|
|
1998
1455
|
run: dict,
|
|
1999
|
-
) ->
|
|
1456
|
+
) -> Optional[str]:
|
|
2000
1457
|
"""
|
|
2001
1458
|
Get the object id from the run object
|
|
2002
1459
|
Override this if the object id is not the run uid
|
|
@@ -2011,11 +1468,10 @@ class BaseRuntimeHandler(ABC):
|
|
|
2011
1468
|
deleted_pods: List[Dict],
|
|
2012
1469
|
label_selector: str = None,
|
|
2013
1470
|
):
|
|
2014
|
-
k8s_helper = get_k8s_helper()
|
|
2015
1471
|
deleted_pod_names = [pod_dict["metadata"]["name"] for pod_dict in deleted_pods]
|
|
2016
1472
|
|
|
2017
1473
|
def _verify_pods_removed():
|
|
2018
|
-
pods =
|
|
1474
|
+
pods = get_k8s().v1api.list_namespaced_pod(
|
|
2019
1475
|
namespace, label_selector=label_selector
|
|
2020
1476
|
)
|
|
2021
1477
|
existing_pod_names = [pod.metadata.name for pod in pods.items]
|
|
@@ -2068,10 +1524,10 @@ class BaseRuntimeHandler(ABC):
|
|
|
2068
1524
|
"name"
|
|
2069
1525
|
]
|
|
2070
1526
|
still_in_deletion_crds_to_pod_names = {}
|
|
2071
|
-
jobs_runtime_resources: mlrun.
|
|
1527
|
+
jobs_runtime_resources: mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput = self.list_resources(
|
|
2072
1528
|
"*",
|
|
2073
1529
|
label_selector=label_selector,
|
|
2074
|
-
group_by=mlrun.
|
|
1530
|
+
group_by=mlrun.common.schemas.ListRuntimeResourcesGroupByField.job,
|
|
2075
1531
|
)
|
|
2076
1532
|
for project, project_jobs in jobs_runtime_resources.items():
|
|
2077
1533
|
if project not in project_uid_crd_map:
|
|
@@ -2119,8 +1575,7 @@ class BaseRuntimeHandler(ABC):
|
|
|
2119
1575
|
) -> List[Dict]:
|
|
2120
1576
|
if grace_period is None:
|
|
2121
1577
|
grace_period = config.runtime_resources_deletion_grace_period
|
|
2122
|
-
|
|
2123
|
-
pods = k8s_helper.v1api.list_namespaced_pod(
|
|
1578
|
+
pods = get_k8s().v1api.list_namespaced_pod(
|
|
2124
1579
|
namespace, label_selector=label_selector
|
|
2125
1580
|
)
|
|
2126
1581
|
deleted_pods = []
|
|
@@ -2161,7 +1616,7 @@ class BaseRuntimeHandler(ABC):
|
|
|
2161
1616
|
pod_name=pod.metadata.name,
|
|
2162
1617
|
)
|
|
2163
1618
|
|
|
2164
|
-
|
|
1619
|
+
get_k8s().delete_pod(pod.metadata.name, namespace)
|
|
2165
1620
|
deleted_pods.append(pod_dict)
|
|
2166
1621
|
except Exception as exc:
|
|
2167
1622
|
logger.warning(
|
|
@@ -2182,11 +1637,10 @@ class BaseRuntimeHandler(ABC):
|
|
|
2182
1637
|
) -> List[Dict]:
|
|
2183
1638
|
if grace_period is None:
|
|
2184
1639
|
grace_period = config.runtime_resources_deletion_grace_period
|
|
2185
|
-
k8s_helper = get_k8s_helper()
|
|
2186
1640
|
crd_group, crd_version, crd_plural = self._get_crd_info()
|
|
2187
1641
|
deleted_crds = []
|
|
2188
1642
|
try:
|
|
2189
|
-
crd_objects =
|
|
1643
|
+
crd_objects = get_k8s().crdapi.list_namespaced_custom_object(
|
|
2190
1644
|
crd_group,
|
|
2191
1645
|
crd_version,
|
|
2192
1646
|
namespace,
|
|
@@ -2238,7 +1692,7 @@ class BaseRuntimeHandler(ABC):
|
|
|
2238
1692
|
crd_object_name=crd_object["metadata"]["name"],
|
|
2239
1693
|
)
|
|
2240
1694
|
|
|
2241
|
-
|
|
1695
|
+
get_k8s().delete_crd(
|
|
2242
1696
|
crd_object["metadata"]["name"],
|
|
2243
1697
|
crd_group,
|
|
2244
1698
|
crd_version,
|
|
@@ -2414,13 +1868,15 @@ class BaseRuntimeHandler(ABC):
|
|
|
2414
1868
|
|
|
2415
1869
|
def _build_list_resources_response(
|
|
2416
1870
|
self,
|
|
2417
|
-
pod_resources: List[mlrun.
|
|
2418
|
-
crd_resources: List[mlrun.
|
|
2419
|
-
group_by: Optional[
|
|
1871
|
+
pod_resources: List[mlrun.common.schemas.RuntimeResource] = None,
|
|
1872
|
+
crd_resources: List[mlrun.common.schemas.RuntimeResource] = None,
|
|
1873
|
+
group_by: Optional[
|
|
1874
|
+
mlrun.common.schemas.ListRuntimeResourcesGroupByField
|
|
1875
|
+
] = None,
|
|
2420
1876
|
) -> Union[
|
|
2421
|
-
mlrun.
|
|
2422
|
-
mlrun.
|
|
2423
|
-
mlrun.
|
|
1877
|
+
mlrun.common.schemas.RuntimeResources,
|
|
1878
|
+
mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
|
|
1879
|
+
mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
|
|
2424
1880
|
]:
|
|
2425
1881
|
if crd_resources is None:
|
|
2426
1882
|
crd_resources = []
|
|
@@ -2428,15 +1884,18 @@ class BaseRuntimeHandler(ABC):
|
|
|
2428
1884
|
pod_resources = []
|
|
2429
1885
|
|
|
2430
1886
|
if group_by is None:
|
|
2431
|
-
return mlrun.
|
|
1887
|
+
return mlrun.common.schemas.RuntimeResources(
|
|
2432
1888
|
crd_resources=crd_resources, pod_resources=pod_resources
|
|
2433
1889
|
)
|
|
2434
1890
|
else:
|
|
2435
|
-
if group_by == mlrun.
|
|
1891
|
+
if group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.job:
|
|
2436
1892
|
return self._build_grouped_by_job_list_resources_response(
|
|
2437
1893
|
pod_resources, crd_resources
|
|
2438
1894
|
)
|
|
2439
|
-
elif
|
|
1895
|
+
elif (
|
|
1896
|
+
group_by
|
|
1897
|
+
== mlrun.common.schemas.ListRuntimeResourcesGroupByField.project
|
|
1898
|
+
):
|
|
2440
1899
|
return self._build_grouped_by_project_list_resources_response(
|
|
2441
1900
|
pod_resources, crd_resources
|
|
2442
1901
|
)
|
|
@@ -2447,9 +1906,9 @@ class BaseRuntimeHandler(ABC):
|
|
|
2447
1906
|
|
|
2448
1907
|
def _build_grouped_by_project_list_resources_response(
|
|
2449
1908
|
self,
|
|
2450
|
-
pod_resources: List[mlrun.
|
|
2451
|
-
crd_resources: List[mlrun.
|
|
2452
|
-
) -> mlrun.
|
|
1909
|
+
pod_resources: List[mlrun.common.schemas.RuntimeResource] = None,
|
|
1910
|
+
crd_resources: List[mlrun.common.schemas.RuntimeResource] = None,
|
|
1911
|
+
) -> mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput:
|
|
2453
1912
|
resources = {}
|
|
2454
1913
|
for pod_resource in pod_resources:
|
|
2455
1914
|
self._add_resource_to_grouped_by_project_resources_response(
|
|
@@ -2463,9 +1922,9 @@ class BaseRuntimeHandler(ABC):
|
|
|
2463
1922
|
|
|
2464
1923
|
def _build_grouped_by_job_list_resources_response(
|
|
2465
1924
|
self,
|
|
2466
|
-
pod_resources: List[mlrun.
|
|
2467
|
-
crd_resources: List[mlrun.
|
|
2468
|
-
) -> mlrun.
|
|
1925
|
+
pod_resources: List[mlrun.common.schemas.RuntimeResource] = None,
|
|
1926
|
+
crd_resources: List[mlrun.common.schemas.RuntimeResource] = None,
|
|
1927
|
+
) -> mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput:
|
|
2469
1928
|
resources = {}
|
|
2470
1929
|
for pod_resource in pod_resources:
|
|
2471
1930
|
self._add_resource_to_grouped_by_job_resources_response(
|
|
@@ -2479,9 +1938,9 @@ class BaseRuntimeHandler(ABC):
|
|
|
2479
1938
|
|
|
2480
1939
|
def _add_resource_to_grouped_by_project_resources_response(
|
|
2481
1940
|
self,
|
|
2482
|
-
resources: mlrun.
|
|
1941
|
+
resources: mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
|
|
2483
1942
|
resource_field_name: str,
|
|
2484
|
-
resource: mlrun.
|
|
1943
|
+
resource: mlrun.common.schemas.RuntimeResource,
|
|
2485
1944
|
):
|
|
2486
1945
|
if "mlrun/class" in resource.labels:
|
|
2487
1946
|
project = resource.labels.get("mlrun/project", "")
|
|
@@ -2493,9 +1952,9 @@ class BaseRuntimeHandler(ABC):
|
|
|
2493
1952
|
|
|
2494
1953
|
def _add_resource_to_grouped_by_job_resources_response(
|
|
2495
1954
|
self,
|
|
2496
|
-
resources: mlrun.
|
|
1955
|
+
resources: mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
|
|
2497
1956
|
resource_field_name: str,
|
|
2498
|
-
resource: mlrun.
|
|
1957
|
+
resource: mlrun.common.schemas.RuntimeResource,
|
|
2499
1958
|
):
|
|
2500
1959
|
if "mlrun/uid" in resource.labels:
|
|
2501
1960
|
project = resource.labels.get("mlrun/project", config.default_project)
|
|
@@ -2508,16 +1967,18 @@ class BaseRuntimeHandler(ABC):
|
|
|
2508
1967
|
def _add_resource_to_grouped_by_field_resources_response(
|
|
2509
1968
|
first_field_value: str,
|
|
2510
1969
|
second_field_value: str,
|
|
2511
|
-
resources: mlrun.
|
|
1970
|
+
resources: mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
|
|
2512
1971
|
resource_field_name: str,
|
|
2513
|
-
resource: mlrun.
|
|
1972
|
+
resource: mlrun.common.schemas.RuntimeResource,
|
|
2514
1973
|
):
|
|
2515
1974
|
if first_field_value not in resources:
|
|
2516
1975
|
resources[first_field_value] = {}
|
|
2517
1976
|
if second_field_value not in resources[first_field_value]:
|
|
2518
1977
|
resources[first_field_value][
|
|
2519
1978
|
second_field_value
|
|
2520
|
-
] = mlrun.
|
|
1979
|
+
] = mlrun.common.schemas.RuntimeResources(
|
|
1980
|
+
pod_resources=[], crd_resources=[]
|
|
1981
|
+
)
|
|
2521
1982
|
if not getattr(
|
|
2522
1983
|
resources[first_field_value][second_field_value], resource_field_name
|
|
2523
1984
|
):
|
|
@@ -2651,11 +2112,11 @@ class BaseRuntimeHandler(ABC):
|
|
|
2651
2112
|
return project, uid, name
|
|
2652
2113
|
|
|
2653
2114
|
@staticmethod
|
|
2654
|
-
def _build_pod_resources(pods) -> List[mlrun.
|
|
2115
|
+
def _build_pod_resources(pods) -> List[mlrun.common.schemas.RuntimeResource]:
|
|
2655
2116
|
pod_resources = []
|
|
2656
2117
|
for pod in pods:
|
|
2657
2118
|
pod_resources.append(
|
|
2658
|
-
mlrun.
|
|
2119
|
+
mlrun.common.schemas.RuntimeResource(
|
|
2659
2120
|
name=pod["metadata"]["name"],
|
|
2660
2121
|
labels=pod["metadata"]["labels"],
|
|
2661
2122
|
status=pod["status"],
|
|
@@ -2664,11 +2125,13 @@ class BaseRuntimeHandler(ABC):
|
|
|
2664
2125
|
return pod_resources
|
|
2665
2126
|
|
|
2666
2127
|
@staticmethod
|
|
2667
|
-
def _build_crd_resources(
|
|
2128
|
+
def _build_crd_resources(
|
|
2129
|
+
custom_objects,
|
|
2130
|
+
) -> List[mlrun.common.schemas.RuntimeResource]:
|
|
2668
2131
|
crd_resources = []
|
|
2669
2132
|
for custom_object in custom_objects:
|
|
2670
2133
|
crd_resources.append(
|
|
2671
|
-
mlrun.
|
|
2134
|
+
mlrun.common.schemas.RuntimeResource(
|
|
2672
2135
|
name=custom_object["metadata"]["name"],
|
|
2673
2136
|
labels=custom_object["metadata"]["labels"],
|
|
2674
2137
|
status=custom_object.get("status", {}),
|