mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444)
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3rc1.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/runtimes/base.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -14,69 +14,54 @@
14
14
  import enum
15
15
  import getpass
16
16
  import http
17
- import os.path
18
- import shlex
17
+ import re
19
18
  import traceback
20
- import typing
21
- import uuid
19
+ import warnings
22
20
  from abc import ABC, abstractmethod
23
- from ast import literal_eval
24
21
  from base64 import b64encode
25
- from copy import deepcopy
26
22
  from datetime import datetime, timedelta, timezone
27
23
  from os import environ
28
- from typing import Dict, List, Optional, Tuple, Union
24
+ from typing import Callable, Dict, List, Optional, Tuple, Union
29
25
 
30
- import IPython
31
26
  import requests.exceptions
27
+ from deprecated import deprecated
32
28
  from kubernetes.client.rest import ApiException
33
29
  from nuclio.build import mlrun_footer
34
30
  from sqlalchemy.orm import Session
35
31
 
32
+ import mlrun.api.db.sqldb.session
33
+ import mlrun.api.utils.singletons.db
34
+ import mlrun.common.schemas
36
35
  import mlrun.errors
36
+ import mlrun.launcher.factory
37
37
  import mlrun.utils.helpers
38
+ import mlrun.utils.notifications
38
39
  import mlrun.utils.regex
39
- from mlrun.api import schemas
40
40
  from mlrun.api.constants import LogSources
41
41
  from mlrun.api.db.base import DBInterface
42
42
  from mlrun.utils.helpers import generate_object_uri, verify_field_regex
43
43
 
44
- from ..config import config, is_running_as_api
44
+ from ..config import config
45
45
  from ..datastore import store_manager
46
46
  from ..db import RunDBError, get_or_set_dburl, get_run_db
47
47
  from ..errors import err_to_str
48
- from ..execution import MLClientCtx
49
- from ..k8s_utils import get_k8s_helper
50
- from ..kfpops import mlrun_op, write_kfpmeta
48
+ from ..kfpops import mlrun_op
51
49
  from ..lists import RunList
52
- from ..model import (
53
- BaseMetadata,
54
- HyperParamOptions,
55
- ImageBuilder,
56
- ModelObj,
57
- RunObject,
58
- RunTemplate,
59
- )
60
- from ..secrets import SecretsStore
50
+ from ..model import BaseMetadata, HyperParamOptions, ImageBuilder, ModelObj, RunObject
61
51
  from ..utils import (
62
52
  dict_to_json,
63
53
  dict_to_yaml,
64
54
  enrich_image_url,
65
55
  get_in,
66
56
  get_parsed_docker_registry,
67
- get_ui_url,
68
- is_ipython,
69
57
  logger,
70
- normalize_name,
71
58
  now_date,
72
59
  update_in,
73
60
  )
74
61
  from .constants import PodPhases, RunStates
75
62
  from .funcdoc import update_function_entry_points
76
- from .generators import get_generator
77
- from .utils import RunError, calc_hash, results_to_iter
63
+ from .utils import RunError, calc_hash, get_k8s
78
64
 
79
- run_modes = ["pass"]
80
65
  spec_fields = [
81
66
  "command",
82
67
  "args",
@@ -90,6 +75,7 @@ spec_fields = [
90
75
  "pythonpath",
91
76
  "disable_auto_mount",
92
77
  "allow_empty_resources",
78
+ "clone_target_dir",
93
79
  ]
94
80
 
95
81
 
@@ -130,6 +116,7 @@ class FunctionSpec(ModelObj):
130
116
  default_handler=None,
131
117
  pythonpath=None,
132
118
  disable_auto_mount=False,
119
+ clone_target_dir=None,
133
120
  ):
134
121
 
135
122
  self.command = command or ""
@@ -148,6 +135,9 @@ class FunctionSpec(ModelObj):
148
135
  self.entry_points = entry_points or {}
149
136
  self.disable_auto_mount = disable_auto_mount
150
137
  self.allow_empty_resources = None
138
+ # the build.source is cloned/extracted to the specified clone_target_dir
139
+ # if a relative path is specified, it will be enriched with a temp dir path
140
+ self.clone_target_dir = clone_target_dir or ""
151
141
 
152
142
  @property
153
143
  def build(self) -> ImageBuilder:
@@ -183,14 +173,12 @@ class BaseRuntime(ModelObj):
183
173
  self.is_child = False
184
174
  self._status = None
185
175
  self.status = None
186
- self._is_api_server = False
187
176
  self.verbose = False
188
177
  self._enriched_image = False
189
178
 
190
179
  def set_db_connection(self, conn):
191
180
  if not self._db_conn:
192
181
  self._db_conn = conn
193
- self._is_api_server = mlrun.config.is_running_as_api()
194
182
 
195
183
  @property
196
184
  def metadata(self) -> BaseMetadata:
@@ -216,9 +204,6 @@ class BaseRuntime(ModelObj):
216
204
  def status(self, status):
217
205
  self._status = self._verify_dict(status, "status", FunctionStatus)
218
206
 
219
- def _get_k8s(self):
220
- return get_k8s_helper()
221
-
222
207
  def set_label(self, key, value):
223
208
  self.metadata.labels[key] = str(value)
224
209
  return self
@@ -236,39 +221,6 @@ class BaseRuntime(ModelObj):
236
221
  return True
237
222
  return False
238
223
 
239
- def _use_remote_api(self):
240
- if (
241
- self._is_remote
242
- and not self._is_api_server
243
- and self._get_db()
244
- and self._get_db().kind == "http"
245
- ):
246
- return True
247
- return False
248
-
249
- def _enrich_on_client_side(self):
250
- self.try_auto_mount_based_on_config()
251
- self._fill_credentials()
252
-
253
- def _enrich_on_server_side(self):
254
- pass
255
-
256
- def _enrich_on_server_and_client_sides(self):
257
- """
258
- enrich function also in client side and also on server side
259
- """
260
- pass
261
-
262
- def _enrich_function(self):
263
- """
264
- enriches the function based on the flow state we run in (sdk or server)
265
- """
266
- if self._use_remote_api():
267
- self._enrich_on_client_side()
268
- else:
269
- self._enrich_on_server_side()
270
- self._enrich_on_server_and_client_sides()
271
-
272
224
  def _function_uri(self, tag=None, hash_key=None):
273
225
  return generate_object_uri(
274
226
  self.metadata.project,
@@ -281,11 +233,11 @@ class BaseRuntime(ModelObj):
281
233
  self.spec.rundb = self.spec.rundb or get_or_set_dburl()
282
234
 
283
235
  def _get_db(self):
236
+ # TODO: remove this function and use the launcher db instead
284
237
  self._ensure_run_db()
285
238
  if not self._db_conn:
286
239
  if self.spec.rundb:
287
240
  self._db_conn = get_run_db(self.spec.rundb, secrets=self._secrets)
288
- self._is_api_server = mlrun.config.is_running_as_api()
289
241
  return self._db_conn
290
242
 
291
243
  # This function is different than the auto_mount function, as it mounts to runtimes based on the configuration.
@@ -321,59 +273,62 @@ class BaseRuntime(ModelObj):
321
273
 
322
274
  def run(
323
275
  self,
324
- runspec: RunObject = None,
325
- handler=None,
326
- name: str = "",
327
- project: str = "",
328
- params: dict = None,
329
- inputs: Dict[str, str] = None,
330
- out_path: str = "",
331
- workdir: str = "",
332
- artifact_path: str = "",
333
- watch: bool = True,
334
- schedule: Union[str, schemas.ScheduleCronTrigger] = None,
335
- hyperparams: Dict[str, list] = None,
336
- hyper_param_options: HyperParamOptions = None,
337
- verbose=None,
338
- scrape_metrics: bool = None,
339
- local=False,
340
- local_code_path=None,
341
- auto_build=None,
342
- param_file_secrets: Dict[str, str] = None,
276
+ runspec: Optional[
277
+ Union["mlrun.run.RunTemplate", "mlrun.run.RunObject", dict]
278
+ ] = None,
279
+ handler: Optional[Union[str, Callable]] = None,
280
+ name: Optional[str] = "",
281
+ project: Optional[str] = "",
282
+ params: Optional[dict] = None,
283
+ inputs: Optional[Dict[str, str]] = None,
284
+ out_path: Optional[str] = "",
285
+ workdir: Optional[str] = "",
286
+ artifact_path: Optional[str] = "",
287
+ watch: Optional[bool] = True,
288
+ schedule: Optional[Union[str, mlrun.common.schemas.ScheduleCronTrigger]] = None,
289
+ hyperparams: Optional[Dict[str, list]] = None,
290
+ hyper_param_options: Optional[HyperParamOptions] = None,
291
+ verbose: Optional[bool] = None,
292
+ scrape_metrics: Optional[bool] = None,
293
+ local: Optional[bool] = False,
294
+ local_code_path: Optional[str] = None,
295
+ auto_build: Optional[bool] = None,
296
+ param_file_secrets: Optional[Dict[str, str]] = None,
297
+ notifications: Optional[List[mlrun.model.Notification]] = None,
343
298
  returns: Optional[List[Union[str, Dict[str, str]]]] = None,
344
299
  ) -> RunObject:
345
300
  """
346
301
  Run a local or remote task.
347
302
 
348
- :param runspec: run template object or dict (see RunTemplate)
349
- :param handler: pointer or name of a function handler
350
- :param name: execution name
351
- :param project: project name
352
- :param params: input parameters (dict)
303
+ :param runspec: The run spec to generate the RunObject from. Can be RunTemplate | RunObject | dict.
304
+ :param handler: Pointer or name of a function handler.
305
+ :param name: Execution name.
306
+ :param project: Project name.
307
+ :param params: Input parameters (dict).
353
308
  :param inputs: Input objects to pass to the handler. Type hints can be given so the input will be parsed
354
309
  during runtime from `mlrun.DataItem` to the given type hint. The type hint can be given
355
310
  in the key field of the dictionary after a colon, e.g: "<key> : <type_hint>".
356
- :param out_path: default artifact output path
357
- :param artifact_path: default artifact output path (will replace out_path)
358
- :param workdir: default input artifacts path
359
- :param watch: watch/follow run log
311
+ :param out_path: Default artifact output path.
312
+ :param artifact_path: Default artifact output path (will replace out_path).
313
+ :param workdir: Default input artifacts path.
314
+ :param watch: Watch/follow run log.
360
315
  :param schedule: ScheduleCronTrigger class instance or a standard crontab expression string
361
316
  (which will be converted to the class using its `from_crontab` constructor),
362
317
  see this link for help:
363
318
  https://apscheduler.readthedocs.io/en/3.x/modules/triggers/cron.html#module-apscheduler.triggers.cron
364
- :param hyperparams: dict of param name and list of values to be enumerated e.g. {"p1": [1,2,3]}
319
+ :param hyperparams: Dict of param name and list of values to be enumerated e.g. {"p1": [1,2,3]}
365
320
  the default strategy is grid search, can specify strategy (grid, list, random)
366
- and other options in the hyper_param_options parameter
367
- :param hyper_param_options: dict or :py:class:`~mlrun.model.HyperParamOptions` struct of
368
- hyper parameter options
369
- :param verbose: add verbose prints/logs
370
- :param scrape_metrics: whether to add the `mlrun/scrape-metrics` label to this run's resources
371
- :param local: run the function locally vs on the runtime/cluster
372
- :param local_code_path: path of the code for local runs & debug
373
- :param auto_build: when set to True and the function require build it will be built on the first
374
- function run, use only if you dont plan on changing the build config between runs
375
- :param param_file_secrets: dictionary of secrets to be used only for accessing the hyper-param parameter file.
376
- These secrets are only used locally and will not be stored anywhere
321
+ and other options in the hyper_param_options parameter.
322
+ :param hyper_param_options: Dict or :py:class:`~mlrun.model.HyperParamOptions` struct of hyperparameter options.
323
+ :param verbose: Add verbose prints/logs.
324
+ :param scrape_metrics: Whether to add the `mlrun/scrape-metrics` label to this run's resources.
325
+ :param local: Run the function locally vs on the runtime/cluster.
326
+ :param local_code_path: Path of the code for local runs & debug.
327
+ :param auto_build: When set to True and the function require build it will be built on the first
328
+ function run, use only if you don't plan on changing the build config between runs.
329
+ :param param_file_secrets: Dictionary of secrets to be used only for accessing the hyper-param parameter file.
330
+ These secrets are only used locally and will not be stored anywhere
331
+ :param notifications: List of notifications to push when the run is completed
377
332
  :param returns: List of log hints - configurations for how to log the returning values from the handler's run
378
333
  (as artifacts or results). The list's length must be equal to the amount of returning objects. A
379
334
  log hint may be given as:
@@ -385,206 +340,34 @@ class BaseRuntime(ModelObj):
385
340
  * A dictionary of configurations to use when logging. Further info per object type and artifact
386
341
  type can be given there. The artifact key must appear in the dictionary as "key": "the_key".
387
342
 
388
- :return: run context object (RunObject) with run metadata, results and status
343
+ :return: Run context object (RunObject) with run metadata, results and status
389
344
  """
390
- mlrun.utils.helpers.verify_dict_items_type("Inputs", inputs, [str], [str])
391
-
392
- if self.spec.mode and self.spec.mode not in run_modes:
393
- raise ValueError(f'run mode can only be {",".join(run_modes)}')
394
-
395
- self._enrich_function()
396
-
397
- run = self._create_run_object(runspec)
398
-
399
- if local:
400
-
401
- # do not allow local function to be scheduled
402
- if schedule is not None:
403
- raise mlrun.errors.MLRunInvalidArgumentError(
404
- "local and schedule cannot be used together"
405
- )
406
- return self._run_local(
407
- run,
408
- local_code_path,
409
- project,
410
- name,
411
- workdir,
412
- handler,
413
- params,
414
- inputs,
415
- returns,
416
- artifact_path,
417
- )
418
-
419
- run = self._enrich_run(
420
- run,
421
- handler,
422
- project,
423
- name,
424
- params,
425
- inputs,
426
- returns,
427
- hyperparams,
428
- hyper_param_options,
429
- verbose,
430
- scrape_metrics,
431
- out_path,
432
- artifact_path,
433
- workdir,
345
+ launcher = mlrun.launcher.factory.LauncherFactory.create_launcher(
346
+ self._is_remote, local
434
347
  )
435
- self._validate_output_path(run)
436
- db = self._get_db()
437
-
438
- if not self.is_deployed():
439
- if self.spec.build.auto_build or auto_build:
440
- logger.info(
441
- "Function is not deployed and auto_build flag is set, starting deploy..."
442
- )
443
- self.deploy(skip_deployed=True, show_on_failure=True)
444
- else:
445
- raise RunError(
446
- "function image is not built/ready, set auto_build=True or use .deploy() method first"
447
- )
448
-
449
- if self.verbose:
450
- logger.info(f"runspec:\n{run.to_yaml()}")
451
-
452
- if "V3IO_USERNAME" in environ and "v3io_user" not in run.metadata.labels:
453
- run.metadata.labels["v3io_user"] = environ.get("V3IO_USERNAME")
454
-
455
- if not self.is_child:
456
- db_str = "self" if self._is_api_server else self.spec.rundb
457
- logger.info(
458
- "Storing function",
459
- name=run.metadata.name,
460
- uid=run.metadata.uid,
461
- db=db_str,
462
- )
463
- self._store_function(run, run.metadata, db)
464
-
465
- # execute the job remotely (to a k8s cluster via the API service)
466
- if self._use_remote_api():
467
- return self._submit_job(run, schedule, db, watch)
468
-
469
- elif self._is_remote and not self._is_api_server and not self.kfp:
470
- logger.warning(
471
- "warning!, Api url not set, " "trying to exec remote runtime locally"
472
- )
473
-
474
- execution = MLClientCtx.from_dict(
475
- run.to_dict(),
476
- db,
477
- autocommit=False,
478
- is_api=self._is_api_server,
479
- store_run=False,
480
- )
481
-
482
- self._verify_run_params(run.spec.parameters)
483
-
484
- # create task generator (for child runs) from spec
485
- task_generator = get_generator(
486
- run.spec, execution, param_file_secrets=param_file_secrets
348
+ return launcher.launch(
349
+ runtime=self,
350
+ task=runspec,
351
+ handler=handler,
352
+ name=name,
353
+ project=project,
354
+ params=params,
355
+ inputs=inputs,
356
+ out_path=out_path,
357
+ workdir=workdir,
358
+ artifact_path=artifact_path,
359
+ watch=watch,
360
+ schedule=schedule,
361
+ hyperparams=hyperparams,
362
+ hyper_param_options=hyper_param_options,
363
+ verbose=verbose,
364
+ scrape_metrics=scrape_metrics,
365
+ local_code_path=local_code_path,
366
+ auto_build=auto_build,
367
+ param_file_secrets=param_file_secrets,
368
+ notifications=notifications,
369
+ returns=returns,
487
370
  )
488
- if task_generator:
489
- # verify valid task parameters
490
- tasks = task_generator.generate(run)
491
- for task in tasks:
492
- self._verify_run_params(task.spec.parameters)
493
-
494
- # post verifications, store execution in db and run pre run hooks
495
- execution.store_run()
496
- self._pre_run(run, execution) # hook for runtime specific prep
497
-
498
- last_err = None
499
- # If the runtime is nested, it means the hyper-run will run within a single instance of the run.
500
- # So while in the API, we consider the hyper-run as a single run, and then in the runtime itself when the
501
- # runtime is now a local runtime and therefore `self._is_nested == False`, we run each task as a separate run by
502
- # using the task generator
503
- if task_generator and not self._is_nested:
504
- # multiple runs (based on hyper params or params file)
505
- runner = self._run_many
506
- if hasattr(self, "_parallel_run_many") and task_generator.use_parallel():
507
- runner = self._parallel_run_many
508
- results = runner(task_generator, execution, run)
509
- results_to_iter(results, run, execution)
510
- result = execution.to_dict()
511
- result = self._update_run_state(result, task=run)
512
-
513
- else:
514
- # single run
515
- try:
516
- resp = self._run(run, execution)
517
- if (
518
- watch
519
- and mlrun.runtimes.RuntimeKinds.is_watchable(self.kind)
520
- # API shouldn't watch logs, its the client job to query the run logs
521
- and not mlrun.config.is_running_as_api()
522
- ):
523
- state, _ = run.logs(True, self._get_db())
524
- if state not in ["succeeded", "completed"]:
525
- logger.warning(f"run ended with state {state}")
526
- result = self._update_run_state(resp, task=run)
527
- except RunError as err:
528
- last_err = err
529
- result = self._update_run_state(task=run, err=err)
530
-
531
- self._post_run(result, execution) # hook for runtime specific cleanup
532
-
533
- return self._wrap_run_result(result, run, schedule=schedule, err=last_err)
534
-
535
- def _wrap_run_result(
536
- self, result: dict, runspec: RunObject, schedule=None, err=None
537
- ):
538
- # if the purpose was to schedule (and not to run) nothing to wrap
539
- if schedule:
540
- return
541
-
542
- if result and self.kfp and err is None:
543
- write_kfpmeta(result)
544
-
545
- # show ipython/jupyter result table widget
546
- results_tbl = RunList()
547
- if result:
548
- results_tbl.append(result)
549
- else:
550
- logger.info("no returned result (job may still be in progress)")
551
- results_tbl.append(runspec.to_dict())
552
-
553
- uid = runspec.metadata.uid
554
- project = runspec.metadata.project
555
- if is_ipython and config.ipython_widget:
556
- results_tbl.show()
557
- print()
558
- ui_url = get_ui_url(project, uid)
559
- if ui_url:
560
- ui_url = f' or <a href="{ui_url}" target="_blank">click here</a> to open in UI'
561
- IPython.display.display(
562
- IPython.display.HTML(
563
- f"<b> > to track results use the .show() or .logs() methods {ui_url}</b>"
564
- )
565
- )
566
- elif not (self.is_child and is_running_as_api()):
567
- project_flag = f"-p {project}" if project else ""
568
- info_cmd = f"mlrun get run {uid} {project_flag}"
569
- logs_cmd = f"mlrun logs {uid} {project_flag}"
570
- logger.info(
571
- "To track results use the CLI", info_cmd=info_cmd, logs_cmd=logs_cmd
572
- )
573
- ui_url = get_ui_url(project, uid)
574
- if ui_url:
575
- logger.info("Or click for UI", ui_url=ui_url)
576
- if result:
577
- run = RunObject.from_dict(result)
578
- logger.info(
579
- f"run executed, status={run.status.state}", name=run.metadata.name
580
- )
581
- if run.status.state == "error":
582
- if self._is_remote and not self.is_child:
583
- logger.error(f"runtime error: {run.status.error}")
584
- raise RunError(run.status.error)
585
- return run
586
-
587
- return None
588
371
 
589
372
  def _get_db_run(self, task: RunObject = None):
590
373
  if self._get_db() and task:
@@ -613,237 +396,6 @@ class BaseRuntime(ModelObj):
613
396
  runtime_env["MLRUN_NAMESPACE"] = self.metadata.namespace or config.namespace
614
397
  return runtime_env
615
398
 
616
- def _run_local(
617
- self,
618
- runspec,
619
- local_code_path,
620
- project,
621
- name,
622
- workdir,
623
- handler,
624
- params,
625
- inputs,
626
- returns,
627
- artifact_path,
628
- ):
629
- # allow local run simulation with a flip of a flag
630
- command = self
631
- if local_code_path:
632
- project = project or self.metadata.project
633
- name = name or self.metadata.name
634
- command = local_code_path
635
- return mlrun.run_local(
636
- runspec,
637
- command,
638
- name,
639
- self.spec.args,
640
- workdir=workdir,
641
- project=project,
642
- handler=handler,
643
- params=params,
644
- inputs=inputs,
645
- artifact_path=artifact_path,
646
- mode=self.spec.mode,
647
- allow_empty_resources=self.spec.allow_empty_resources,
648
- returns=returns,
649
- )
650
-
651
- def _create_run_object(self, runspec):
652
- # TODO: Once implemented the `Runtime` handlers configurations (doc strings, params type hints and returning
653
- # log hints, possible parameter values, etc), the configured type hints and log hints should be set into
654
- # the `RunObject` from the `Runtime`.
655
- if runspec:
656
- runspec = deepcopy(runspec)
657
- if isinstance(runspec, str):
658
- runspec = literal_eval(runspec)
659
- if not isinstance(runspec, (dict, RunTemplate, RunObject)):
660
- raise ValueError(
661
- "task/runspec is not a valid task object," f" type={type(runspec)}"
662
- )
663
-
664
- if isinstance(runspec, RunTemplate):
665
- runspec = RunObject.from_template(runspec)
666
- if isinstance(runspec, dict) or runspec is None:
667
- runspec = RunObject.from_dict(runspec)
668
- return runspec
669
-
670
- def _enrich_run(
671
- self,
672
- runspec,
673
- handler,
674
- project_name,
675
- name,
676
- params,
677
- inputs,
678
- returns,
679
- hyperparams,
680
- hyper_param_options,
681
- verbose,
682
- scrape_metrics,
683
- out_path,
684
- artifact_path,
685
- workdir,
686
- ):
687
- runspec.spec.handler = (
688
- handler or runspec.spec.handler or self.spec.default_handler or ""
689
- )
690
- if runspec.spec.handler and self.kind not in ["handler", "dask"]:
691
- runspec.spec.handler = runspec.spec.handler_name
692
-
693
- def_name = self.metadata.name
694
- if runspec.spec.handler_name:
695
- short_name = runspec.spec.handler_name
696
- for separator in ["#", "::", "."]:
697
- # drop paths, module or class name from short name
698
- if separator in short_name:
699
- short_name = short_name.split(separator)[-1]
700
- def_name += "-" + short_name
701
-
702
- runspec.metadata.name = normalize_name(
703
- name=name or runspec.metadata.name or def_name,
704
- # if name or runspec.metadata.name are set then it means that is user defined name and we want to warn the
705
- # user that the passed name needs to be set without underscore, if its not user defined but rather enriched
706
- # from the handler(function) name then we replace the underscore without warning the user.
707
- # most of the time handlers will have `_` in the handler name (python convention is to separate function
708
- # words with `_`), therefore we don't want to be noisy when normalizing the run name
709
- verbose=bool(name or runspec.metadata.name),
710
- )
711
- verify_field_regex(
712
- "run.metadata.name", runspec.metadata.name, mlrun.utils.regex.run_name
713
- )
714
- runspec.metadata.project = (
715
- project_name
716
- or runspec.metadata.project
717
- or self.metadata.project
718
- or config.default_project
719
- )
720
- runspec.spec.parameters = params or runspec.spec.parameters
721
- runspec.spec.inputs = inputs or runspec.spec.inputs
722
- runspec.spec.returns = returns or runspec.spec.returns
723
- runspec.spec.hyperparams = hyperparams or runspec.spec.hyperparams
724
- runspec.spec.hyper_param_options = (
725
- hyper_param_options or runspec.spec.hyper_param_options
726
- )
727
- runspec.spec.verbose = verbose or runspec.spec.verbose
728
- if scrape_metrics is None:
729
- if runspec.spec.scrape_metrics is None:
730
- scrape_metrics = config.scrape_metrics
731
- else:
732
- scrape_metrics = runspec.spec.scrape_metrics
733
- runspec.spec.scrape_metrics = scrape_metrics
734
- runspec.spec.input_path = (
735
- workdir or runspec.spec.input_path or self.spec.workdir
736
- )
737
- if self.spec.allow_empty_resources:
738
- runspec.spec.allow_empty_resources = self.spec.allow_empty_resources
739
-
740
- spec = runspec.spec
741
- if spec.secret_sources:
742
- self._secrets = SecretsStore.from_list(spec.secret_sources)
743
-
744
- # update run metadata (uid, labels) and store in DB
745
- meta = runspec.metadata
746
- meta.uid = meta.uid or uuid.uuid4().hex
747
-
748
- runspec.spec.output_path = out_path or artifact_path or runspec.spec.output_path
749
-
750
- if not runspec.spec.output_path:
751
- if runspec.metadata.project:
752
- if (
753
- mlrun.pipeline_context.project
754
- and runspec.metadata.project
755
- == mlrun.pipeline_context.project.metadata.name
756
- ):
757
- runspec.spec.output_path = (
758
- mlrun.pipeline_context.project.spec.artifact_path
759
- or mlrun.pipeline_context.workflow_artifact_path
760
- )
761
-
762
- if not runspec.spec.output_path and self._get_db():
763
- try:
764
- # not passing or loading the DB before the enrichment on purpose, because we want to enrich the
765
- # spec first as get_db() depends on it
766
- project = self._get_db().get_project(runspec.metadata.project)
767
- # this is mainly for tests, so we won't need to mock get_project for so many tests
768
- # in normal use cases if no project is found we will get an error
769
- if project:
770
- runspec.spec.output_path = project.spec.artifact_path
771
- except mlrun.errors.MLRunNotFoundError:
772
- logger.warning(
773
- f"project {project_name} is not saved in DB yet, "
774
- f"enriching output path with default artifact path: {config.artifact_path}"
775
- )
776
-
777
- if not runspec.spec.output_path:
778
- runspec.spec.output_path = config.artifact_path
779
-
780
- if runspec.spec.output_path:
781
- runspec.spec.output_path = runspec.spec.output_path.replace(
782
- "{{run.uid}}", meta.uid
783
- )
784
- runspec.spec.output_path = mlrun.utils.helpers.fill_artifact_path_template(
785
- runspec.spec.output_path, runspec.metadata.project
786
- )
787
- return runspec
788
-
789
- def _submit_job(self, run: RunObject, schedule, db, watch):
790
- if self._secrets:
791
- run.spec.secret_sources = self._secrets.to_serial()
792
- try:
793
- resp = db.submit_job(run, schedule=schedule)
794
- if schedule:
795
- action = resp.pop("action", "created")
796
- logger.info(f"task schedule {action}", **resp)
797
- return
798
-
799
- except (requests.HTTPError, Exception) as err:
800
- logger.error(f"got remote run err, {err_to_str(err)}")
801
-
802
- if isinstance(err, requests.HTTPError):
803
- self._handle_submit_job_http_error(err)
804
-
805
- result = None
806
- # if we got a schedule no reason to do post_run stuff (it purposed to update the run status with error,
807
- # but there's no run in case of schedule)
808
- if not schedule:
809
- result = self._update_run_state(task=run, err=err_to_str(err))
810
- return self._wrap_run_result(result, run, schedule=schedule, err=err)
811
-
812
- if resp:
813
- txt = get_in(resp, "status.status_text")
814
- if txt:
815
- logger.info(txt)
816
- # watch is None only in scenario where we run from pipeline step, in this case we don't want to watch the run
817
- # logs too frequently but rather just pull the state of the run from the DB and pull the logs every x seconds
818
- # which ideally greater than the pull state interval, this reduces unnecessary load on the API server, as
819
- # running a pipeline is mostly not an interactive process which means the logs pulling doesn't need to be pulled
820
- # in real time
821
- if (
822
- watch is None
823
- and self.kfp
824
- and config.httpdb.logs.pipelines.pull_state.mode == "enabled"
825
- ):
826
- state_interval = int(
827
- config.httpdb.logs.pipelines.pull_state.pull_state_interval
828
- )
829
- logs_interval = int(
830
- config.httpdb.logs.pipelines.pull_state.pull_logs_interval
831
- )
832
-
833
- run.wait_for_completion(
834
- show_logs=True,
835
- sleep=state_interval,
836
- logs_interval=logs_interval,
837
- raise_on_failure=False,
838
- )
839
- resp = self._get_db_run(run)
840
-
841
- elif watch or self.kfp:
842
- run.logs(True, self._get_db())
843
- resp = self._get_db_run(run)
844
-
845
- return self._wrap_run_result(resp, run, schedule=schedule)
846
-
847
399
  @staticmethod
848
400
  def _handle_submit_job_http_error(error: requests.HTTPError):
849
401
  # if we receive a 400 status code, this means the request was invalid and the run wasn't created in the DB.
@@ -1048,6 +600,12 @@ class BaseRuntime(ModelObj):
1048
600
  if not handler:
1049
601
  raise RunError(f"handler must be provided for {self.kind} runtime")
1050
602
 
603
+ def _has_pipeline_param(self) -> bool:
604
+ # check if the runtime has pipeline parameters
605
+ # https://www.kubeflow.org/docs/components/pipelines/v1/sdk/parameters/
606
+ matches = re.findall(mlrun.utils.regex.pipeline_param[0], self.to_json())
607
+ return bool(matches)
608
+
1051
609
  def full_image_path(
1052
610
  self, image=None, client_version: str = None, client_python_version: str = None
1053
611
  ):
@@ -1127,10 +685,9 @@ class BaseRuntime(ModelObj):
1127
685
  :return: KubeFlow containerOp
1128
686
  """
1129
687
 
1130
- # if self.spec.image and not image:
1131
- # image = self.full_image_path()
1132
-
1133
- if use_db:
688
+ # if the function contain KFP PipelineParams (futures) pass the full spec to the
689
+ # ContainerOp this way KFP will substitute the params with previous step outputs
690
+ if use_db and not self._has_pipeline_param():
1134
691
  # if the same function is built as part of the pipeline we do not use the versioned function
1135
692
  # rather the latest function w the same tag so we can pick up the updated image/status
1136
693
  versioned = False if hasattr(self, "_build_in_pipeline") else True
@@ -1206,104 +763,93 @@ class BaseRuntime(ModelObj):
1206
763
  self,
1207
764
  requirements: Union[str, List[str]],
1208
765
  overwrite: bool = False,
1209
- verify_base_image: bool = True,
766
+ verify_base_image: bool = False,
767
+ prepare_image_for_deploy: bool = True,
768
+ requirements_file: str = "",
1210
769
  ):
1211
770
  """add package requirements from file or list to build spec.
1212
771
 
1213
- :param requirements: python requirements file path or list of packages
1214
- :param overwrite: overwrite existing requirements
1215
- :param verify_base_image: verify that the base image is configured
772
+ :param requirements: a list of python packages
773
+ :param requirements_file: a local python requirements file path
774
+ :param overwrite: overwrite existing requirements
775
+ :param verify_base_image: verify that the base image is configured
776
+ (deprecated, use prepare_image_for_deploy)
777
+ :param prepare_image_for_deploy: prepare the image/base_image spec for deployment
1216
778
  :return: function object
1217
779
  """
1218
- encoded_requirements = self._encode_requirements(requirements)
1219
- commands = self.spec.build.commands or [] if not overwrite else []
1220
- new_command = f"python -m pip install {encoded_requirements}"
1221
- # make sure we dont append the same line twice
1222
- if new_command not in commands:
1223
- commands.append(new_command)
1224
- self.spec.build.commands = commands
1225
- if verify_base_image:
1226
- self.verify_base_image()
780
+ self.spec.build.with_requirements(requirements, requirements_file, overwrite)
781
+
782
+ if verify_base_image or prepare_image_for_deploy:
783
+ # TODO: remove verify_base_image in 1.6.0
784
+ if verify_base_image:
785
+ warnings.warn(
786
+ "verify_base_image is deprecated in 1.4.0 and will be removed in 1.6.0, "
787
+ "use prepare_image_for_deploy",
788
+ category=FutureWarning,
789
+ )
790
+ self.prepare_image_for_deploy()
791
+
1227
792
  return self
1228
793
 
1229
794
  def with_commands(
1230
795
  self,
1231
796
  commands: List[str],
1232
797
  overwrite: bool = False,
1233
- verify_base_image: bool = True,
798
+ verify_base_image: bool = False,
799
+ prepare_image_for_deploy: bool = True,
1234
800
  ):
1235
801
  """add commands to build spec.
1236
802
 
1237
- :param commands: list of commands to run during build
803
+ :param commands: list of commands to run during build
804
+ :param overwrite: overwrite existing commands
805
+ :param verify_base_image: verify that the base image is configured
806
+ (deprecated, use prepare_image_for_deploy)
807
+ :param prepare_image_for_deploy: prepare the image/base_image spec for deployment
1238
808
 
1239
809
  :return: function object
1240
810
  """
1241
- if not isinstance(commands, list):
1242
- raise ValueError("commands must be a string list")
1243
- if not self.spec.build.commands or overwrite:
1244
- self.spec.build.commands = commands
1245
- else:
1246
- # add commands to existing build commands
1247
- for command in commands:
1248
- if command not in self.spec.build.commands:
1249
- self.spec.build.commands.append(command)
1250
- # using list(set(x)) won't retain order,
1251
- # solution inspired from https://stackoverflow.com/a/17016257/8116661
1252
- self.spec.build.commands = list(dict.fromkeys(self.spec.build.commands))
1253
- if verify_base_image:
1254
- self.verify_base_image()
811
+ self.spec.build.with_commands(commands, overwrite)
812
+
813
+ if verify_base_image or prepare_image_for_deploy:
814
+ # TODO: remove verify_base_image in 1.6.0
815
+ if verify_base_image:
816
+ warnings.warn(
817
+ "verify_base_image is deprecated in 1.4.0 and will be removed in 1.6.0, "
818
+ "use prepare_image_for_deploy",
819
+ category=FutureWarning,
820
+ )
821
+
822
+ self.prepare_image_for_deploy()
1255
823
  return self
1256
824
 
1257
825
  def clean_build_params(self):
1258
- # when using `with_requirements` we also execute `verify_base_image` which adds the base image and cleans the
1259
- # spec.image, so we need to restore the image back
826
+ # when using `with_requirements` we also execute `prepare_image_for_deploy` which adds the base image
827
+ # and cleans the spec.image, so we need to restore the image back
1260
828
  if self.spec.build.base_image and not self.spec.image:
1261
829
  self.spec.image = self.spec.build.base_image
1262
830
 
1263
831
  self.spec.build = {}
1264
832
  return self
1265
833
 
834
+ # TODO: remove in 1.6.0
835
+ @deprecated(
836
+ version="1.4.0",
837
+ reason="'verify_base_image' will be removed in 1.6.0, use 'prepare_image_for_deploy' instead",
838
+ category=FutureWarning,
839
+ )
1266
840
  def verify_base_image(self):
1267
- build = self.spec.build
1268
- require_build = build.commands or (
1269
- build.source and not build.load_source_on_run
841
+ self.prepare_image_for_deploy()
842
+
843
+ def prepare_image_for_deploy(self):
844
+ """
845
+ if a function has a 'spec.image' it is considered to be deployed,
846
+ but because we allow the user to set 'spec.image' for usability purposes,
847
+ we need to check whether this is a built image or it requires to be built on top.
848
+ """
849
+ launcher = mlrun.launcher.factory.LauncherFactory.create_launcher(
850
+ is_remote=self._is_remote
1270
851
  )
1271
- image = self.spec.image
1272
- # we allow users to not set an image, in that case we'll use the default
1273
- if (
1274
- not image
1275
- and self.kind in mlrun.mlconf.function_defaults.image_by_kind.to_dict()
1276
- ):
1277
- image = mlrun.mlconf.function_defaults.image_by_kind.to_dict()[self.kind]
1278
-
1279
- if (
1280
- self.kind not in mlrun.runtimes.RuntimeKinds.nuclio_runtimes()
1281
- # TODO: need a better way to decide whether a function requires a build
1282
- and require_build
1283
- and image
1284
- and not self.spec.build.base_image
1285
- # when submitting a run we are loading the function from the db, and using new_function for it,
1286
- # this results reaching here, but we are already after deploy of the image, meaning we don't need to prepare
1287
- # the base image for deployment
1288
- and self._is_remote_api()
1289
- ):
1290
- # when the function require build use the image as the base_image for the build
1291
- self.spec.build.base_image = image
1292
- self.spec.image = ""
1293
-
1294
- def _verify_run_params(self, parameters: typing.Dict[str, typing.Any]):
1295
- for param_name, param_value in parameters.items():
1296
-
1297
- if isinstance(param_value, dict):
1298
- # if the parameter is a dict, we might have some nested parameters,
1299
- # in this case we need to verify them as well recursively
1300
- self._verify_run_params(param_value)
1301
-
1302
- # verify that integer parameters don't exceed a int64
1303
- if isinstance(param_value, int) and abs(param_value) >= 2**63:
1304
- raise mlrun.errors.MLRunInvalidArgumentError(
1305
- f"parameter {param_name} value {param_value} exceeds int64"
1306
- )
852
+ launcher.prepare_image_for_deploy(self)
1307
853
 
1308
854
  def export(self, target="", format=".yaml", secrets=None, strip=True):
1309
855
  """save function spec to a local/remote path (default to./function.yaml)
@@ -1334,35 +880,12 @@ class BaseRuntime(ModelObj):
1334
880
  return self
1335
881
 
1336
882
  def save(self, tag="", versioned=False, refresh=False) -> str:
1337
- db = self._get_db()
1338
- if not db:
1339
- logger.error("database connection is not configured")
1340
- return ""
1341
-
1342
- if refresh and self._is_remote_api():
1343
- try:
1344
- meta = self.metadata
1345
- db_func = db.get_function(meta.name, meta.project, meta.tag)
1346
- if db_func and "status" in db_func:
1347
- self.status = db_func["status"]
1348
- if (
1349
- self.status.state
1350
- and self.status.state == "ready"
1351
- and not hasattr(self.status, "nuclio_name")
1352
- ):
1353
- self.spec.image = get_in(db_func, "spec.image", self.spec.image)
1354
- except mlrun.errors.MLRunNotFoundError:
1355
- pass
1356
-
1357
- tag = tag or self.metadata.tag
1358
-
1359
- obj = self.to_dict()
1360
- logger.debug(f"saving function: {self.metadata.name}, tag: {tag}")
1361
- hash_key = db.store_function(
1362
- obj, self.metadata.name, self.metadata.project, tag, versioned
883
+ launcher = mlrun.launcher.factory.LauncherFactory.create_launcher(
884
+ is_remote=self._is_remote
885
+ )
886
+ return launcher.save_function(
887
+ self, tag=tag, versioned=versioned, refresh=refresh
1363
888
  )
1364
- hash_key = hash_key if versioned else None
1365
- return "db://" + self._function_uri(hash_key=hash_key, tag=tag)
1366
889
 
1367
890
  def to_dict(self, fields=None, exclude=None, strip=False):
1368
891
  struct = super().to_dict(fields, exclude=exclude)
@@ -1391,76 +914,11 @@ class BaseRuntime(ModelObj):
1391
914
  line += f", default={p['default']}"
1392
915
  print(" " + line)
1393
916
 
1394
- def _encode_requirements(self, requirements_to_encode):
1395
-
1396
- # if a string, read the file then encode
1397
- if isinstance(requirements_to_encode, str):
1398
- with open(requirements_to_encode, "r") as fp:
1399
- requirements_to_encode = fp.read().splitlines()
1400
-
1401
- requirements = []
1402
- for requirement in requirements_to_encode:
1403
- requirement = requirement.strip()
1404
-
1405
- # ignore empty lines
1406
- # ignore comments
1407
- if not requirement or requirement.startswith("#"):
1408
- continue
1409
-
1410
- # ignore inline comments as well
1411
- inline_comment = requirement.split(" #")
1412
- if len(inline_comment) > 1:
1413
- requirement = inline_comment[0].strip()
1414
-
1415
- # -r / --requirement are flags and should not be escaped
1416
- # we allow such flags (could be passed within the requirements.txt file) and do not
1417
- # try to open the file and include its content since it might be a remote file
1418
- # given on the base image.
1419
- for req_flag in ["-r", "--requirement"]:
1420
- if requirement.startswith(req_flag):
1421
- requirement = requirement[len(req_flag) :].strip()
1422
- requirements.append(req_flag)
1423
- break
1424
-
1425
- # wrap in single quote to ensure that the requirement is treated as a single string
1426
- # quote the requirement to avoid issues with special characters, double quotes, etc.
1427
- requirements.append(shlex.quote(requirement))
1428
-
1429
- return " ".join(requirements)
1430
-
1431
- def _validate_output_path(self, run):
1432
- if is_local(run.spec.output_path):
1433
- message = ""
1434
- if not os.path.isabs(run.spec.output_path):
1435
- message = (
1436
- "artifact/output path is not defined or is local and relative,"
1437
- " artifacts will not be visible in the UI"
1438
- )
1439
- if mlrun.runtimes.RuntimeKinds.requires_absolute_artifacts_path(
1440
- self.kind
1441
- ):
1442
- raise mlrun.errors.MLRunPreconditionFailedError(
1443
- "artifact path (`artifact_path`) must be absolute for remote tasks"
1444
- )
1445
- elif hasattr(self.spec, "volume_mounts") and not self.spec.volume_mounts:
1446
- message = (
1447
- "artifact output path is local while no volume mount is specified. "
1448
- "artifacts would not be visible via UI."
1449
- )
1450
- if message:
1451
- logger.warning(message, output_path=run.spec.output_path)
1452
-
1453
-
1454
- def is_local(url):
1455
- if not url:
1456
- return True
1457
- return "://" not in url
1458
-
1459
917
 
1460
918
  class BaseRuntimeHandler(ABC):
1461
919
  # setting here to allow tests to override
1462
920
  kind = "base"
1463
- class_modes: typing.Dict[RuntimeClassMode, str] = {}
921
+ class_modes: Dict[RuntimeClassMode, str] = {}
1464
922
  wait_for_deletion_interval = 10
1465
923
 
1466
924
  @staticmethod
@@ -1474,12 +932,12 @@ class BaseRuntimeHandler(ABC):
1474
932
  def _should_collect_logs(self) -> bool:
1475
933
  """
1476
934
  There are some runtimes which we don't collect logs for using the log collector
1477
- :return: whether should collect log for it
935
+ :return: whether it should collect log for it
1478
936
  """
1479
937
  return True
1480
938
 
1481
939
  def _get_possible_mlrun_class_label_values(
1482
- self, class_mode: typing.Union[RuntimeClassMode, str] = None
940
+ self, class_mode: Union[RuntimeClassMode, str] = None
1483
941
  ) -> List[str]:
1484
942
  """
1485
943
  Should return the possible values of the mlrun/class label for runtime resources that are of this runtime
@@ -1493,21 +951,20 @@ class BaseRuntimeHandler(ABC):
1493
951
  def list_resources(
1494
952
  self,
1495
953
  project: str,
1496
- object_id: typing.Optional[str] = None,
954
+ object_id: Optional[str] = None,
1497
955
  label_selector: str = None,
1498
- group_by: Optional[mlrun.api.schemas.ListRuntimeResourcesGroupByField] = None,
956
+ group_by: Optional[
957
+ mlrun.common.schemas.ListRuntimeResourcesGroupByField
958
+ ] = None,
1499
959
  ) -> Union[
1500
- mlrun.api.schemas.RuntimeResources,
1501
- mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput,
1502
- mlrun.api.schemas.GroupedByProjectRuntimeResourcesOutput,
960
+ mlrun.common.schemas.RuntimeResources,
961
+ mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
962
+ mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
1503
963
  ]:
1504
964
  # We currently don't support removing runtime resources in non k8s env
1505
- if not mlrun.k8s_utils.get_k8s_helper(
1506
- silent=True
1507
- ).is_running_inside_kubernetes_cluster():
965
+ if not get_k8s().is_running_inside_kubernetes_cluster():
1508
966
  return {}
1509
- k8s_helper = get_k8s_helper()
1510
- namespace = k8s_helper.resolve_namespace()
967
+ namespace = get_k8s().resolve_namespace()
1511
968
  label_selector = self.resolve_label_selector(project, object_id, label_selector)
1512
969
  pods = self._list_pods(namespace, label_selector)
1513
970
  pod_resources = self._build_pod_resources(pods)
@@ -1523,8 +980,10 @@ class BaseRuntimeHandler(ABC):
1523
980
 
1524
981
  def build_output_from_runtime_resources(
1525
982
  self,
1526
- runtime_resources_list: List[mlrun.api.schemas.RuntimeResources],
1527
- group_by: Optional[mlrun.api.schemas.ListRuntimeResourcesGroupByField] = None,
983
+ runtime_resources_list: List[mlrun.common.schemas.RuntimeResources],
984
+ group_by: Optional[
985
+ mlrun.common.schemas.ListRuntimeResourcesGroupByField
986
+ ] = None,
1528
987
  ):
1529
988
  pod_resources = []
1530
989
  crd_resources = []
@@ -1550,12 +1009,9 @@ class BaseRuntimeHandler(ABC):
1550
1009
  if grace_period is None:
1551
1010
  grace_period = config.runtime_resources_deletion_grace_period
1552
1011
  # We currently don't support removing runtime resources in non k8s env
1553
- if not mlrun.k8s_utils.get_k8s_helper(
1554
- silent=True
1555
- ).is_running_inside_kubernetes_cluster():
1012
+ if not get_k8s().is_running_inside_kubernetes_cluster():
1556
1013
  return
1557
- k8s_helper = get_k8s_helper()
1558
- namespace = k8s_helper.resolve_namespace()
1014
+ namespace = get_k8s().resolve_namespace()
1559
1015
  label_selector = self.resolve_label_selector("*", label_selector=label_selector)
1560
1016
  crd_group, crd_version, crd_plural = self._get_crd_info()
1561
1017
  if crd_group and crd_version and crd_plural:
@@ -1603,8 +1059,7 @@ class BaseRuntimeHandler(ABC):
1603
1059
  self.delete_resources(db, db_session, label_selector, force, grace_period)
1604
1060
 
1605
1061
  def monitor_runs(self, db: DBInterface, db_session: Session):
1606
- k8s_helper = get_k8s_helper()
1607
- namespace = k8s_helper.resolve_namespace()
1062
+ namespace = get_k8s().resolve_namespace()
1608
1063
  label_selector = self._get_default_label_selector()
1609
1064
  crd_group, crd_version, crd_plural = self._get_crd_info()
1610
1065
  runtime_resource_is_crd = False
@@ -1753,8 +1208,8 @@ class BaseRuntimeHandler(ABC):
1753
1208
 
1754
1209
  def _add_object_label_selector_if_needed(
1755
1210
  self,
1756
- object_id: typing.Optional[str] = None,
1757
- label_selector: typing.Optional[str] = None,
1211
+ object_id: Optional[str] = None,
1212
+ label_selector: Optional[str] = None,
1758
1213
  ):
1759
1214
  if object_id:
1760
1215
  object_label_selector = self._get_object_label_selector(object_id)
@@ -1776,17 +1231,19 @@ class BaseRuntimeHandler(ABC):
1776
1231
  def _enrich_list_resources_response(
1777
1232
  self,
1778
1233
  response: Union[
1779
- mlrun.api.schemas.RuntimeResources,
1780
- mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput,
1781
- mlrun.api.schemas.GroupedByProjectRuntimeResourcesOutput,
1234
+ mlrun.common.schemas.RuntimeResources,
1235
+ mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
1236
+ mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
1782
1237
  ],
1783
1238
  namespace: str,
1784
1239
  label_selector: str = None,
1785
- group_by: Optional[mlrun.api.schemas.ListRuntimeResourcesGroupByField] = None,
1240
+ group_by: Optional[
1241
+ mlrun.common.schemas.ListRuntimeResourcesGroupByField
1242
+ ] = None,
1786
1243
  ) -> Union[
1787
- mlrun.api.schemas.RuntimeResources,
1788
- mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput,
1789
- mlrun.api.schemas.GroupedByProjectRuntimeResourcesOutput,
1244
+ mlrun.common.schemas.RuntimeResources,
1245
+ mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
1246
+ mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
1790
1247
  ]:
1791
1248
  """
1792
1249
  Override this to list resources other then pods or CRDs (which are handled by the base class)
@@ -1796,12 +1253,14 @@ class BaseRuntimeHandler(ABC):
1796
1253
  def _build_output_from_runtime_resources(
1797
1254
  self,
1798
1255
  response: Union[
1799
- mlrun.api.schemas.RuntimeResources,
1800
- mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput,
1801
- mlrun.api.schemas.GroupedByProjectRuntimeResourcesOutput,
1256
+ mlrun.common.schemas.RuntimeResources,
1257
+ mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
1258
+ mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
1802
1259
  ],
1803
- runtime_resources_list: List[mlrun.api.schemas.RuntimeResources],
1804
- group_by: Optional[mlrun.api.schemas.ListRuntimeResourcesGroupByField] = None,
1260
+ runtime_resources_list: List[mlrun.common.schemas.RuntimeResources],
1261
+ group_by: Optional[
1262
+ mlrun.common.schemas.ListRuntimeResourcesGroupByField
1263
+ ] = None,
1805
1264
  ):
1806
1265
  """
1807
1266
  Override this to add runtime resources other than pods or CRDs (which are handled by the base class) to the
@@ -1883,7 +1342,7 @@ class BaseRuntimeHandler(ABC):
1883
1342
  return in_terminal_state, last_container_completion_time, run_state
1884
1343
 
1885
1344
  def _get_default_label_selector(
1886
- self, class_mode: typing.Union[RuntimeClassMode, str] = None
1345
+ self, class_mode: Union[RuntimeClassMode, str] = None
1887
1346
  ) -> str:
1888
1347
  """
1889
1348
  Override this to add a default label selector
@@ -1932,20 +1391,18 @@ class BaseRuntimeHandler(ABC):
1932
1391
  return False
1933
1392
 
1934
1393
  def _list_pods(self, namespace: str, label_selector: str = None) -> List:
1935
- k8s_helper = get_k8s_helper()
1936
- pods = k8s_helper.list_pods(namespace, selector=label_selector)
1394
+ pods = get_k8s().list_pods(namespace, selector=label_selector)
1937
1395
  # when we work with custom objects (list_namespaced_custom_object) it's always a dict, to be able to generalize
1938
1396
  # code working on runtime resource (either a custom object or a pod) we're transforming to dicts
1939
1397
  pods = [pod.to_dict() for pod in pods]
1940
1398
  return pods
1941
1399
 
1942
1400
  def _list_crd_objects(self, namespace: str, label_selector: str = None) -> List:
1943
- k8s_helper = get_k8s_helper()
1944
1401
  crd_group, crd_version, crd_plural = self._get_crd_info()
1945
1402
  crd_objects = []
1946
1403
  if crd_group and crd_version and crd_plural:
1947
1404
  try:
1948
- crd_objects = k8s_helper.crdapi.list_namespaced_custom_object(
1405
+ crd_objects = get_k8s().crdapi.list_namespaced_custom_object(
1949
1406
  crd_group,
1950
1407
  crd_version,
1951
1408
  namespace,
@@ -1963,9 +1420,9 @@ class BaseRuntimeHandler(ABC):
1963
1420
  def resolve_label_selector(
1964
1421
  self,
1965
1422
  project: str,
1966
- object_id: typing.Optional[str] = None,
1967
- label_selector: typing.Optional[str] = None,
1968
- class_mode: typing.Union[RuntimeClassMode, str] = None,
1423
+ object_id: Optional[str] = None,
1424
+ label_selector: Optional[str] = None,
1425
+ class_mode: Union[RuntimeClassMode, str] = None,
1969
1426
  with_main_runtime_resource_label_selector: bool = False,
1970
1427
  ) -> str:
1971
1428
  default_label_selector = self._get_default_label_selector(class_mode=class_mode)
@@ -1996,7 +1453,7 @@ class BaseRuntimeHandler(ABC):
1996
1453
  @staticmethod
1997
1454
  def resolve_object_id(
1998
1455
  run: dict,
1999
- ) -> typing.Optional[str]:
1456
+ ) -> Optional[str]:
2000
1457
  """
2001
1458
  Get the object id from the run object
2002
1459
  Override this if the object id is not the run uid
@@ -2011,11 +1468,10 @@ class BaseRuntimeHandler(ABC):
2011
1468
  deleted_pods: List[Dict],
2012
1469
  label_selector: str = None,
2013
1470
  ):
2014
- k8s_helper = get_k8s_helper()
2015
1471
  deleted_pod_names = [pod_dict["metadata"]["name"] for pod_dict in deleted_pods]
2016
1472
 
2017
1473
  def _verify_pods_removed():
2018
- pods = k8s_helper.v1api.list_namespaced_pod(
1474
+ pods = get_k8s().v1api.list_namespaced_pod(
2019
1475
  namespace, label_selector=label_selector
2020
1476
  )
2021
1477
  existing_pod_names = [pod.metadata.name for pod in pods.items]
@@ -2068,10 +1524,10 @@ class BaseRuntimeHandler(ABC):
2068
1524
  "name"
2069
1525
  ]
2070
1526
  still_in_deletion_crds_to_pod_names = {}
2071
- jobs_runtime_resources: mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput = self.list_resources(
1527
+ jobs_runtime_resources: mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput = self.list_resources(
2072
1528
  "*",
2073
1529
  label_selector=label_selector,
2074
- group_by=mlrun.api.schemas.ListRuntimeResourcesGroupByField.job,
1530
+ group_by=mlrun.common.schemas.ListRuntimeResourcesGroupByField.job,
2075
1531
  )
2076
1532
  for project, project_jobs in jobs_runtime_resources.items():
2077
1533
  if project not in project_uid_crd_map:
@@ -2119,8 +1575,7 @@ class BaseRuntimeHandler(ABC):
2119
1575
  ) -> List[Dict]:
2120
1576
  if grace_period is None:
2121
1577
  grace_period = config.runtime_resources_deletion_grace_period
2122
- k8s_helper = get_k8s_helper()
2123
- pods = k8s_helper.v1api.list_namespaced_pod(
1578
+ pods = get_k8s().v1api.list_namespaced_pod(
2124
1579
  namespace, label_selector=label_selector
2125
1580
  )
2126
1581
  deleted_pods = []
@@ -2161,7 +1616,7 @@ class BaseRuntimeHandler(ABC):
2161
1616
  pod_name=pod.metadata.name,
2162
1617
  )
2163
1618
 
2164
- get_k8s_helper().delete_pod(pod.metadata.name, namespace)
1619
+ get_k8s().delete_pod(pod.metadata.name, namespace)
2165
1620
  deleted_pods.append(pod_dict)
2166
1621
  except Exception as exc:
2167
1622
  logger.warning(
@@ -2182,11 +1637,10 @@ class BaseRuntimeHandler(ABC):
2182
1637
  ) -> List[Dict]:
2183
1638
  if grace_period is None:
2184
1639
  grace_period = config.runtime_resources_deletion_grace_period
2185
- k8s_helper = get_k8s_helper()
2186
1640
  crd_group, crd_version, crd_plural = self._get_crd_info()
2187
1641
  deleted_crds = []
2188
1642
  try:
2189
- crd_objects = k8s_helper.crdapi.list_namespaced_custom_object(
1643
+ crd_objects = get_k8s().crdapi.list_namespaced_custom_object(
2190
1644
  crd_group,
2191
1645
  crd_version,
2192
1646
  namespace,
@@ -2238,7 +1692,7 @@ class BaseRuntimeHandler(ABC):
2238
1692
  crd_object_name=crd_object["metadata"]["name"],
2239
1693
  )
2240
1694
 
2241
- get_k8s_helper().delete_crd(
1695
+ get_k8s().delete_crd(
2242
1696
  crd_object["metadata"]["name"],
2243
1697
  crd_group,
2244
1698
  crd_version,
@@ -2414,13 +1868,15 @@ class BaseRuntimeHandler(ABC):
2414
1868
 
2415
1869
  def _build_list_resources_response(
2416
1870
  self,
2417
- pod_resources: List[mlrun.api.schemas.RuntimeResource] = None,
2418
- crd_resources: List[mlrun.api.schemas.RuntimeResource] = None,
2419
- group_by: Optional[mlrun.api.schemas.ListRuntimeResourcesGroupByField] = None,
1871
+ pod_resources: List[mlrun.common.schemas.RuntimeResource] = None,
1872
+ crd_resources: List[mlrun.common.schemas.RuntimeResource] = None,
1873
+ group_by: Optional[
1874
+ mlrun.common.schemas.ListRuntimeResourcesGroupByField
1875
+ ] = None,
2420
1876
  ) -> Union[
2421
- mlrun.api.schemas.RuntimeResources,
2422
- mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput,
2423
- mlrun.api.schemas.GroupedByProjectRuntimeResourcesOutput,
1877
+ mlrun.common.schemas.RuntimeResources,
1878
+ mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
1879
+ mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
2424
1880
  ]:
2425
1881
  if crd_resources is None:
2426
1882
  crd_resources = []
@@ -2428,15 +1884,18 @@ class BaseRuntimeHandler(ABC):
2428
1884
  pod_resources = []
2429
1885
 
2430
1886
  if group_by is None:
2431
- return mlrun.api.schemas.RuntimeResources(
1887
+ return mlrun.common.schemas.RuntimeResources(
2432
1888
  crd_resources=crd_resources, pod_resources=pod_resources
2433
1889
  )
2434
1890
  else:
2435
- if group_by == mlrun.api.schemas.ListRuntimeResourcesGroupByField.job:
1891
+ if group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.job:
2436
1892
  return self._build_grouped_by_job_list_resources_response(
2437
1893
  pod_resources, crd_resources
2438
1894
  )
2439
- elif group_by == mlrun.api.schemas.ListRuntimeResourcesGroupByField.project:
1895
+ elif (
1896
+ group_by
1897
+ == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project
1898
+ ):
2440
1899
  return self._build_grouped_by_project_list_resources_response(
2441
1900
  pod_resources, crd_resources
2442
1901
  )
@@ -2447,9 +1906,9 @@ class BaseRuntimeHandler(ABC):
2447
1906
 
2448
1907
  def _build_grouped_by_project_list_resources_response(
2449
1908
  self,
2450
- pod_resources: List[mlrun.api.schemas.RuntimeResource] = None,
2451
- crd_resources: List[mlrun.api.schemas.RuntimeResource] = None,
2452
- ) -> mlrun.api.schemas.GroupedByProjectRuntimeResourcesOutput:
1909
+ pod_resources: List[mlrun.common.schemas.RuntimeResource] = None,
1910
+ crd_resources: List[mlrun.common.schemas.RuntimeResource] = None,
1911
+ ) -> mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput:
2453
1912
  resources = {}
2454
1913
  for pod_resource in pod_resources:
2455
1914
  self._add_resource_to_grouped_by_project_resources_response(
@@ -2463,9 +1922,9 @@ class BaseRuntimeHandler(ABC):
2463
1922
 
2464
1923
  def _build_grouped_by_job_list_resources_response(
2465
1924
  self,
2466
- pod_resources: List[mlrun.api.schemas.RuntimeResource] = None,
2467
- crd_resources: List[mlrun.api.schemas.RuntimeResource] = None,
2468
- ) -> mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput:
1925
+ pod_resources: List[mlrun.common.schemas.RuntimeResource] = None,
1926
+ crd_resources: List[mlrun.common.schemas.RuntimeResource] = None,
1927
+ ) -> mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput:
2469
1928
  resources = {}
2470
1929
  for pod_resource in pod_resources:
2471
1930
  self._add_resource_to_grouped_by_job_resources_response(
@@ -2479,9 +1938,9 @@ class BaseRuntimeHandler(ABC):
2479
1938
 
2480
1939
  def _add_resource_to_grouped_by_project_resources_response(
2481
1940
  self,
2482
- resources: mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput,
1941
+ resources: mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
2483
1942
  resource_field_name: str,
2484
- resource: mlrun.api.schemas.RuntimeResource,
1943
+ resource: mlrun.common.schemas.RuntimeResource,
2485
1944
  ):
2486
1945
  if "mlrun/class" in resource.labels:
2487
1946
  project = resource.labels.get("mlrun/project", "")
@@ -2493,9 +1952,9 @@ class BaseRuntimeHandler(ABC):
2493
1952
 
2494
1953
  def _add_resource_to_grouped_by_job_resources_response(
2495
1954
  self,
2496
- resources: mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput,
1955
+ resources: mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
2497
1956
  resource_field_name: str,
2498
- resource: mlrun.api.schemas.RuntimeResource,
1957
+ resource: mlrun.common.schemas.RuntimeResource,
2499
1958
  ):
2500
1959
  if "mlrun/uid" in resource.labels:
2501
1960
  project = resource.labels.get("mlrun/project", config.default_project)
@@ -2508,16 +1967,18 @@ class BaseRuntimeHandler(ABC):
2508
1967
  def _add_resource_to_grouped_by_field_resources_response(
2509
1968
  first_field_value: str,
2510
1969
  second_field_value: str,
2511
- resources: mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput,
1970
+ resources: mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
2512
1971
  resource_field_name: str,
2513
- resource: mlrun.api.schemas.RuntimeResource,
1972
+ resource: mlrun.common.schemas.RuntimeResource,
2514
1973
  ):
2515
1974
  if first_field_value not in resources:
2516
1975
  resources[first_field_value] = {}
2517
1976
  if second_field_value not in resources[first_field_value]:
2518
1977
  resources[first_field_value][
2519
1978
  second_field_value
2520
- ] = mlrun.api.schemas.RuntimeResources(pod_resources=[], crd_resources=[])
1979
+ ] = mlrun.common.schemas.RuntimeResources(
1980
+ pod_resources=[], crd_resources=[]
1981
+ )
2521
1982
  if not getattr(
2522
1983
  resources[first_field_value][second_field_value], resource_field_name
2523
1984
  ):
@@ -2651,11 +2112,11 @@ class BaseRuntimeHandler(ABC):
2651
2112
  return project, uid, name
2652
2113
 
2653
2114
  @staticmethod
2654
- def _build_pod_resources(pods) -> List[mlrun.api.schemas.RuntimeResource]:
2115
+ def _build_pod_resources(pods) -> List[mlrun.common.schemas.RuntimeResource]:
2655
2116
  pod_resources = []
2656
2117
  for pod in pods:
2657
2118
  pod_resources.append(
2658
- mlrun.api.schemas.RuntimeResource(
2119
+ mlrun.common.schemas.RuntimeResource(
2659
2120
  name=pod["metadata"]["name"],
2660
2121
  labels=pod["metadata"]["labels"],
2661
2122
  status=pod["status"],
@@ -2664,11 +2125,13 @@ class BaseRuntimeHandler(ABC):
2664
2125
  return pod_resources
2665
2126
 
2666
2127
  @staticmethod
2667
- def _build_crd_resources(custom_objects) -> List[mlrun.api.schemas.RuntimeResource]:
2128
+ def _build_crd_resources(
2129
+ custom_objects,
2130
+ ) -> List[mlrun.common.schemas.RuntimeResource]:
2668
2131
  crd_resources = []
2669
2132
  for custom_object in custom_objects:
2670
2133
  crd_resources.append(
2671
- mlrun.api.schemas.RuntimeResource(
2134
+ mlrun.common.schemas.RuntimeResource(
2672
2135
  name=custom_object["metadata"]["name"],
2673
2136
  labels=custom_object["metadata"]["labels"],
2674
2137
  status=custom_object.get("status", {}),