mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444) hide show
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3rc1.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/k8s_utils.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -11,772 +11,27 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- import base64
15
- import hashlib
16
- import time
17
14
  import typing
18
- from datetime import datetime
19
- from sys import stdout
20
15
 
21
16
  import kubernetes.client
22
- from kubernetes import client, config
23
- from kubernetes.client.rest import ApiException
24
17
 
25
- import mlrun.api.schemas
18
+ import mlrun.common.schemas
26
19
  import mlrun.errors
27
20
 
28
21
  from .config import config as mlconfig
29
- from .errors import err_to_str
30
- from .platforms.iguazio import v3io_to_vol
31
- from .utils import logger
32
22
 
33
- _k8s = None
23
+ _running_inside_kubernetes_cluster = None
34
24
 
35
25
 
36
- def get_k8s_helper(namespace=None, silent=False, log=False) -> "K8sHelper":
37
- """
38
- :param silent: set to true if you're calling this function from a code that might run from remotely (outside of a
39
- k8s cluster)
40
- :param log: sometimes we want to avoid logging when executing init_k8s_config
41
- """
42
- global _k8s
43
- if not _k8s:
44
- _k8s = K8sHelper(namespace, silent=silent, log=log)
45
- return _k8s
46
-
47
-
48
- class SecretTypes:
49
- opaque = "Opaque"
50
- v3io_fuse = "v3io/fuse"
51
-
52
-
53
- class K8sHelper:
54
- def __init__(self, namespace=None, config_file=None, silent=False, log=True):
55
- self.namespace = namespace or mlconfig.namespace
56
- self.config_file = config_file
57
- self.running_inside_kubernetes_cluster = False
58
- try:
59
- self._init_k8s_config(log)
60
- self.v1api = client.CoreV1Api()
61
- self.crdapi = client.CustomObjectsApi()
62
- except Exception:
63
- if not silent:
64
- raise
65
-
66
- def resolve_namespace(self, namespace=None):
67
- return namespace or self.namespace
68
-
69
- def _init_k8s_config(self, log=True):
70
- try:
71
- config.load_incluster_config()
72
- self.running_inside_kubernetes_cluster = True
73
- if log:
74
- logger.info("using in-cluster config.")
75
- except Exception:
76
- try:
77
- config.load_kube_config(self.config_file)
78
- if log:
79
- logger.info("using local kubernetes config.")
80
- except Exception:
81
- raise RuntimeError(
82
- "cannot find local kubernetes config file,"
83
- " place it in ~/.kube/config or specify it in "
84
- "KUBECONFIG env var"
85
- )
86
-
87
- def is_running_inside_kubernetes_cluster(self):
88
- return self.running_inside_kubernetes_cluster
89
-
90
- def list_pods(self, namespace=None, selector="", states=None):
91
- try:
92
- resp = self.v1api.list_namespaced_pod(
93
- self.resolve_namespace(namespace), label_selector=selector
94
- )
95
- except ApiException as exc:
96
- logger.error(f"failed to list pods: {err_to_str(exc)}")
97
- raise exc
98
-
99
- items = []
100
- for i in resp.items:
101
- if not states or i.status.phase in states:
102
- items.append(i)
103
- return items
104
-
105
- def clean_pods(self, namespace=None, selector="", states=None):
106
- if not selector and not states:
107
- raise ValueError("labels selector or states list must be specified")
108
- items = self.list_pods(namespace, selector, states)
109
- for item in items:
110
- self.delete_pod(item.metadata.name, item.metadata.namespace)
111
-
112
- def create_pod(self, pod, max_retry=3, retry_interval=3):
113
- if "pod" in dir(pod):
114
- pod = pod.pod
115
- pod.metadata.namespace = self.resolve_namespace(pod.metadata.namespace)
116
-
117
- retry_count = 0
118
- while True:
119
- try:
120
- resp = self.v1api.create_namespaced_pod(pod.metadata.namespace, pod)
121
- except ApiException as exc:
122
-
123
- if retry_count > max_retry:
124
- logger.error(
125
- "failed to create pod after max retries",
126
- retry_count=retry_count,
127
- exc=err_to_str(exc),
128
- pod=pod,
129
- )
130
- raise exc
131
-
132
- logger.error("failed to create pod", exc=err_to_str(exc), pod=pod)
133
-
134
- # known k8s issue, see https://github.com/kubernetes/kubernetes/issues/67761
135
- if "gke-resource-quotas" in err_to_str(exc):
136
- logger.warning(
137
- "failed to create pod due to gke resource error, "
138
- f"sleeping {retry_interval} seconds and retrying"
139
- )
140
- retry_count += 1
141
- time.sleep(retry_interval)
142
- continue
143
-
144
- raise exc
145
- else:
146
- logger.info(f"Pod {resp.metadata.name} created")
147
- return resp.metadata.name, resp.metadata.namespace
148
-
149
- def delete_pod(self, name, namespace=None):
150
- try:
151
- api_response = self.v1api.delete_namespaced_pod(
152
- name,
153
- self.resolve_namespace(namespace),
154
- grace_period_seconds=0,
155
- propagation_policy="Background",
156
- )
157
- return api_response
158
- except ApiException as exc:
159
- # ignore error if pod is already removed
160
- if exc.status != 404:
161
- logger.error(f"failed to delete pod: {err_to_str(exc)}", pod_name=name)
162
- raise exc
163
-
164
- def get_pod(self, name, namespace=None, raise_on_not_found=False):
165
- try:
166
- api_response = self.v1api.read_namespaced_pod(
167
- name=name, namespace=self.resolve_namespace(namespace)
168
- )
169
- return api_response
170
- except ApiException as exc:
171
- if exc.status != 404:
172
- logger.error(f"failed to get pod: {err_to_str(exc)}")
173
- raise exc
174
- else:
175
- if raise_on_not_found:
176
- raise mlrun.errors.MLRunNotFoundError(f"Pod not found: {name}")
177
- return None
178
-
179
- def get_pod_status(self, name, namespace=None):
180
- return self.get_pod(
181
- name, namespace, raise_on_not_found=True
182
- ).status.phase.lower()
183
-
184
- def delete_crd(self, name, crd_group, crd_version, crd_plural, namespace=None):
185
- try:
186
- namespace = self.resolve_namespace(namespace)
187
- self.crdapi.delete_namespaced_custom_object(
188
- crd_group,
189
- crd_version,
190
- namespace,
191
- crd_plural,
192
- name,
193
- )
194
- logger.info(
195
- "Deleted crd object",
196
- crd_name=name,
197
- namespace=namespace,
198
- )
199
- except ApiException as exc:
200
-
201
- # ignore error if crd is already removed
202
- if exc.status != 404:
203
- logger.error(
204
- f"failed to delete crd: {err_to_str(exc)}",
205
- crd_name=name,
206
- crd_group=crd_group,
207
- crd_version=crd_version,
208
- crd_plural=crd_plural,
209
- )
210
- raise exc
211
-
212
- def logs(self, name, namespace=None):
213
- try:
214
- resp = self.v1api.read_namespaced_pod_log(
215
- name=name, namespace=self.resolve_namespace(namespace)
216
- )
217
- except ApiException as exc:
218
- logger.error(f"failed to get pod logs: {err_to_str(exc)}")
219
- raise exc
220
-
221
- return resp
222
-
223
- def run_job(self, pod, timeout=600):
224
- pod_name, namespace = self.create_pod(pod)
225
- if not pod_name:
226
- logger.error("failed to create pod")
227
- return "error"
228
- return self.watch(pod_name, namespace, timeout)
229
-
230
- def watch(self, pod_name, namespace=None, timeout=600, writer=None):
231
- namespace = self.resolve_namespace(namespace)
232
- start_time = datetime.now()
233
- while True:
234
- try:
235
- pod = self.get_pod(pod_name, namespace)
236
- if not pod:
237
- return "error"
238
- status = pod.status.phase.lower()
239
- if status in ["running", "completed", "succeeded"]:
240
- print("")
241
- break
242
- if status == "failed":
243
- return "failed"
244
- elapsed_time = (datetime.now() - start_time).seconds
245
- if elapsed_time > timeout:
246
- return "timeout"
247
- time.sleep(2)
248
- stdout.write(".")
249
- if status != "pending":
250
- logger.warning(f"pod state in loop is {status}")
251
- except ApiException as exc:
252
- logger.error(f"failed waiting for pod: {err_to_str(exc)}\n")
253
- return "error"
254
- outputs = self.v1api.read_namespaced_pod_log(
255
- name=pod_name, namespace=namespace, follow=True, _preload_content=False
256
- )
257
- for out in outputs:
258
- print(out.decode("utf-8"), end="")
259
- if writer:
260
- writer.write(out)
261
-
262
- for i in range(5):
263
- pod_state = self.get_pod(pod_name, namespace).status.phase.lower()
264
- if pod_state != "running":
265
- break
266
- logger.warning("pod still running, waiting 2 sec")
267
- time.sleep(2)
268
-
269
- if pod_state == "failed":
270
- logger.error("pod exited with error")
271
- if writer:
272
- writer.flush()
273
- return pod_state
274
-
275
- def create_cfgmap(self, name, data, namespace="", labels=None):
276
- body = client.api_client.V1ConfigMap()
277
- namespace = self.resolve_namespace(namespace)
278
- body.data = data
279
- if name.endswith("*"):
280
- body.metadata = client.V1ObjectMeta(
281
- generate_name=name[:-1], namespace=namespace, labels=labels
282
- )
283
- else:
284
- body.metadata = client.V1ObjectMeta(
285
- name=name, namespace=namespace, labels=labels
286
- )
287
- try:
288
- resp = self.v1api.create_namespaced_config_map(namespace, body)
289
- except ApiException as exc:
290
- logger.error(f"failed to create configmap: {err_to_str(exc)}")
291
- raise exc
292
-
293
- logger.info(f"ConfigMap {resp.metadata.name} created")
294
- return resp.metadata.name
295
-
296
- def del_cfgmap(self, name, namespace=None):
297
- try:
298
- api_response = self.v1api.delete_namespaced_config_map(
299
- name,
300
- self.resolve_namespace(namespace),
301
- grace_period_seconds=0,
302
- propagation_policy="Background",
303
- )
304
-
305
- return api_response
306
- except ApiException as exc:
307
- # ignore error if ConfigMap is already removed
308
- if exc.status != 404:
309
- logger.error(f"failed to delete ConfigMap: {err_to_str(exc)}")
310
- raise exc
311
-
312
- def list_cfgmap(self, namespace=None, selector=""):
313
- try:
314
- resp = self.v1api.list_namespaced_config_map(
315
- self.resolve_namespace(namespace), watch=False, label_selector=selector
316
- )
317
- except ApiException as exc:
318
- logger.error(f"failed to list ConfigMaps: {err_to_str(exc)}")
319
- raise exc
320
-
321
- items = []
322
- for i in resp.items:
323
- items.append(i)
324
- return items
325
-
326
- def get_logger_pods(self, project, uid, run_kind, namespace=""):
327
-
328
- # As this file is imported in mlrun.runtimes, we sadly cannot have this import in the top level imports
329
- # as that will create an import loop.
330
- # TODO: Fix the import loops already!
331
- import mlrun.runtimes
332
-
333
- namespace = self.resolve_namespace(namespace)
334
- mpijob_crd_version = mlrun.runtimes.utils.resolve_mpijob_crd_version(
335
- api_context=True
336
- )
337
- mpijob_role_label = (
338
- mlrun.runtimes.constants.MPIJobCRDVersions.role_label_by_version(
339
- mpijob_crd_version
340
- )
341
- )
342
- extra_selectors = {
343
- "spark": "spark-role=driver",
344
- "mpijob": f"{mpijob_role_label}=launcher",
345
- }
346
-
347
- # TODO: all mlrun labels are sprinkled in a lot of places - they need to all be defined in a central,
348
- # inclusive place.
349
- selectors = [
350
- "mlrun/class",
351
- f"mlrun/project={project}",
352
- f"mlrun/uid={uid}",
353
- ]
354
-
355
- # In order to make the `list_pods` request return a lighter and quicker result, we narrow the search for
356
- # the relevant pods using the proper label selector according to the run kind
357
- if run_kind in extra_selectors:
358
- selectors.append(extra_selectors[run_kind])
359
-
360
- selector = ",".join(selectors)
361
- pods = self.list_pods(namespace, selector=selector)
362
- if not pods:
363
- logger.error("no pod matches that uid", uid=uid)
364
- return
365
-
366
- return {p.metadata.name: p.status.phase for p in pods}
367
-
368
- def create_project_service_account(self, project, service_account, namespace=""):
369
- namespace = self.resolve_namespace(namespace)
370
- k8s_service_account = client.V1ServiceAccount()
371
- labels = {"mlrun/project": project}
372
- k8s_service_account.metadata = client.V1ObjectMeta(
373
- name=service_account, namespace=namespace, labels=labels
374
- )
375
- try:
376
- api_response = self.v1api.create_namespaced_service_account(
377
- namespace,
378
- k8s_service_account,
379
- )
380
- return api_response
381
- except ApiException as exc:
382
- logger.error(f"failed to create service account: {err_to_str(exc)}")
383
- raise exc
384
-
385
- def get_project_vault_secret_name(
386
- self, project, service_account_name, namespace=""
387
- ):
388
- namespace = self.resolve_namespace(namespace)
389
-
26
+ def is_running_inside_kubernetes_cluster():
27
+ global _running_inside_kubernetes_cluster
28
+ if _running_inside_kubernetes_cluster is None:
390
29
  try:
391
- service_account = self.v1api.read_namespaced_service_account(
392
- service_account_name, namespace
393
- )
394
- except ApiException as exc:
395
- # It's valid for the service account to not exist. Simply return None
396
- if exc.status != 404:
397
- logger.error(f"failed to retrieve service accounts: {err_to_str(exc)}")
398
- raise exc
399
- return None
400
-
401
- if len(service_account.secrets) > 1:
402
- raise ValueError(
403
- f"Service account {service_account_name} has more than one secret"
404
- )
405
-
406
- return service_account.secrets[0].name
407
-
408
- def get_project_secret_name(self, project) -> str:
409
- return mlconfig.secret_stores.kubernetes.project_secret_name.format(
410
- project=project
411
- )
412
-
413
- def get_auth_secret_name(self, access_key: str) -> str:
414
- hashed_access_key = self._hash_access_key(access_key)
415
- return mlconfig.secret_stores.kubernetes.auth_secret_name.format(
416
- hashed_access_key=hashed_access_key
417
- )
418
-
419
- @staticmethod
420
- def _hash_access_key(access_key: str):
421
- return hashlib.sha224(access_key.encode()).hexdigest()
422
-
423
- def store_project_secrets(self, project, secrets, namespace=""):
424
- secret_name = self.get_project_secret_name(project)
425
- self.store_secrets(secret_name, secrets, namespace)
426
-
427
- def read_auth_secret(self, secret_name, namespace="", raise_on_not_found=False):
428
- namespace = self.resolve_namespace(namespace)
429
-
430
- try:
431
- secret_data = self.v1api.read_namespaced_secret(secret_name, namespace).data
432
- except ApiException as exc:
433
- logger.error(
434
- "Failed to read secret",
435
- secret_name=secret_name,
436
- namespace=namespace,
437
- exc=err_to_str(exc),
438
- )
439
- if exc.status != 404:
440
- raise exc
441
- elif raise_on_not_found:
442
- raise mlrun.errors.MLRunNotFoundError(
443
- f"Secret '{secret_name}' was not found in namespace '{namespace}'"
444
- ) from exc
445
-
446
- return None, None
447
-
448
- def _get_secret_value(key):
449
- if secret_data.get(key):
450
- return base64.b64decode(secret_data[key]).decode("utf-8")
451
- else:
452
- return None
453
-
454
- username = _get_secret_value(
455
- mlrun.api.schemas.AuthSecretData.get_field_secret_key("username")
456
- )
457
- access_key = _get_secret_value(
458
- mlrun.api.schemas.AuthSecretData.get_field_secret_key("access_key")
459
- )
460
-
461
- return username, access_key
462
-
463
- def store_auth_secret(self, username: str, access_key: str, namespace="") -> str:
464
- secret_name = self.get_auth_secret_name(access_key)
465
- secret_data = {
466
- mlrun.api.schemas.AuthSecretData.get_field_secret_key("username"): username,
467
- mlrun.api.schemas.AuthSecretData.get_field_secret_key(
468
- "access_key"
469
- ): access_key,
470
- }
471
- self.store_secrets(
472
- secret_name,
473
- secret_data,
474
- namespace,
475
- type_=SecretTypes.v3io_fuse,
476
- labels={"mlrun/username": username},
477
- )
478
- return secret_name
479
-
480
- def store_secrets(
481
- self,
482
- secret_name,
483
- secrets,
484
- namespace="",
485
- type_=SecretTypes.opaque,
486
- labels: typing.Optional[dict] = None,
487
- ):
488
- namespace = self.resolve_namespace(namespace)
489
- try:
490
- k8s_secret = self.v1api.read_namespaced_secret(secret_name, namespace)
491
- except ApiException as exc:
492
- # If secret doesn't exist, we'll simply create it
493
- if exc.status != 404:
494
- logger.error(f"failed to retrieve k8s secret: {err_to_str(exc)}")
495
- raise exc
496
- k8s_secret = client.V1Secret(type=type_)
497
- k8s_secret.metadata = client.V1ObjectMeta(
498
- name=secret_name, namespace=namespace, labels=labels
499
- )
500
- k8s_secret.string_data = secrets
501
- self.v1api.create_namespaced_secret(namespace, k8s_secret)
502
- return
503
-
504
- secret_data = k8s_secret.data.copy()
505
- for key, value in secrets.items():
506
- secret_data[key] = base64.b64encode(value.encode()).decode("utf-8")
507
-
508
- k8s_secret.data = secret_data
509
- self.v1api.replace_namespaced_secret(secret_name, namespace, k8s_secret)
510
-
511
- def delete_project_secrets(self, project, secrets, namespace=""):
512
- secret_name = self.get_project_secret_name(project)
513
- self.delete_secrets(secret_name, secrets, namespace)
514
-
515
- def delete_auth_secret(self, secret_ref: str, namespace=""):
516
- self.delete_secrets(secret_ref, {}, namespace)
517
-
518
- def delete_secrets(self, secret_name, secrets, namespace=""):
519
- namespace = self.resolve_namespace(namespace)
520
-
521
- try:
522
- k8s_secret = self.v1api.read_namespaced_secret(secret_name, namespace)
523
- except ApiException as exc:
524
- # If secret does not exist, return as if the deletion was successfully
525
- if exc.status == 404:
526
- return
527
- else:
528
- logger.error(f"failed to retrieve k8s secret: {err_to_str(exc)}")
529
- raise exc
530
-
531
- if not secrets:
532
- secret_data = {}
533
- else:
534
- secret_data = k8s_secret.data.copy()
535
- for secret in secrets:
536
- secret_data.pop(secret, None)
537
-
538
- if not secret_data:
539
- self.v1api.delete_namespaced_secret(secret_name, namespace)
540
- else:
541
- k8s_secret.data = secret_data
542
- self.v1api.replace_namespaced_secret(secret_name, namespace, k8s_secret)
543
-
544
- def _get_project_secrets_raw_data(self, project, namespace=""):
545
- secret_name = self.get_project_secret_name(project)
546
- return self._get_secret_raw_data(secret_name, namespace)
547
-
548
- def _get_secret_raw_data(self, secret_name, namespace=""):
549
- namespace = self.resolve_namespace(namespace)
550
-
551
- try:
552
- k8s_secret = self.v1api.read_namespaced_secret(secret_name, namespace)
553
- except ApiException:
554
- return None
555
-
556
- return k8s_secret.data
557
-
558
- def get_project_secret_keys(self, project, namespace="", filter_internal=False):
559
- secrets_data = self._get_project_secrets_raw_data(project, namespace)
560
- if not secrets_data:
561
- return []
562
-
563
- secret_keys = list(secrets_data.keys())
564
- if filter_internal:
565
- secret_keys = list(
566
- filter(lambda key: not key.startswith("mlrun."), secret_keys)
567
- )
568
- return secret_keys
569
-
570
- def get_project_secret_data(self, project, secret_keys=None, namespace=""):
571
- secrets_data = self._get_project_secrets_raw_data(project, namespace)
572
- return self._decode_secret_data(secrets_data, secret_keys)
573
-
574
- def get_secret_data(self, secret_name, namespace=""):
575
- secrets_data = self._get_secret_raw_data(secret_name, namespace)
576
- return self._decode_secret_data(secrets_data)
577
-
578
- def _decode_secret_data(self, secrets_data, secret_keys=None):
579
- results = {}
580
- if not secrets_data:
581
- return results
582
-
583
- # If not asking for specific keys, return all
584
- secret_keys = secret_keys or secrets_data.keys()
585
-
586
- for key in secret_keys:
587
- encoded_value = secrets_data.get(key)
588
- if encoded_value:
589
- results[key] = base64.b64decode(secrets_data[key]).decode("utf-8")
590
- return results
591
-
592
-
593
- class BasePod:
594
- def __init__(
595
- self,
596
- task_name="",
597
- image=None,
598
- command=None,
599
- args=None,
600
- namespace="",
601
- kind="job",
602
- project=None,
603
- default_pod_spec_attributes=None,
604
- resources=None,
605
- ):
606
- self.namespace = namespace
607
- self.name = ""
608
- self.task_name = task_name
609
- self.image = image
610
- self.command = command
611
- self.args = args
612
- self._volumes = []
613
- self._mounts = []
614
- self.env = None
615
- self.node_selector = None
616
- self.project = project or mlrun.mlconf.default_project
617
- self._labels = {
618
- "mlrun/task-name": task_name,
619
- "mlrun/class": kind,
620
- "mlrun/project": self.project,
621
- }
622
- self._annotations = {}
623
- self._init_containers = []
624
- # will be applied on the pod spec only when calling .pod(), allows to override spec attributes
625
- self.default_pod_spec_attributes = default_pod_spec_attributes
626
- self.resources = resources
627
-
628
- @property
629
- def pod(self):
630
- return self._get_spec()
631
-
632
- @property
633
- def init_containers(self):
634
- return self._init_containers
635
-
636
- @init_containers.setter
637
- def init_containers(self, containers):
638
- self._init_containers = containers
639
-
640
- def append_init_container(
641
- self,
642
- image,
643
- command=None,
644
- args=None,
645
- env=None,
646
- image_pull_policy="IfNotPresent",
647
- name="init",
648
- ):
649
- if isinstance(env, dict):
650
- env = [client.V1EnvVar(name=k, value=v) for k, v in env.items()]
651
- self._init_containers.append(
652
- client.V1Container(
653
- name=name,
654
- image=image,
655
- env=env,
656
- command=command,
657
- args=args,
658
- image_pull_policy=image_pull_policy,
659
- )
660
- )
661
-
662
- def add_label(self, key, value):
663
- self._labels[key] = str(value)
664
-
665
- def add_annotation(self, key, value):
666
- self._annotations[key] = str(value)
667
-
668
- def add_volume(self, volume: client.V1Volume, mount_path, name=None, sub_path=None):
669
- self._mounts.append(
670
- client.V1VolumeMount(
671
- name=name or volume.name, mount_path=mount_path, sub_path=sub_path
672
- )
673
- )
674
- self._volumes.append(volume)
675
-
676
- def mount_empty(self, name="empty", mount_path="/empty"):
677
- self.add_volume(
678
- client.V1Volume(name=name, empty_dir=client.V1EmptyDirVolumeSource()),
679
- mount_path=mount_path,
680
- )
681
-
682
- def mount_v3io(
683
- self, name="v3io", remote="~/", mount_path="/User", access_key="", user=""
684
- ):
685
- self.add_volume(
686
- v3io_to_vol(name, remote, access_key, user),
687
- mount_path=mount_path,
688
- name=name,
689
- )
690
-
691
- def mount_cfgmap(self, name, path="/config"):
692
- self.add_volume(
693
- client.V1Volume(
694
- name=name, config_map=client.V1ConfigMapVolumeSource(name=name)
695
- ),
696
- mount_path=path,
697
- )
698
-
699
- def mount_secret(self, name, path="/secret", items=None, sub_path=None):
700
- self.add_volume(
701
- client.V1Volume(
702
- name=name,
703
- secret=client.V1SecretVolumeSource(
704
- secret_name=name,
705
- items=items,
706
- ),
707
- ),
708
- mount_path=path,
709
- sub_path=sub_path,
710
- )
711
-
712
- def set_node_selector(self, node_selector: typing.Optional[typing.Dict[str, str]]):
713
- self.node_selector = node_selector
714
-
715
- def _get_spec(self, template=False):
716
-
717
- pod_obj = client.V1PodTemplate if template else client.V1Pod
718
-
719
- if self.env and isinstance(self.env, dict):
720
- env = [client.V1EnvVar(name=k, value=v) for k, v in self.env.items()]
721
- else:
722
- env = self.env
723
- container = client.V1Container(
724
- name="base",
725
- image=self.image,
726
- env=env,
727
- command=self.command,
728
- args=self.args,
729
- volume_mounts=self._mounts,
730
- resources=self.resources,
731
- )
732
-
733
- pod_spec = client.V1PodSpec(
734
- containers=[container],
735
- restart_policy="Never",
736
- volumes=self._volumes,
737
- node_selector=self.node_selector,
738
- )
739
-
740
- # if attribute isn't defined use default pod spec attributes
741
- for key, val in self.default_pod_spec_attributes.items():
742
- if not getattr(pod_spec, key, None):
743
- setattr(pod_spec, key, val)
744
-
745
- for init_containers in self._init_containers:
746
- init_containers.volume_mounts = self._mounts
747
- pod_spec.init_containers = self._init_containers
748
-
749
- pod = pod_obj(
750
- metadata=client.V1ObjectMeta(
751
- generate_name=f"{self.task_name}-",
752
- namespace=self.namespace,
753
- labels=self._labels,
754
- annotations=self._annotations,
755
- ),
756
- spec=pod_spec,
757
- )
758
- return pod
759
-
760
-
761
- def format_labels(labels):
762
- """Convert a dictionary of labels into a comma separated string"""
763
- if labels:
764
- return ",".join([f"{k}={v}" for k, v in labels.items()])
765
- else:
766
- return ""
767
-
768
-
769
- def verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str = None):
770
- # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
771
- if requests_gpu and not limits_gpu:
772
- raise mlrun.errors.MLRunConflictError(
773
- "You cannot specify GPU requests without specifying limits"
774
- )
775
- if requests_gpu and limits_gpu and requests_gpu != limits_gpu:
776
- raise mlrun.errors.MLRunConflictError(
777
- f"When specifying both GPU requests and limits these two values must be equal, "
778
- f"requests_gpu={requests_gpu}, limits_gpu={limits_gpu}"
779
- )
30
+ kubernetes.config.load_incluster_config()
31
+ _running_inside_kubernetes_cluster = True
32
+ except kubernetes.config.ConfigException:
33
+ _running_inside_kubernetes_cluster = False
34
+ return _running_inside_kubernetes_cluster
780
35
 
781
36
 
782
37
  def generate_preemptible_node_selector_requirements(
@@ -785,7 +40,7 @@ def generate_preemptible_node_selector_requirements(
785
40
  """
786
41
  Generate node selector requirements based on the pre-configured node selector of the preemptible nodes.
787
42
  node selector operator represents a key's relationship to a set of values.
788
- Valid operators are listed in :py:class:`~mlrun.api.schemas.NodeSelectorOperator`
43
+ Valid operators are listed in :py:class:`~mlrun.common.schemas.NodeSelectorOperator`
789
44
  :param node_selector_operator: The operator of V1NodeSelectorRequirement
790
45
  :return: List[V1NodeSelectorRequirement]
791
46
  """
@@ -815,12 +70,9 @@ def generate_preemptible_nodes_anti_affinity_terms() -> typing.List[
815
70
  https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity
816
71
  :return: List contains one nodeSelectorTerm with multiple expressions.
817
72
  """
818
- # import here to avoid circular imports
819
- from mlrun.api.schemas import NodeSelectorOperator
820
-
821
73
  # compile affinities with operator NotIn to make sure pods are not running on preemptible nodes.
822
74
  node_selector_requirements = generate_preemptible_node_selector_requirements(
823
- NodeSelectorOperator.node_selector_op_not_in.value
75
+ mlrun.common.schemas.NodeSelectorOperator.node_selector_op_not_in.value
824
76
  )
825
77
  return [
826
78
  kubernetes.client.V1NodeSelectorTerm(
@@ -838,14 +90,11 @@ def generate_preemptible_nodes_affinity_terms() -> typing.List[
838
90
  then the pod can be scheduled onto a node if at least one of the nodeSelectorTerms can be satisfied.
839
91
  :return: List of nodeSelectorTerms associated with the preemptible nodes.
840
92
  """
841
- # import here to avoid circular imports
842
- from mlrun.api.schemas import NodeSelectorOperator
843
-
844
93
  node_selector_terms = []
845
94
 
846
95
  # compile affinities with operator In so pods could schedule on at least one of the preemptible nodes.
847
96
  node_selector_requirements = generate_preemptible_node_selector_requirements(
848
- NodeSelectorOperator.node_selector_op_in.value
97
+ mlrun.common.schemas.NodeSelectorOperator.node_selector_op_in.value
849
98
  )
850
99
  for expression in node_selector_requirements:
851
100
  node_selector_terms.append(