mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444) hide show
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -28,14 +28,16 @@ import v3io.dataplane
28
28
  import v3io_frames
29
29
 
30
30
  import mlrun
31
- import mlrun.api.schemas
31
+ import mlrun.common.model_monitoring
32
+ import mlrun.common.schemas
32
33
  import mlrun.data_types.infer
33
34
  import mlrun.feature_store as fstore
35
+ import mlrun.model_monitoring
36
+ import mlrun.model_monitoring.stores
34
37
  import mlrun.run
35
38
  import mlrun.utils.helpers
36
39
  import mlrun.utils.model_monitoring
37
40
  import mlrun.utils.v3io_clients
38
- from mlrun.model_monitoring.constants import EventFieldType
39
41
  from mlrun.utils import logger
40
42
 
41
43
 
@@ -461,6 +463,7 @@ def calculate_inputs_statistics(
461
463
 
462
464
  :returns: The calculated statistics of the inputs data.
463
465
  """
466
+
464
467
  # Use `DFDataInfer` to calculate the statistics over the inputs:
465
468
  inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
466
469
  df=inputs,
@@ -493,8 +496,6 @@ class BatchProcessor:
493
496
  self,
494
497
  context: mlrun.run.MLClientCtx,
495
498
  project: str,
496
- model_monitoring_access_key: str,
497
- v3io_access_key: str,
498
499
  ):
499
500
 
500
501
  """
@@ -502,60 +503,16 @@ class BatchProcessor:
502
503
 
503
504
  :param context: An MLRun context.
504
505
  :param project: Project name.
505
- :param model_monitoring_access_key: Access key to apply the model monitoring process.
506
- :param v3io_access_key: Token key for v3io.
507
506
  """
508
507
  self.context = context
509
508
  self.project = project
510
509
 
511
- self.v3io_access_key = v3io_access_key
512
- self.model_monitoring_access_key = (
513
- model_monitoring_access_key or v3io_access_key
514
- )
515
-
516
510
  # Initialize virtual drift object
517
511
  self.virtual_drift = VirtualDrift(inf_capping=10)
518
512
 
519
- # Define the required paths for the project objects.
520
- # Note that the kv table, tsdb, and the input stream paths are located at the default location
521
- # while the parquet path is located at the user-space location
522
- template = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default
523
- kv_path = template.format(project=self.project, kind="endpoints")
524
- (
525
- _,
526
- self.kv_container,
527
- self.kv_path,
528
- ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(kv_path)
529
- tsdb_path = template.format(project=project, kind="events")
530
- (
531
- _,
532
- self.tsdb_container,
533
- self.tsdb_path,
534
- ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(tsdb_path)
535
- stream_path = template.format(project=self.project, kind="log_stream")
536
- (
537
- _,
538
- self.stream_container,
539
- self.stream_path,
540
- ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(stream_path)
541
- self.parquet_path = (
542
- mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
543
- project=project, kind="parquet"
544
- )
545
- )
546
-
547
513
  logger.info(
548
514
  "Initializing BatchProcessor",
549
515
  project=project,
550
- model_monitoring_access_key_initalized=bool(model_monitoring_access_key),
551
- v3io_access_key_initialized=bool(v3io_access_key),
552
- parquet_path=self.parquet_path,
553
- kv_container=self.kv_container,
554
- kv_path=self.kv_path,
555
- tsdb_container=self.tsdb_container,
556
- tsdb_path=self.tsdb_path,
557
- stream_container=self.stream_container,
558
- stream_path=self.stream_path,
559
516
  )
560
517
 
561
518
  # Get drift thresholds from the model monitoring configuration
@@ -567,46 +524,87 @@ class BatchProcessor:
567
524
  )
568
525
 
569
526
  # Get a runtime database
570
- self.db = mlrun.get_run_db()
571
527
 
572
- # Get the frames clients based on the v3io configuration
573
- # it will be used later for writing the results into the tsdb
574
- self.v3io = mlrun.utils.v3io_clients.get_v3io_client(
575
- access_key=self.v3io_access_key
576
- )
577
- self.frames = mlrun.utils.v3io_clients.get_frames_client(
578
- address=mlrun.mlconf.v3io_framesd,
579
- container=self.tsdb_container,
580
- token=self.v3io_access_key,
528
+ self.db = mlrun.model_monitoring.stores.get_model_endpoint_store(
529
+ project=project
581
530
  )
582
531
 
532
+ if not mlrun.mlconf.is_ce_mode():
533
+ # TODO: Once there is a time series DB alternative in a non-CE deployment, we need to update this if
534
+ # statement to be applied only for V3IO TSDB
535
+ self._initialize_v3io_configurations()
536
+
583
537
  # If an error occurs, it will be raised using the following argument
584
538
  self.exception = None
585
539
 
586
540
  # Get the batch interval range
587
- self.batch_dict = context.parameters[EventFieldType.BATCH_INTERVALS_DICT]
541
+ self.batch_dict = context.parameters[
542
+ mlrun.common.model_monitoring.EventFieldType.BATCH_INTERVALS_DICT
543
+ ]
588
544
 
589
- # TODO: This will be removed in 1.2.0 once the job params can be parsed with different types
545
+ # TODO: This will be removed in 1.5.0 once the job params can be parsed with different types
590
546
  # Convert batch dict string into a dictionary
591
547
  if isinstance(self.batch_dict, str):
592
548
  self._parse_batch_dict_str()
593
549
 
550
+ def _initialize_v3io_configurations(self):
551
+ self.v3io_access_key = os.environ.get("V3IO_ACCESS_KEY")
552
+ self.model_monitoring_access_key = (
553
+ os.environ.get("MODEL_MONITORING_ACCESS_KEY") or self.v3io_access_key
554
+ )
555
+
556
+ # Define the required paths for the project objects
557
+ tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
558
+ project=self.project,
559
+ kind=mlrun.common.model_monitoring.FileTargetKind.EVENTS,
560
+ )
561
+ (
562
+ _,
563
+ self.tsdb_container,
564
+ self.tsdb_path,
565
+ ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(tsdb_path)
566
+ # stream_path = template.format(project=self.project, kind="log_stream")
567
+ stream_path = mlrun.mlconf.get_model_monitoring_file_target_path(
568
+ project=self.project,
569
+ kind=mlrun.common.model_monitoring.FileTargetKind.LOG_STREAM,
570
+ )
571
+ (
572
+ _,
573
+ self.stream_container,
574
+ self.stream_path,
575
+ ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(stream_path)
576
+
577
+ # Get the frames clients based on the v3io configuration
578
+ # it will be used later for writing the results into the tsdb
579
+ self.v3io = mlrun.utils.v3io_clients.get_v3io_client(
580
+ access_key=self.v3io_access_key
581
+ )
582
+ self.frames = mlrun.utils.v3io_clients.get_frames_client(
583
+ address=mlrun.mlconf.v3io_framesd,
584
+ container=self.tsdb_container,
585
+ token=self.v3io_access_key,
586
+ )
587
+
594
588
  def post_init(self):
595
589
  """
596
590
  Preprocess of the batch processing.
597
591
  """
598
592
 
599
- # create v3io stream based on the input stream
600
- response = self.v3io.create_stream(
601
- container=self.stream_container,
602
- path=self.stream_path,
603
- shard_count=1,
604
- raise_for_status=v3io.dataplane.RaiseForStatus.never,
605
- access_key=self.v3io_access_key,
606
- )
593
+ if not mlrun.mlconf.is_ce_mode():
594
+ # Create v3io stream based on the input stream
595
+ response = self.v3io.create_stream(
596
+ container=self.stream_container,
597
+ path=self.stream_path,
598
+ shard_count=1,
599
+ raise_for_status=v3io.dataplane.RaiseForStatus.never,
600
+ access_key=self.v3io_access_key,
601
+ )
607
602
 
608
- if not (response.status_code == 400 and "ResourceInUse" in str(response.body)):
609
- response.raise_for_status([409, 204, 403])
603
+ if not (
604
+ response.status_code == 400 and "ResourceInUse" in str(response.body)
605
+ ):
606
+ response.raise_for_status([409, 204, 403])
607
+ pass
610
608
 
611
609
  def run(self):
612
610
  """
@@ -614,231 +612,218 @@ class BatchProcessor:
614
612
  """
615
613
  # Get model endpoints (each deployed project has at least 1 serving model):
616
614
  try:
617
- endpoints = self.db.list_model_endpoints(self.project)
615
+ endpoints = self.db.list_model_endpoints()
618
616
  except Exception as e:
619
617
  logger.error("Failed to list endpoints", exc=e)
620
618
  return
621
619
 
622
- active_endpoints = set()
623
- for endpoint in endpoints.endpoints:
620
+ for endpoint in endpoints:
624
621
  if (
625
- endpoint.spec.active
626
- and endpoint.spec.monitoring_mode
627
- == mlrun.api.schemas.ModelMonitoringMode.enabled.value
622
+ endpoint[mlrun.common.model_monitoring.EventFieldType.ACTIVE]
623
+ and endpoint[
624
+ mlrun.common.model_monitoring.EventFieldType.MONITORING_MODE
625
+ ]
626
+ == mlrun.common.model_monitoring.ModelMonitoringMode.enabled.value
628
627
  ):
629
- active_endpoints.add(endpoint.metadata.uid)
630
-
631
- # perform drift analysis for each model endpoint
632
- for endpoint_id in active_endpoints:
633
- try:
634
-
635
- # Get model endpoint object:
636
- endpoint = self.db.get_model_endpoint(
637
- project=self.project, endpoint_id=endpoint_id
638
- )
639
-
640
628
  # Skip router endpoint:
641
629
  if (
642
- endpoint.status.endpoint_type
643
- == mlrun.utils.model_monitoring.EndpointType.ROUTER
630
+ int(
631
+ endpoint[
632
+ mlrun.common.model_monitoring.EventFieldType.ENDPOINT_TYPE
633
+ ]
634
+ )
635
+ == mlrun.common.model_monitoring.EndpointType.ROUTER
644
636
  ):
645
- # endpoint.status.feature_stats is None
646
- logger.info(f"{endpoint_id} is router skipping")
637
+ # Router endpoint has no feature stats
638
+ logger.info(
639
+ f"{endpoint[mlrun.common.model_monitoring.EventFieldType.UID]} is router skipping"
640
+ )
647
641
  continue
642
+ self.update_drift_metrics(endpoint=endpoint)
648
643
 
649
- # convert feature set into dataframe and get the latest dataset
650
- (
651
- _,
652
- serving_function_name,
653
- _,
654
- _,
655
- ) = mlrun.utils.helpers.parse_versioned_object_uri(
656
- endpoint.spec.function_uri
657
- )
658
-
659
- model_name = endpoint.spec.model.replace(":", "-")
644
+ def update_drift_metrics(self, endpoint: dict):
645
+ try:
646
+ # Convert feature set into dataframe and get the latest dataset
647
+ (
648
+ _,
649
+ serving_function_name,
650
+ _,
651
+ _,
652
+ ) = mlrun.utils.helpers.parse_versioned_object_uri(
653
+ endpoint[mlrun.common.model_monitoring.EventFieldType.FUNCTION_URI]
654
+ )
660
655
 
661
- m_fs = fstore.get_feature_set(
662
- f"store://feature-sets/{self.project}/monitoring-{serving_function_name}-{model_name}"
663
- )
656
+ model_name = endpoint[
657
+ mlrun.common.model_monitoring.EventFieldType.MODEL
658
+ ].replace(":", "-")
664
659
 
665
- # Getting batch interval start time and end time
666
- start_time, end_time = self.get_interval_range()
660
+ m_fs = fstore.get_feature_set(
661
+ f"store://feature-sets/{self.project}/monitoring-{serving_function_name}-{model_name}"
662
+ )
667
663
 
668
- try:
669
- df = m_fs.to_dataframe(
670
- start_time=start_time,
671
- end_time=end_time,
672
- time_column="timestamp",
673
- )
664
+ # Getting batch interval start time and end time
665
+ start_time, end_time = self._get_interval_range()
674
666
 
675
- if len(df) == 0:
676
- logger.warn(
677
- "Not enough model events since the beginning of the batch interval",
678
- parquet_target=m_fs.status.targets[0].path,
679
- endpoint=endpoint_id,
680
- min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
681
- start_time=str(
682
- datetime.datetime.now() - datetime.timedelta(hours=1)
683
- ),
684
- end_time=str(datetime.datetime.now()),
685
- )
686
- continue
667
+ try:
668
+ df = m_fs.to_dataframe(
669
+ start_time=start_time,
670
+ end_time=end_time,
671
+ time_column=mlrun.common.model_monitoring.EventFieldType.TIMESTAMP,
672
+ )
687
673
 
688
- # TODO: The below warn will be removed once the state of the Feature Store target is updated
689
- # as expected. In that case, the existence of the file will be checked before trying to get
690
- # the offline data from the feature set.
691
- # Continue if not enough events provided since the deployment of the model endpoint
692
- except FileNotFoundError:
674
+ if len(df) == 0:
693
675
  logger.warn(
694
- "Parquet not found, probably due to not enough model events",
676
+ "Not enough model events since the beginning of the batch interval",
695
677
  parquet_target=m_fs.status.targets[0].path,
696
- endpoint=endpoint_id,
678
+ endpoint=endpoint[
679
+ mlrun.common.model_monitoring.EventFieldType.UID
680
+ ],
697
681
  min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
682
+ start_time=str(
683
+ datetime.datetime.now() - datetime.timedelta(hours=1)
684
+ ),
685
+ end_time=str(datetime.datetime.now()),
698
686
  )
699
- continue
687
+ return
688
+
689
+ # TODO: The below warn will be removed once the state of the Feature Store target is updated
690
+ # as expected. In that case, the existence of the file will be checked before trying to get
691
+ # the offline data from the feature set.
692
+ # Continue if not enough events provided since the deployment of the model endpoint
693
+ except FileNotFoundError:
694
+ logger.warn(
695
+ "Parquet not found, probably due to not enough model events",
696
+ parquet_target=m_fs.status.targets[0].path,
697
+ endpoint=endpoint[mlrun.common.model_monitoring.EventFieldType.UID],
698
+ min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
699
+ )
700
+ return
700
701
 
701
- # Get feature names from monitoring feature set
702
- feature_names = [
703
- feature_name["name"]
704
- for feature_name in m_fs.spec.features.to_dict()
705
- ]
702
+ # Get feature names from monitoring feature set
703
+ feature_names = [
704
+ feature_name["name"] for feature_name in m_fs.spec.features.to_dict()
705
+ ]
706
706
 
707
- # Create DataFrame based on the input features
708
- stats_columns = [
709
- "timestamp",
710
- *feature_names,
711
- ]
707
+ # Create DataFrame based on the input features
708
+ stats_columns = [
709
+ mlrun.common.model_monitoring.EventFieldType.TIMESTAMP,
710
+ *feature_names,
711
+ ]
712
712
 
713
- # Add label names if provided
714
- if endpoint.spec.label_names:
715
- stats_columns.extend(endpoint.spec.label_names)
713
+ # Add label names if provided
714
+ if endpoint[mlrun.common.model_monitoring.EventFieldType.LABEL_NAMES]:
715
+ labels = endpoint[
716
+ mlrun.common.model_monitoring.EventFieldType.LABEL_NAMES
717
+ ]
718
+ if isinstance(labels, str):
719
+ labels = json.loads(labels)
720
+ stats_columns.extend(labels)
721
+ named_features_df = df[stats_columns].copy()
722
+
723
+ # Infer feature set stats and schema
724
+ fstore.api._infer_from_static_df(
725
+ named_features_df,
726
+ m_fs,
727
+ options=mlrun.data_types.infer.InferOptions.all_stats(),
728
+ )
716
729
 
717
- named_features_df = df[stats_columns].copy()
730
+ # Save feature set to apply changes
731
+ m_fs.save()
718
732
 
719
- # Infer feature set stats and schema
720
- fstore.api._infer_from_static_df(
721
- named_features_df,
722
- m_fs,
723
- options=mlrun.data_types.infer.InferOptions.all_stats(),
724
- )
733
+ # Get the timestamp of the latest request:
734
+ timestamp = df[mlrun.common.model_monitoring.EventFieldType.TIMESTAMP].iloc[
735
+ -1
736
+ ]
725
737
 
726
- # Save feature set to apply changes
727
- m_fs.save()
738
+ # Get the feature stats from the model endpoint for reference data
739
+ feature_stats = json.loads(
740
+ endpoint[mlrun.common.model_monitoring.EventFieldType.FEATURE_STATS]
741
+ )
728
742
 
729
- # Get the timestamp of the latest request:
730
- timestamp = df["timestamp"].iloc[-1]
743
+ # Get the current stats:
744
+ current_stats = calculate_inputs_statistics(
745
+ sample_set_statistics=feature_stats,
746
+ inputs=named_features_df,
747
+ )
731
748
 
732
- # Get the current stats:
733
- current_stats = calculate_inputs_statistics(
734
- sample_set_statistics=endpoint.status.feature_stats,
735
- inputs=named_features_df,
749
+ # Compute the drift based on the histogram of the current stats and the histogram of the original
750
+ # feature stats that can be found in the model endpoint object:
751
+ drift_result = self.virtual_drift.compute_drift_from_histograms(
752
+ feature_stats=feature_stats,
753
+ current_stats=current_stats,
754
+ )
755
+ logger.info("Drift result", drift_result=drift_result)
756
+
757
+ # Get drift thresholds from the model configuration:
758
+ monitor_configuration = (
759
+ json.loads(
760
+ endpoint[
761
+ mlrun.common.model_monitoring.EventFieldType.MONITOR_CONFIGURATION
762
+ ]
736
763
  )
764
+ or {}
765
+ )
766
+ possible_drift = monitor_configuration.get(
767
+ "possible_drift", self.default_possible_drift_threshold
768
+ )
769
+ drift_detected = monitor_configuration.get(
770
+ "drift_detected", self.default_drift_detected_threshold
771
+ )
737
772
 
738
- # Compute the drift based on the histogram of the current stats and the histogram of the original
739
- # feature stats that can be found in the model endpoint object:
740
- drift_result = self.virtual_drift.compute_drift_from_histograms(
741
- feature_stats=endpoint.status.feature_stats,
742
- current_stats=current_stats,
743
- )
744
- logger.info("Drift result", drift_result=drift_result)
773
+ # Check for possible drift based on the results of the statistical metrics defined above:
774
+ drift_status, drift_measure = self.virtual_drift.check_for_drift(
775
+ metrics_results_dictionary=drift_result,
776
+ possible_drift_threshold=possible_drift,
777
+ drift_detected_threshold=drift_detected,
778
+ )
779
+ logger.info(
780
+ "Drift status",
781
+ endpoint_id=endpoint[mlrun.common.model_monitoring.EventFieldType.UID],
782
+ drift_status=drift_status.value,
783
+ drift_measure=drift_measure,
784
+ )
745
785
 
746
- # Get drift thresholds from the model configuration:
747
- monitor_configuration = endpoint.spec.monitor_configuration or {}
748
- possible_drift = monitor_configuration.get(
749
- "possible_drift", self.default_possible_drift_threshold
750
- )
751
- drift_detected = monitor_configuration.get(
752
- "drift_detected", self.default_drift_detected_threshold
753
- )
786
+ attributes = {
787
+ "current_stats": json.dumps(current_stats),
788
+ "drift_measures": json.dumps(drift_result),
789
+ "drift_status": drift_status.value,
790
+ }
754
791
 
755
- # Check for possible drift based on the results of the statistical metrics defined above:
756
- drift_status, drift_measure = self.virtual_drift.check_for_drift(
757
- metrics_results_dictionary=drift_result,
758
- possible_drift_threshold=possible_drift,
759
- drift_detected_threshold=drift_detected,
760
- )
761
- logger.info(
762
- "Drift status",
763
- endpoint_id=endpoint_id,
764
- drift_status=drift_status.value,
792
+ self.db.update_model_endpoint(
793
+ endpoint_id=endpoint[mlrun.common.model_monitoring.EventFieldType.UID],
794
+ attributes=attributes,
795
+ )
796
+
797
+ if not mlrun.mlconf.is_ce_mode():
798
+ # Update drift results in TSDB
799
+ self._update_drift_in_input_stream(
800
+ endpoint_id=endpoint[
801
+ mlrun.common.model_monitoring.EventFieldType.UID
802
+ ],
803
+ drift_status=drift_status,
765
804
  drift_measure=drift_measure,
805
+ drift_result=drift_result,
806
+ timestamp=timestamp,
766
807
  )
767
-
768
- # If drift was detected, add the results to the input stream
769
- if (
770
- drift_status == DriftStatus.POSSIBLE_DRIFT
771
- or drift_status == DriftStatus.DRIFT_DETECTED
772
- ):
773
- self.v3io.stream.put_records(
774
- container=self.stream_container,
775
- stream_path=self.stream_path,
776
- records=[
777
- {
778
- "data": json.dumps(
779
- {
780
- "endpoint_id": endpoint_id,
781
- "drift_status": drift_status.value,
782
- "drift_measure": drift_measure,
783
- "drift_per_feature": {**drift_result},
784
- }
785
- )
786
- }
787
- ],
788
- )
789
-
790
- attributes = {
791
- "current_stats": json.dumps(current_stats),
792
- "drift_measures": json.dumps(drift_result),
793
- "drift_status": drift_status.value,
794
- }
795
-
796
- self.db.patch_model_endpoint(
797
- project=self.project,
798
- endpoint_id=endpoint_id,
799
- attributes=attributes,
808
+ logger.info(
809
+ "Done updating drift measures",
810
+ endpoint_id=endpoint[
811
+ mlrun.common.model_monitoring.EventFieldType.UID
812
+ ],
800
813
  )
801
814
 
802
- # Update the results in tsdb:
803
- tsdb_drift_measures = {
804
- "endpoint_id": endpoint_id,
805
- "timestamp": pd.to_datetime(
806
- timestamp,
807
- format=EventFieldType.TIME_FORMAT,
808
- ),
809
- "record_type": "drift_measures",
810
- "tvd_mean": drift_result["tvd_mean"],
811
- "kld_mean": drift_result["kld_mean"],
812
- "hellinger_mean": drift_result["hellinger_mean"],
813
- }
814
-
815
- try:
816
- self.frames.write(
817
- backend="tsdb",
818
- table=self.tsdb_path,
819
- dfs=pd.DataFrame.from_dict([tsdb_drift_measures]),
820
- index_cols=["timestamp", "endpoint_id", "record_type"],
821
- )
822
- except v3io_frames.errors.Error as err:
823
- logger.warn(
824
- "Could not write drift measures to TSDB",
825
- err=err,
826
- tsdb_path=self.tsdb_path,
827
- endpoint=endpoint_id,
828
- )
829
-
830
- logger.info("Done updating drift measures", endpoint_id=endpoint_id)
831
-
832
- except Exception as e:
833
- logger.error(f"Exception for endpoint {endpoint_id}")
834
- self.exception = e
815
+ except Exception as e:
816
+ logger.error(
817
+ f"Exception for endpoint {endpoint[mlrun.common.model_monitoring.EventFieldType.UID]}"
818
+ )
819
+ self.exception = e
835
820
 
836
- def get_interval_range(self) -> Tuple[datetime.datetime, datetime.datetime]:
821
+ def _get_interval_range(self) -> Tuple[datetime.datetime, datetime.datetime]:
837
822
  """Getting batch interval time range"""
838
823
  minutes, hours, days = (
839
- self.batch_dict[EventFieldType.MINUTES],
840
- self.batch_dict[EventFieldType.HOURS],
841
- self.batch_dict[EventFieldType.DAYS],
824
+ self.batch_dict[mlrun.common.model_monitoring.EventFieldType.MINUTES],
825
+ self.batch_dict[mlrun.common.model_monitoring.EventFieldType.HOURS],
826
+ self.batch_dict[mlrun.common.model_monitoring.EventFieldType.DAYS],
842
827
  )
843
828
  start_time = datetime.datetime.now() - datetime.timedelta(
844
829
  minutes=minutes, hours=hours, days=days
@@ -858,13 +843,79 @@ class BatchProcessor:
858
843
  pair_list = pair.split(":")
859
844
  self.batch_dict[pair_list[0]] = float(pair_list[1])
860
845
 
846
+ def _update_drift_in_input_stream(
847
+ self,
848
+ endpoint_id: str,
849
+ drift_status: DriftStatus,
850
+ drift_measure: float,
851
+ drift_result: Dict[str, Dict[str, Any]],
852
+ timestamp: pd._libs.tslibs.timestamps.Timestamp,
853
+ ):
854
+ """Update drift results in input stream.
855
+
856
+ :param endpoint_id: The unique id of the model endpoint.
857
+ :param drift_status: Drift status result. Possible values can be found under DriftStatus enum class.
858
+ :param drift_measure: The drift result (float) based on the mean of the Total Variance Distance and the
859
+ Hellinger distance.
860
+ :param drift_result: A dictionary that includes the drift results for each feature.
861
+ :param timestamp: Pandas Timestamp value.
862
+
863
+ """
864
+
865
+ if (
866
+ drift_status == DriftStatus.POSSIBLE_DRIFT
867
+ or drift_status == DriftStatus.DRIFT_DETECTED
868
+ ):
869
+ self.v3io.stream.put_records(
870
+ container=self.stream_container,
871
+ stream_path=self.stream_path,
872
+ records=[
873
+ {
874
+ "data": json.dumps(
875
+ {
876
+ "endpoint_id": endpoint_id,
877
+ "drift_status": drift_status.value,
878
+ "drift_measure": drift_measure,
879
+ "drift_per_feature": {**drift_result},
880
+ }
881
+ )
882
+ }
883
+ ],
884
+ )
885
+
886
+ # Update the results in tsdb:
887
+ tsdb_drift_measures = {
888
+ "endpoint_id": endpoint_id,
889
+ "timestamp": pd.to_datetime(
890
+ timestamp,
891
+ format=mlrun.common.model_monitoring.EventFieldType.TIME_FORMAT,
892
+ ),
893
+ "record_type": "drift_measures",
894
+ "tvd_mean": drift_result["tvd_mean"],
895
+ "kld_mean": drift_result["kld_mean"],
896
+ "hellinger_mean": drift_result["hellinger_mean"],
897
+ }
898
+
899
+ try:
900
+ self.frames.write(
901
+ backend="tsdb",
902
+ table=self.tsdb_path,
903
+ dfs=pd.DataFrame.from_dict([tsdb_drift_measures]),
904
+ index_cols=["timestamp", "endpoint_id", "record_type"],
905
+ )
906
+ except v3io_frames.errors.Error as err:
907
+ logger.warn(
908
+ "Could not write drift measures to TSDB",
909
+ err=err,
910
+ tsdb_path=self.tsdb_path,
911
+ endpoint=endpoint_id,
912
+ )
913
+
861
914
 
862
915
  def handler(context: mlrun.run.MLClientCtx):
863
916
  batch_processor = BatchProcessor(
864
917
  context=context,
865
918
  project=context.project,
866
- model_monitoring_access_key=os.environ.get("MODEL_MONITORING_ACCESS_KEY"),
867
- v3io_access_key=os.environ.get("V3IO_ACCESS_KEY"),
868
919
  )
869
920
  batch_processor.post_init()
870
921
  batch_processor.run()