mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444) hide show
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3rc1.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/base.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -11,21 +11,27 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- import sys
15
14
  import tempfile
15
+ import urllib.parse
16
16
  from base64 import b64encode
17
17
  from os import path, remove
18
+ from typing import Optional, Union
18
19
 
19
20
  import dask.dataframe as dd
20
21
  import fsspec
21
22
  import orjson
22
23
  import pandas as pd
24
+ import pyarrow
25
+ import pytz
23
26
  import requests
24
27
  import urllib3
25
28
 
26
29
  import mlrun.errors
27
30
  from mlrun.errors import err_to_str
28
- from mlrun.utils import is_ipython, logger
31
+ from mlrun.utils import StorePrefix, is_ipython, logger
32
+
33
+ from .store_resources import is_store_uri, parse_store_uri
34
+ from .utils import filter_df_start_end_time, select_columns_from_df
29
35
 
30
36
  verify_ssl = False
31
37
  if not verify_ssl:
@@ -63,6 +69,17 @@ class DataStore:
63
69
  def is_unstructured(self):
64
70
  return True
65
71
 
72
+ @staticmethod
73
+ def _sanitize_url(url):
74
+ """
75
+ Extract only the schema, netloc, and path from an input URL if they exist,
76
+ excluding parameters, query, or fragments.
77
+ """
78
+ parsed_url = urllib.parse.urlparse(url)
79
+ scheme = f"{parsed_url.scheme}:" if parsed_url.scheme else ""
80
+ netloc = f"//{parsed_url.netloc}" if parsed_url.netloc else "//"
81
+ return f"{scheme}{netloc}{parsed_url.path}"
82
+
66
83
  @staticmethod
67
84
  def uri_to_kfp(endpoint, subpath):
68
85
  raise ValueError("data store doesnt support KFP URLs")
@@ -71,7 +88,7 @@ class DataStore:
71
88
  def uri_to_ipython(endpoint, subpath):
72
89
  return ""
73
90
 
74
- def get_filesystem(self, silent=True):
91
+ def get_filesystem(self, silent=True) -> Optional[fsspec.AbstractFileSystem]:
75
92
  """return fsspec file system object, if supported"""
76
93
  return None
77
94
 
@@ -135,6 +152,64 @@ class DataStore:
135
152
  def upload(self, key, src_path):
136
153
  pass
137
154
 
155
+ @staticmethod
156
+ def _parquet_reader(df_module, url, file_system, time_column, start_time, end_time):
157
+ from storey.utils import find_filters, find_partitions
158
+
159
+ def set_filters(
160
+ partitions_time_attributes, start_time_inner, end_time_inner, kwargs
161
+ ):
162
+ filters = []
163
+ find_filters(
164
+ partitions_time_attributes,
165
+ start_time_inner,
166
+ end_time_inner,
167
+ filters,
168
+ time_column,
169
+ )
170
+ kwargs["filters"] = filters
171
+
172
+ def reader(*args, **kwargs):
173
+ if start_time or end_time:
174
+ if time_column is None:
175
+ raise mlrun.errors.MLRunInvalidArgumentError(
176
+ "When providing start_time or end_time, must provide time_column"
177
+ )
178
+
179
+ partitions_time_attributes = find_partitions(url, file_system)
180
+ set_filters(
181
+ partitions_time_attributes,
182
+ start_time,
183
+ end_time,
184
+ kwargs,
185
+ )
186
+ try:
187
+ return df_module.read_parquet(*args, **kwargs)
188
+ except pyarrow.lib.ArrowInvalid as ex:
189
+ if not str(ex).startswith(
190
+ "Cannot compare timestamp with timezone to timestamp without timezone"
191
+ ):
192
+ raise ex
193
+
194
+ if start_time.tzinfo:
195
+ start_time_inner = start_time.replace(tzinfo=None)
196
+ end_time_inner = end_time.replace(tzinfo=None)
197
+ else:
198
+ start_time_inner = start_time.replace(tzinfo=pytz.utc)
199
+ end_time_inner = end_time.replace(tzinfo=pytz.utc)
200
+
201
+ set_filters(
202
+ partitions_time_attributes,
203
+ start_time_inner,
204
+ end_time_inner,
205
+ kwargs,
206
+ )
207
+ return df_module.read_parquet(*args, **kwargs)
208
+ else:
209
+ return df_module.read_parquet(*args, **kwargs)
210
+
211
+ return reader
212
+
138
213
  def as_df(
139
214
  self,
140
215
  url,
@@ -148,17 +223,29 @@ class DataStore:
148
223
  **kwargs,
149
224
  ):
150
225
  df_module = df_module or pd
151
- if url.endswith(".csv") or format == "csv":
226
+ file_url = self._sanitize_url(url)
227
+ is_csv, is_json, drop_time_column = False, False, False
228
+ file_system = self.get_filesystem()
229
+ if file_url.endswith(".csv") or format == "csv":
230
+ is_csv = True
231
+ drop_time_column = False
152
232
  if columns:
233
+ if (
234
+ time_column
235
+ and (start_time or end_time)
236
+ and time_column not in columns
237
+ ):
238
+ columns.append(time_column)
239
+ drop_time_column = True
153
240
  kwargs["usecols"] = columns
241
+
154
242
  reader = df_module.read_csv
155
- filesystem = self.get_filesystem()
156
- if filesystem:
157
- if filesystem.isdir(url):
243
+ if file_system:
244
+ if file_system.isdir(file_url):
158
245
 
159
246
  def reader(*args, **kwargs):
160
247
  base_path = args[0]
161
- file_entries = filesystem.listdir(base_path)
248
+ file_entries = file_system.listdir(base_path)
162
249
  filenames = []
163
250
  for file_entry in file_entries:
164
251
  if (
@@ -176,51 +263,31 @@ class DataStore:
176
263
  dfs.append(df_module.read_csv(*updated_args, **kwargs))
177
264
  return pd.concat(dfs)
178
265
 
179
- elif url.endswith(".parquet") or url.endswith(".pq") or format == "parquet":
266
+ elif (
267
+ file_url.endswith(".parquet")
268
+ or file_url.endswith(".pq")
269
+ or format == "parquet"
270
+ ):
180
271
  if columns:
181
272
  kwargs["columns"] = columns
182
273
 
183
- def reader(*args, **kwargs):
184
- if start_time or end_time:
185
- if sys.version_info < (3, 7):
186
- raise ValueError(
187
- f"feature not supported for python version {sys.version_info}"
188
- )
189
-
190
- if time_column is None:
191
- raise mlrun.errors.MLRunInvalidArgumentError(
192
- "When providing start_time or end_time, must provide time_column"
193
- )
194
-
195
- from storey.utils import find_filters, find_partitions
196
-
197
- filters = []
198
- partitions_time_attributes = find_partitions(url, file_system)
199
-
200
- find_filters(
201
- partitions_time_attributes,
202
- start_time,
203
- end_time,
204
- filters,
205
- time_column,
206
- )
207
- kwargs["filters"] = filters
208
-
209
- return df_module.read_parquet(*args, **kwargs)
274
+ reader = self._parquet_reader(
275
+ df_module, url, file_system, time_column, start_time, end_time
276
+ )
210
277
 
211
- elif url.endswith(".json") or format == "json":
278
+ elif file_url.endswith(".json") or format == "json":
279
+ is_json = True
212
280
  reader = df_module.read_json
213
281
 
214
282
  else:
215
283
  raise Exception(f"file type unhandled {url}")
216
284
 
217
- file_system = self.get_filesystem()
218
285
  if file_system:
219
- if self.supports_isdir() and file_system.isdir(url) or df_module == dd:
286
+ if self.supports_isdir() and file_system.isdir(file_url) or df_module == dd:
220
287
  storage_options = self.get_storage_options()
221
288
  if storage_options:
222
289
  kwargs["storage_options"] = storage_options
223
- return reader(url, **kwargs)
290
+ df = reader(url, **kwargs)
224
291
  else:
225
292
 
226
293
  file = url
@@ -230,12 +297,26 @@ class DataStore:
230
297
  # support the storage_options parameter.
231
298
  file = file_system.open(url)
232
299
 
233
- return reader(file, **kwargs)
234
-
235
- temp_file = tempfile.NamedTemporaryFile(delete=False)
236
- self.download(self._join(subpath), temp_file.name)
237
- df = reader(temp_file.name, **kwargs)
238
- remove(temp_file.name)
300
+ df = reader(file, **kwargs)
301
+ else:
302
+ temp_file = tempfile.NamedTemporaryFile(delete=False)
303
+ self.download(self._join(subpath), temp_file.name)
304
+ df = reader(temp_file.name, **kwargs)
305
+ remove(temp_file.name)
306
+
307
+ if is_json or is_csv:
308
+ # for parquet file the time filtering is executed in `reader`
309
+ df = filter_df_start_end_time(
310
+ df,
311
+ time_column=time_column,
312
+ start_time=start_time,
313
+ end_time=end_time,
314
+ )
315
+ if drop_time_column:
316
+ df.drop(columns=[time_column], inplace=True)
317
+ if is_json:
318
+ # for csv and parquet files the columns select is executed in `reader`.
319
+ df = select_columns_from_df(df, columns=columns)
239
320
  return df
240
321
 
241
322
  def to_dict(self):
@@ -383,7 +464,7 @@ class DataItem:
383
464
  return self._store.listdir(self._path)
384
465
 
385
466
  def local(self):
386
- """get the local path of the file, download to tmp first if its a remote object"""
467
+ """get the local path of the file, download to tmp first if it's a remote object"""
387
468
  if self.kind == "file":
388
469
  return self._path
389
470
  if self._local_path:
@@ -397,27 +478,47 @@ class DataItem:
397
478
  self.download(self._local_path)
398
479
  return self._local_path
399
480
 
481
+ def remove_local(self):
482
+ """remove the local file if it exists and was downloaded from a remote object"""
483
+ if self.kind == "file":
484
+ return
485
+
486
+ if self._local_path:
487
+ remove(self._local_path)
488
+ self._local_path = ""
489
+
400
490
  def as_df(
401
491
  self,
402
492
  columns=None,
403
493
  df_module=None,
404
494
  format="",
495
+ time_column=None,
496
+ start_time=None,
497
+ end_time=None,
405
498
  **kwargs,
406
499
  ):
407
500
  """return a dataframe object (generated from the dataitem).
408
501
 
409
- :param columns: optional, list of columns to select
410
- :param df_module: optional, py module used to create the DataFrame (e.g. pd, dd, cudf, ..)
411
- :param format: file format, if not specified it will be deducted from the suffix
502
+ :param columns: optional, list of columns to select
503
+ :param df_module: optional, py module used to create the DataFrame (e.g. pd, dd, cudf, ..)
504
+ :param format: file format, if not specified it will be deducted from the suffix
505
+ :param start_time: filters out data before this time
506
+ :param end_time: filters out data after this time
507
+ :param time_column: Store timestamp_key will be used if None.
508
+ The results will be filtered by this column and start_time & end_time.
412
509
  """
413
- return self._store.as_df(
510
+ df = self._store.as_df(
414
511
  self._url,
415
512
  self._path,
416
513
  columns=columns,
417
514
  df_module=df_module,
418
515
  format=format,
516
+ time_column=time_column,
517
+ start_time=start_time,
518
+ end_time=end_time,
419
519
  **kwargs,
420
520
  )
521
+ return df
421
522
 
422
523
  def show(self, format=None):
423
524
  """show the data object content in Jupyter
@@ -451,6 +552,19 @@ class DataItem:
451
552
  else:
452
553
  logger.error(f"unsupported show() format {suffix} for {self.url}")
453
554
 
555
+ def get_artifact_type(self) -> Union[str, None]:
556
+ """
557
+ Check if the data item represents an Artifact (one of Artifact, DatasetArtifact and ModelArtifact). If it does
558
+ it return the store uri prefix (artifacts, datasets or models), otherwise None.
559
+
560
+ :return: The store prefix of the artifact if it is an artifact data item and None if not.
561
+ """
562
+ if self.artifact_url and is_store_uri(url=self.artifact_url):
563
+ store_uri_prefix = parse_store_uri(self.artifact_url)[0]
564
+ if StorePrefix.is_artifact(prefix=store_uri_prefix):
565
+ return store_uri_prefix
566
+ return None
567
+
454
568
  def __str__(self):
455
569
  return self.url
456
570
 
@@ -514,7 +628,12 @@ def http_upload(url, file_path, headers=None, auth=None):
514
628
  class HttpStore(DataStore):
515
629
  def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
516
630
  super().__init__(parent, name, schema, endpoint, secrets)
631
+ self._https_auth_token = None
632
+ self._schema = schema
517
633
  self.auth = None
634
+ self._headers = {}
635
+ self._enrich_https_token()
636
+ self._validate_https_token()
518
637
 
519
638
  def get_filesystem(self, silent=True):
520
639
  """return fsspec file system object, if supported"""
@@ -532,9 +651,22 @@ class HttpStore(DataStore):
532
651
  raise ValueError("unimplemented")
533
652
 
534
653
  def get(self, key, size=None, offset=0):
535
- data = http_get(self.url + self._join(key), None, self.auth)
654
+ data = http_get(self.url + self._join(key), self._headers, self.auth)
536
655
  if offset:
537
656
  data = data[offset:]
538
657
  if size:
539
658
  data = data[:size]
540
659
  return data
660
+
661
+ def _enrich_https_token(self):
662
+ token = self._get_secret_or_env("HTTPS_AUTH_TOKEN")
663
+ if token:
664
+ self._https_auth_token = token
665
+ self._headers.setdefault("Authorization", f"token {token}")
666
+
667
+ def _validate_https_token(self):
668
+ if self._https_auth_token and self._schema in ["http"]:
669
+ logger.warn(
670
+ f"A AUTH TOKEN should not be provided while using {self._schema} "
671
+ f"schema as it is not secure and is not recommended."
672
+ )
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2021 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
mlrun/datastore/inmem.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -80,5 +80,8 @@ class InMemoryStore(DataStore):
80
80
  reader = df_module.read_json
81
81
  else:
82
82
  raise mlrun.errors.MLRunInvalidArgumentError(f"file type unhandled {url}")
83
+ # InMemoryStore store do not filter on time
84
+ for field in ["time_column", "start_time", "end_time"]:
85
+ kwargs.pop(field, None)
83
86
 
84
87
  return reader(item, **kwargs)
mlrun/datastore/redis.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2022 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
mlrun/datastore/s3.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.