mlrun-1.3.3-py3-none-any.whl → mlrun-1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444)
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -32,7 +32,12 @@ from ..config import config
32
32
  from ..model import DataSource
33
33
  from ..platforms.iguazio import parse_path
34
34
  from ..utils import get_class
35
- from .utils import store_path_to_spark
35
+ from .utils import (
36
+ _generate_sql_query_with_time_filter,
37
+ filter_df_start_end_time,
38
+ select_columns_from_df,
39
+ store_path_to_spark,
40
+ )
36
41
 
37
42
 
38
43
  def get_source_from_dict(source):
@@ -62,38 +67,59 @@ class BaseSourceDriver(DataSource):
62
67
  def to_step(self, key_field=None, time_field=None, context=None):
63
68
  import storey
64
69
 
70
+ if not self.support_storey:
71
+ raise mlrun.errors.MLRunRuntimeError(
72
+ f"{type(self).__name__} does not support storey engine"
73
+ )
74
+
65
75
  return storey.SyncEmitSource(context=context)
66
76
 
67
77
  def get_table_object(self):
68
78
  """get storey Table object"""
69
79
  return None
70
80
 
71
- def to_dataframe(self):
72
- return mlrun.store_manager.object(url=self.path).as_df()
73
-
74
- def filter_df_start_end_time(self, df, time_field):
75
- # give priority to source time_field over the feature set's timestamp_key
76
- if self.time_field:
77
- time_field = self.time_field
78
-
79
- if self.start_time or self.end_time:
80
- self.start_time = (
81
- datetime.min if self.start_time is None else self.start_time
82
- )
83
- self.end_time = datetime.max if self.end_time is None else self.end_time
84
- df = df.filter(
85
- (df[time_field] > self.start_time) & (df[time_field] <= self.end_time)
86
- )
87
- return df
81
+ def to_dataframe(
82
+ self,
83
+ columns=None,
84
+ df_module=None,
85
+ entities=None,
86
+ start_time=None,
87
+ end_time=None,
88
+ time_field=None,
89
+ ):
90
+ """return the source data as dataframe"""
91
+ return mlrun.store_manager.object(url=self.path).as_df(
92
+ columns=columns,
93
+ df_module=df_module,
94
+ start_time=start_time or self.start_time,
95
+ end_time=end_time or self.end_time,
96
+ time_column=time_field or self.time_field,
97
+ )
88
98
 
89
- def to_spark_df(self, session, named_view=False, time_field=None):
99
+ def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
90
100
  if self.support_spark:
91
101
  df = session.read.load(**self.get_spark_options())
92
102
  if named_view:
93
103
  df.createOrReplaceTempView(self.name)
94
- return df
104
+ return self._filter_spark_df(df, time_field, columns)
95
105
  raise NotImplementedError()
96
106
 
107
+ def _filter_spark_df(self, df, time_field=None, columns=None):
108
+ if not (columns or time_field):
109
+ return df
110
+
111
+ from pyspark.sql.functions import col
112
+
113
+ if time_field:
114
+ if self.start_time:
115
+ df = df.filter(col(time_field) > self.start_time)
116
+ if self.end_time:
117
+ df = df.filter(col(time_field) <= self.end_time)
118
+
119
+ if columns:
120
+ df = df.select([col(name) for name in columns])
121
+ return df
122
+
97
123
  def get_spark_options(self):
98
124
  # options used in spark.read.load(**options)
99
125
  raise NotImplementedError()
@@ -166,7 +192,6 @@ class CSVSource(BaseSourceDriver):
166
192
 
167
193
  return storey.CSVSource(
168
194
  paths=self.path,
169
- header=True,
170
195
  build_dict=True,
171
196
  key_field=self.key_field or key_field,
172
197
  storage_options=self._get_store().get_storage_options(),
@@ -182,7 +207,7 @@ class CSVSource(BaseSourceDriver):
182
207
  "inferSchema": "true",
183
208
  }
184
209
 
185
- def to_spark_df(self, session, named_view=False, time_field=None):
210
+ def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
186
211
  import pyspark.sql.functions as funcs
187
212
 
188
213
  df = session.read.load(**self.get_spark_options())
@@ -196,15 +221,28 @@ class CSVSource(BaseSourceDriver):
196
221
  df = df.withColumn(col_name, funcs.col(col_name).cast("timestamp"))
197
222
  if named_view:
198
223
  df.createOrReplaceTempView(self.name)
199
- return df
224
+ return self._filter_spark_df(df, time_field, columns)
200
225
 
201
- def to_dataframe(self):
202
- kwargs = self.attributes.get("reader_args", {})
203
- chunksize = self.attributes.get("chunksize")
204
- if chunksize:
205
- kwargs["chunksize"] = chunksize
226
+ def to_dataframe(
227
+ self,
228
+ columns=None,
229
+ df_module=None,
230
+ entities=None,
231
+ start_time=None,
232
+ end_time=None,
233
+ time_field=None,
234
+ ):
235
+ reader_args = self.attributes.get("reader_args", {})
206
236
  return mlrun.store_manager.object(url=self.path).as_df(
207
- parse_dates=self._parse_dates, **kwargs
237
+ columns=columns,
238
+ df_module=df_module,
239
+ format="csv",
240
+ start_time=start_time or self.start_time,
241
+ end_time=end_time or self.end_time,
242
+ time_column=time_field or self.time_field,
243
+ parse_dates=self._parse_dates,
244
+ chunksize=self.attributes.get("chunksize"),
245
+ **reader_args,
208
246
  )
209
247
 
210
248
  def is_iterator(self):
@@ -246,7 +284,6 @@ class ParquetSource(BaseSourceDriver):
246
284
  start_time: Optional[Union[datetime, str]] = None,
247
285
  end_time: Optional[Union[datetime, str]] = None,
248
286
  ):
249
-
250
287
  super().__init__(
251
288
  name,
252
289
  path,
@@ -312,10 +349,24 @@ class ParquetSource(BaseSourceDriver):
312
349
  "format": "parquet",
313
350
  }
314
351
 
315
- def to_dataframe(self):
316
- kwargs = self.attributes.get("reader_args", {})
352
+ def to_dataframe(
353
+ self,
354
+ columns=None,
355
+ df_module=None,
356
+ entities=None,
357
+ start_time=None,
358
+ end_time=None,
359
+ time_field=None,
360
+ ):
361
+ reader_args = self.attributes.get("reader_args", {})
317
362
  return mlrun.store_manager.object(url=self.path).as_df(
318
- format="parquet", **kwargs
363
+ columns=columns,
364
+ df_module=df_module,
365
+ start_time=start_time or self.start_time,
366
+ end_time=end_time or self.end_time,
367
+ time_column=time_field or self.time_field,
368
+ format="parquet",
369
+ **reader_args,
319
370
  )
320
371
 
321
372
 
@@ -323,8 +374,13 @@ class BigQuerySource(BaseSourceDriver):
323
374
  """
324
375
  Reads Google BigQuery query results as input source for a flow.
325
376
 
377
+ For authentication, set the GCP_CREDENTIALS project secret to the credentials json string.
378
+
326
379
  example::
327
380
 
381
+ # set the credentials
382
+ project.set_secrets({"GCP_CREDENTIALS": gcp_credentials_json})
383
+
328
384
  # use sql query
329
385
  query_string = "SELECT * FROM `the-psf.pypi.downloads20210328` LIMIT 5000"
330
386
  source = BigQuerySource("bq1", query=query_string,
@@ -371,11 +427,21 @@ class BigQuerySource(BaseSourceDriver):
371
427
  end_time=None,
372
428
  gcp_project: str = None,
373
429
  spark_options: dict = None,
430
+ **kwargs,
374
431
  ):
375
432
  if query and table:
376
433
  raise mlrun.errors.MLRunInvalidArgumentError(
377
434
  "cannot specify both table and query args"
378
435
  )
436
+ # Otherwise, the client library does not fully respect the limit
437
+ if (
438
+ max_results_for_table
439
+ and chunksize
440
+ and max_results_for_table % chunksize != 0
441
+ ):
442
+ raise mlrun.errors.MLRunInvalidArgumentError(
443
+ "max_results_for_table must be a multiple of chunksize"
444
+ )
379
445
  attrs = {
380
446
  "query": query,
381
447
  "table": table,
@@ -394,8 +460,8 @@ class BigQuerySource(BaseSourceDriver):
394
460
  schedule=schedule,
395
461
  start_time=start_time,
396
462
  end_time=end_time,
463
+ **kwargs,
397
464
  )
398
- self._rows_iterator = None
399
465
 
400
466
  def _get_credentials_string(self):
401
467
  gcp_project = self.attributes.get("gcp_project", None)
@@ -417,7 +483,15 @@ class BigQuerySource(BaseSourceDriver):
417
483
  return credentials, gcp_project or gcp_cred_dict["project_id"]
418
484
  return None, gcp_project
419
485
 
420
- def to_dataframe(self):
486
+ def to_dataframe(
487
+ self,
488
+ columns=None,
489
+ df_module=None,
490
+ entities=None,
491
+ start_time=None,
492
+ end_time=None,
493
+ time_field=None,
494
+ ):
421
495
  from google.cloud import bigquery
422
496
  from google.cloud.bigquery_storage_v1 import BigQueryReadClient
423
497
 
@@ -438,39 +512,43 @@ class BigQuerySource(BaseSourceDriver):
438
512
  if query:
439
513
  query_job = bqclient.query(query)
440
514
 
441
- self._rows_iterator = query_job.result(page_size=chunksize)
442
- dtypes = schema_to_dtypes(self._rows_iterator.schema)
443
- if chunksize:
444
- # passing bqstorage_client greatly improves performance
445
- return self._rows_iterator.to_dataframe_iterable(
446
- bqstorage_client=BigQueryReadClient(), dtypes=dtypes
447
- )
448
- else:
449
- return self._rows_iterator.to_dataframe(dtypes=dtypes)
515
+ rows_iterator = query_job.result(page_size=chunksize)
450
516
  elif table:
451
517
  table = self.attributes.get("table")
452
518
  max_results = self.attributes.get("max_results")
453
519
 
454
- rows = bqclient.list_rows(
520
+ rows_iterator = bqclient.list_rows(
455
521
  table, page_size=chunksize, max_results=max_results
456
522
  )
457
- dtypes = schema_to_dtypes(rows.schema)
458
- if chunksize:
459
- # passing bqstorage_client greatly improves performance
460
- return rows.to_dataframe_iterable(
461
- bqstorage_client=BigQueryReadClient(), dtypes=dtypes
462
- )
463
- else:
464
- return rows.to_dataframe(dtypes=dtypes)
465
523
  else:
466
524
  raise mlrun.errors.MLRunInvalidArgumentError(
467
525
  "table or query args must be specified"
468
526
  )
469
527
 
528
+ dtypes = schema_to_dtypes(rows_iterator.schema)
529
+ if chunksize:
530
+ # passing bqstorage_client greatly improves performance
531
+ df = rows_iterator.to_dataframe_iterable(
532
+ bqstorage_client=BigQueryReadClient(), dtypes=dtypes
533
+ )
534
+ else:
535
+ df = rows_iterator.to_dataframe(dtypes=dtypes)
536
+
537
+ # TODO : filter as part of the query
538
+ return select_columns_from_df(
539
+ filter_df_start_end_time(
540
+ df,
541
+ time_column=time_field or self.time_field,
542
+ start_time=start_time or self.start_time,
543
+ end_time=end_time or self.end_time,
544
+ ),
545
+ columns=columns,
546
+ )
547
+
470
548
  def is_iterator(self):
471
549
  return bool(self.attributes.get("chunksize"))
472
550
 
473
- def to_spark_df(self, session, named_view=False, time_field=None):
551
+ def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
474
552
  options = copy(self.attributes.get("spark_options", {}))
475
553
  credentials, gcp_project = self._get_credentials_string()
476
554
  if credentials:
@@ -500,7 +578,7 @@ class BigQuerySource(BaseSourceDriver):
500
578
  df = session.read.format("bigquery").load(**options)
501
579
  if named_view:
502
580
  df.createOrReplaceTempView(self.name)
503
- return df
581
+ return self._filter_spark_df(df, time_field, columns)
504
582
 
505
583
 
506
584
  class SnowflakeSource(BaseSourceDriver):
@@ -555,6 +633,7 @@ class SnowflakeSource(BaseSourceDriver):
555
633
  database: str = None,
556
634
  schema: str = None,
557
635
  warehouse: str = None,
636
+ **kwargs,
558
637
  ):
559
638
  attrs = {
560
639
  "query": query,
@@ -573,6 +652,7 @@ class SnowflakeSource(BaseSourceDriver):
573
652
  schedule=schedule,
574
653
  start_time=start_time,
575
654
  end_time=end_time,
655
+ **kwargs,
576
656
  )
577
657
 
578
658
  def _get_password(self):
@@ -664,7 +744,7 @@ class DataFrameSource:
664
744
  context=self.context or context,
665
745
  )
666
746
 
667
- def to_dataframe(self):
747
+ def to_dataframe(self, **kwargs):
668
748
  return self._df
669
749
 
670
750
  def is_iterator(self):
@@ -839,7 +919,15 @@ class KafkaSource(OnlineSource):
839
919
  attributes["sasl"] = sasl
840
920
  super().__init__(attributes=attributes, **kwargs)
841
921
 
842
- def to_dataframe(self):
922
+ def to_dataframe(
923
+ self,
924
+ columns=None,
925
+ df_module=None,
926
+ entities=None,
927
+ start_time=None,
928
+ end_time=None,
929
+ time_field=None,
930
+ ):
843
931
  raise mlrun.MLRunInvalidArgumentError(
844
932
  "KafkaSource does not support batch processing"
845
933
  )
@@ -880,13 +968,15 @@ class SQLSource(BaseSourceDriver):
880
968
  table_name: str = None,
881
969
  spark_options: dict = None,
882
970
  time_fields: List[str] = None,
971
+ parse_dates: List[str] = None,
972
+ **kwargs,
883
973
  ):
884
974
  """
885
975
  Reads SqlDB as input source for a flow.
886
976
  example::
887
- db_path = "mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>"
888
- source = SqlDBSource(
889
- collection_name='source_name', db_path=self.db, key_field='key'
977
+ db_url = "mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>"
978
+ source = SQLSource(
979
+ table_name='source_name', db_url=db_url, key_field='key'
890
980
  )
891
981
  :param name: source name
892
982
  :param chunksize: number of rows per chunk (default large single chunk)
@@ -903,19 +993,32 @@ class SQLSource(BaseSourceDriver):
903
993
  from the current database
904
994
  :param spark_options: additional spark read options
905
995
  :param time_fields: deprecated, use parse_dates instead - all the fields to be parsed as timestamps.
996
+ :param parse_dates: all the fields to be parsed as timestamps.
906
997
  """
907
-
998
+ if time_fields:
999
+ warnings.warn(
1000
+ "'time_fields' is deprecated, use 'parse_dates' instead. "
1001
+ "This will be removed in 1.6.0",
1002
+ # TODO: Remove this in 1.6.0
1003
+ FutureWarning,
1004
+ )
1005
+ parse_dates = time_fields
908
1006
  db_url = db_url or mlrun.mlconf.sql.url
909
1007
  if db_url is None:
910
1008
  raise mlrun.errors.MLRunInvalidArgumentError(
911
1009
  "cannot specify without db_path arg or secret MLRUN_SQL__URL"
912
1010
  )
1011
+ if time_field:
1012
+ if parse_dates:
1013
+ time_fields.append(time_field)
1014
+ else:
1015
+ parse_dates = [time_field]
913
1016
  attrs = {
914
1017
  "chunksize": chunksize,
915
1018
  "spark_options": spark_options,
916
1019
  "table_name": table_name,
917
1020
  "db_path": db_url,
918
- "time_fields": time_fields,
1021
+ "parse_dates": parse_dates,
919
1022
  }
920
1023
  attrs = {key: value for key, value in attrs.items() if value is not None}
921
1024
  super().__init__(
@@ -926,24 +1029,43 @@ class SQLSource(BaseSourceDriver):
926
1029
  schedule=schedule,
927
1030
  start_time=start_time,
928
1031
  end_time=end_time,
1032
+ **kwargs,
929
1033
  )
930
1034
 
931
- def to_dataframe(self):
932
- import sqlalchemy as db
1035
+ def to_dataframe(
1036
+ self,
1037
+ columns=None,
1038
+ df_module=None,
1039
+ entities=None,
1040
+ start_time=None,
1041
+ end_time=None,
1042
+ time_field=None,
1043
+ ):
1044
+ import sqlalchemy as sqlalchemy
933
1045
 
934
- query = self.attributes.get("query", None)
935
1046
  db_path = self.attributes.get("db_path")
936
1047
  table_name = self.attributes.get("table_name")
937
- if not query:
938
- query = f"SELECT * FROM {table_name}"
1048
+ parse_dates = self.attributes.get("parse_dates")
1049
+ time_field = time_field or self.time_field
1050
+ start_time = start_time or self.start_time
1051
+ end_time = end_time or self.end_time
939
1052
  if table_name and db_path:
940
- engine = db.create_engine(db_path)
1053
+ engine = sqlalchemy.create_engine(db_path)
1054
+ query, parse_dates = _generate_sql_query_with_time_filter(
1055
+ table_name=table_name,
1056
+ engine=engine,
1057
+ time_column=time_field,
1058
+ parse_dates=parse_dates,
1059
+ start_time=start_time,
1060
+ end_time=end_time,
1061
+ )
941
1062
  with engine.connect() as con:
942
1063
  return pd.read_sql(
943
1064
  query,
944
1065
  con=con,
945
1066
  chunksize=self.attributes.get("chunksize"),
946
- parse_dates=self.attributes.get("time_fields"),
1067
+ parse_dates=parse_dates,
1068
+ columns=columns,
947
1069
  )
948
1070
  else:
949
1071
  raise mlrun.errors.MLRunInvalidArgumentError(
@@ -0,0 +1,44 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import hashlib
15
+
16
+ from pyspark.sql.functions import udf
17
+ from pyspark.sql.types import StringType
18
+
19
+
20
def _hash_list(*list_to_hash):
    """Return the SHA-1 hex digest of the concatenated string forms of the arguments.

    Each positional argument is converted with ``str()`` and the pieces are
    joined with no separator before hashing. Because there is no separator,
    different splits of the same text produce the same digest, e.g.
    ``("ab", "c")`` and ``("a", "bc")``.

    :param list_to_hash: the values to hash, passed as positional arguments
    :return: the SHA-1 digest as a 40-character hexadecimal string
    """
    str_concatted = "".join(str(element) for element in list_to_hash)
    return hashlib.sha1(str_concatted.encode("utf8")).hexdigest()
26
+
27
+
28
def _redis_stringify_key(*args):
    """Build a single key string, terminated by ``"}:static"``, from the key parts.

    A single positional argument is used as-is (it may itself be a list of
    key parts); several positional arguments are collected into a list.

    * non-list value: ``str(value)`` + suffix
    * list with one part: ``str(part)`` + suffix
    * list with two parts: both parts joined with ``"."`` + suffix
    * list with three or more parts: the first part, ``"."``, and the
      ``_hash_list`` digest of the remaining parts + suffix

    :param args: one key (scalar or list of parts) or several key parts
    :return: the key string
    :raises IndexError: if the resulting key list is empty
    """
    key_list = args[0] if len(args) == 1 else list(args)
    suffix = "}:static"

    # anything that is not a list (including a tuple) is stringified whole
    if not isinstance(key_list, list):
        return str(key_list) + suffix

    if len(key_list) >= 3:
        # only the parts after the first one are hashed; the first stays readable
        return str(key_list[0]) + "." + _hash_list(*key_list[1:]) + suffix
    if len(key_list) == 2:
        return str(key_list[0]) + "." + str(key_list[1]) + suffix
    return str(key_list[0]) + suffix
41
+
42
+
43
# Spark UDF wrappers around the key helpers defined in this module, so the same
# key construction can be applied to DataFrame columns; both return strings.
hash_and_concat_v3io_udf = udf(_hash_list, StringType())
hash_and_concat_redis_udf = udf(_redis_stringify_key, StringType())
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -81,7 +81,7 @@ class ResourceCache:
81
81
  endpoint, uri = parse_path(uri)
82
82
  self._tabels[uri] = Table(
83
83
  uri,
84
- V3ioDriver(webapi=endpoint),
84
+ V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
85
85
  flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
86
86
  )
87
87
  return self._tabels[uri]
@@ -101,8 +101,8 @@ class ResourceCache:
101
101
  if is_store_uri(uri):
102
102
  resource = get_store_resource(uri)
103
103
  if resource.kind in [
104
- mlrun.api.schemas.ObjectKind.feature_set.value,
105
- mlrun.api.schemas.ObjectKind.feature_vector.value,
104
+ mlrun.common.schemas.ObjectKind.feature_set.value,
105
+ mlrun.common.schemas.ObjectKind.feature_vector.value,
106
106
  ]:
107
107
  target = get_online_target(resource)
108
108
  if not target: