mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444)
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3rc1.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -13,16 +13,30 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  import abc
16
+ import typing
17
+ from datetime import datetime
18
+
19
+ import dask.dataframe as dd
20
+ import pandas as pd
16
21
 
17
22
  import mlrun
18
23
  from mlrun.datastore.targets import CSVTarget, ParquetTarget
24
+ from mlrun.feature_store.feature_set import FeatureSet
25
+ from mlrun.feature_store.feature_vector import Feature
19
26
 
20
- from ...utils import logger
27
+ from ...utils import logger, str_to_timestamp
28
+ from ..feature_vector import OfflineVectorResponse
21
29
 
22
30
 
23
31
  class BaseMerger(abc.ABC):
24
32
  """abstract feature merger class"""
25
33
 
34
+ # In order to be an online merger, the merger should implement `init_online_vector_service` function.
35
+ support_online = False
36
+
37
+ # In order to be an offline merger, the merger should implement
38
+ # `_order_by`, `_filter`, `_drop_columns_from_result`, `_rename_columns_and_select`, `_get_engine_df` functions.
39
+ support_offline = False
26
40
  engine = None
27
41
 
28
42
  def __init__(self, vector, **engine_args):
@@ -36,6 +50,8 @@ class BaseMerger(abc.ABC):
36
50
  self._drop_indexes = True
37
51
  self._target = None
38
52
  self._alias = dict()
53
+ self._origin_alias = dict()
54
+ self._entity_rows_node_name = "__mlrun__$entity_rows$"
39
55
 
40
56
  def _append_drop_column(self, key):
41
57
  if key and key not in self._drop_columns:
@@ -67,22 +83,19 @@ class BaseMerger(abc.ABC):
67
83
  drop_columns=None,
68
84
  start_time=None,
69
85
  end_time=None,
86
+ timestamp_for_filtering=None,
70
87
  with_indexes=None,
71
88
  update_stats=None,
72
89
  query=None,
73
- join_type="inner",
90
+ order_by=None,
74
91
  ):
75
92
  self._target = target
76
- self._join_type = join_type
77
93
 
78
94
  # calculate the index columns and columns we need to drop
79
95
  self._drop_columns = drop_columns or self._drop_columns
80
96
  if self.vector.spec.with_indexes or with_indexes:
81
97
  self._drop_indexes = False
82
98
 
83
- if entity_timestamp_column and self._drop_indexes:
84
- self._append_drop_column(entity_timestamp_column)
85
-
86
99
  # retrieve the feature set objects/fields needed for the vector
87
100
  feature_set_objects, feature_set_fields = self.vector.parse_features(
88
101
  update_stats=update_stats
@@ -96,23 +109,34 @@ class BaseMerger(abc.ABC):
96
109
  # update the feature vector objects with refreshed stats
97
110
  self.vector.save()
98
111
 
112
+ if self._drop_indexes and entity_timestamp_column:
113
+ self._append_drop_column(entity_timestamp_column)
114
+
99
115
  for feature_set in feature_set_objects.values():
100
- if not entity_timestamp_column and self._drop_indexes:
116
+ if self._drop_indexes:
101
117
  self._append_drop_column(feature_set.spec.timestamp_key)
102
118
  for key in feature_set.spec.entities.keys():
103
119
  self._append_index(key)
104
120
 
105
- return self._generate_vector(
121
+ start_time = str_to_timestamp(start_time)
122
+ end_time = str_to_timestamp(end_time)
123
+ if start_time and not end_time:
124
+ # if end_time is not specified set it to now()
125
+ end_time = pd.Timestamp.now()
126
+
127
+ return self._generate_offline_vector(
106
128
  entity_rows,
107
129
  entity_timestamp_column,
108
130
  feature_set_objects=feature_set_objects,
109
131
  feature_set_fields=feature_set_fields,
110
132
  start_time=start_time,
111
133
  end_time=end_time,
134
+ timestamp_for_filtering=timestamp_for_filtering,
112
135
  query=query,
136
+ order_by=order_by,
113
137
  )
114
138
 
115
- def _write_to_target(self):
139
+ def _write_to_offline_target(self):
116
140
  if self._target:
117
141
  is_persistent_vector = self.vector.metadata.name is not None
118
142
  if not self._target.path and not is_persistent_vector:
@@ -125,6 +149,14 @@ class BaseMerger(abc.ABC):
125
149
  target_status = self._target.update_resource_status("ready", size=size)
126
150
  logger.info(f"wrote target: {target_status}")
127
151
  self.vector.save()
152
+ if not self._drop_indexes:
153
+ self.vector.spec.entity_fields = [
154
+ Feature(name=feature, value_type=self._result_df[feature].dtype)
155
+ if self._result_df[feature].dtype.name != "object"
156
+ else Feature(name=feature, value_type="str")
157
+ for feature in self._index_columns
158
+ ]
159
+ self.vector.save()
128
160
 
129
161
  def _set_indexes(self, df):
130
162
  if self._index_columns and not self._drop_indexes:
@@ -134,29 +166,16 @@ class BaseMerger(abc.ABC):
134
166
  if index not in df.columns:
135
167
  index_columns_missing.append(index)
136
168
  if not index_columns_missing:
137
- if self.engine == "local" or self.engine == "spark":
138
- df.set_index(self._index_columns, inplace=True)
139
- elif self.engine == "dask":
140
- if len(self._index_columns) == 1:
141
- return df.set_index(self._index_columns[0])
142
- elif len(self._index_columns) != 1:
143
- return self._reset_index(self._result_df)
144
- else:
145
- logger.info(
146
- "The entities will stay as columns because "
147
- "Dask dataframe does not yet support multi-indexes"
148
- )
149
- return self._result_df
169
+ df.set_index(self._index_columns, inplace=True)
150
170
  else:
151
171
  logger.warn(
152
172
  f"Can't set index, not all index columns found: {index_columns_missing}. "
153
173
  f"It is possible that column was already indexed."
154
174
  )
155
- else:
156
- return df
175
+ else:
176
+ df.reset_index(drop=True, inplace=True)
157
177
 
158
- @abc.abstractmethod
159
- def _generate_vector(
178
+ def _generate_offline_vector(
160
179
  self,
161
180
  entity_rows,
162
181
  entity_timestamp_column,
@@ -164,9 +183,203 @@ class BaseMerger(abc.ABC):
164
183
  feature_set_fields,
165
184
  start_time=None,
166
185
  end_time=None,
186
+ timestamp_for_filtering=None,
167
187
  query=None,
188
+ order_by=None,
168
189
  ):
169
- raise NotImplementedError("_generate_vector() operation not supported in class")
190
+ self._create_engine_env()
191
+
192
+ feature_sets = []
193
+ dfs = []
194
+ keys = (
195
+ []
196
+ ) # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
197
+ # featureset is connected to the previous one, and within each record the left keys are indicated in index 0
198
+ # and the right keys in index 1, this keys will be the keys that will be used in this join
199
+
200
+ fs_link_list = self._create_linked_relation_list(
201
+ feature_set_objects, feature_set_fields
202
+ )
203
+
204
+ filtered = False
205
+ for node in fs_link_list:
206
+ name = node.name
207
+ feature_set = feature_set_objects[name]
208
+ feature_sets.append(feature_set)
209
+ columns = feature_set_fields[name]
210
+ self._origin_alias.update({name: alias for name, alias in columns})
211
+ column_names = [name for name, _ in columns]
212
+
213
+ for column in node.data["save_cols"]:
214
+ if column not in column_names:
215
+ column_names.append(column)
216
+ if column not in self._index_columns:
217
+ self._append_drop_column(column)
218
+
219
+ if isinstance(timestamp_for_filtering, dict):
220
+ time_column = timestamp_for_filtering.get(
221
+ name, feature_set.spec.timestamp_key
222
+ )
223
+ elif isinstance(timestamp_for_filtering, str):
224
+ time_column = timestamp_for_filtering
225
+ else:
226
+ time_column = feature_set.spec.timestamp_key
227
+
228
+ if time_column != feature_set.spec.timestamp_key and time_column not in [
229
+ feature.name for feature in feature_set.spec.features
230
+ ]:
231
+ raise mlrun.errors.MLRunInvalidArgumentError(
232
+ f"Feature set `{name}` "
233
+ f"does not have a column named `{time_column}` to filter on."
234
+ )
235
+
236
+ if self._drop_indexes:
237
+ self._append_drop_column(time_column)
238
+ if (start_time or end_time) and time_column:
239
+ filtered = True
240
+
241
+ df = self._get_engine_df(
242
+ feature_set,
243
+ name,
244
+ column_names,
245
+ start_time if time_column else None,
246
+ end_time if time_column else None,
247
+ time_column,
248
+ )
249
+
250
+ column_names += node.data["save_index"]
251
+ node.data["save_cols"] += node.data["save_index"]
252
+ fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
253
+ if feature_set.spec.timestamp_key:
254
+ column_names.append(feature_set.spec.timestamp_key)
255
+ node.data["save_cols"].append(feature_set.spec.timestamp_key)
256
+ fs_entities_and_timestamp.append(feature_set.spec.timestamp_key)
257
+
258
+ # rename columns to be unique for each feature set and select if needed
259
+ rename_col_dict = {
260
+ column: f"{column}_{name}"
261
+ for column in column_names
262
+ if column not in node.data["save_cols"]
263
+ }
264
+ df_temp = self._rename_columns_and_select(
265
+ df,
266
+ rename_col_dict,
267
+ columns=list(set(column_names + fs_entities_and_timestamp)),
268
+ )
269
+
270
+ if df_temp is not None:
271
+ df = df_temp
272
+ del df_temp
273
+
274
+ dfs.append(df)
275
+ del df
276
+
277
+ keys.append([node.data["left_keys"], node.data["right_keys"]])
278
+
279
+ # update alias according to the unique column name
280
+ new_columns = []
281
+ if not self._drop_indexes:
282
+ new_columns.extend([(ind, ind) for ind in fs_entities_and_timestamp])
283
+ for column, alias in columns:
284
+ if column in rename_col_dict:
285
+ new_columns.append((rename_col_dict[column], alias or column))
286
+ else:
287
+ new_columns.append((column, alias))
288
+ self._update_alias(dictionary={name: alias for name, alias in new_columns})
289
+
290
+ # None of the feature sets was filtered as required
291
+ if not filtered and (start_time or end_time):
292
+ raise mlrun.errors.MLRunRuntimeError(
293
+ "start_time and end_time can only be provided in conjunction with "
294
+ "a timestamp column, or when the at least one feature_set has a timestamp key"
295
+ )
296
+ # convert pandas entity_rows to spark\dask DF if needed
297
+ if (
298
+ entity_rows is not None
299
+ and not hasattr(entity_rows, "rdd")
300
+ and self.engine == "spark"
301
+ ):
302
+ entity_rows = self.spark.createDataFrame(entity_rows)
303
+ elif (
304
+ entity_rows is not None
305
+ and not hasattr(entity_rows, "dask")
306
+ and self.engine == "dask"
307
+ ):
308
+ entity_rows = dd.from_pandas(
309
+ entity_rows, npartitions=len(entity_rows.columns)
310
+ )
311
+
312
+ # join the feature data frames
313
+ result_timestamp = self.merge(
314
+ entity_df=entity_rows,
315
+ entity_timestamp_column=entity_timestamp_column
316
+ if entity_rows is not None
317
+ else None,
318
+ featuresets=feature_sets,
319
+ featureset_dfs=dfs,
320
+ keys=keys,
321
+ )
322
+
323
+ all_columns = None
324
+ if not self._drop_indexes and result_timestamp:
325
+ if result_timestamp not in self._alias.values():
326
+ self._update_alias(key=result_timestamp, val=result_timestamp)
327
+ all_columns = list(self._alias.keys())
328
+
329
+ df_temp = self._rename_columns_and_select(
330
+ self._result_df, self._alias, columns=all_columns
331
+ )
332
+ if df_temp is not None:
333
+ self._result_df = df_temp
334
+ del df_temp
335
+
336
+ df_temp = self._drop_columns_from_result()
337
+ if df_temp is not None:
338
+ self._result_df = df_temp
339
+ del df_temp
340
+
341
+ if self.vector.status.label_column:
342
+ self._result_df = self._result_df.dropna(
343
+ subset=[self.vector.status.label_column]
344
+ )
345
+ # filter joined data frame by the query param
346
+ if query:
347
+ self._filter(query)
348
+
349
+ if order_by:
350
+ if isinstance(order_by, str):
351
+ order_by = [order_by]
352
+ order_by_active = [
353
+ order_col
354
+ if order_col in self._result_df.columns
355
+ else self._origin_alias.get(order_col, None)
356
+ for order_col in order_by
357
+ ]
358
+ if None in order_by_active:
359
+ raise mlrun.errors.MLRunInvalidArgumentError(
360
+ f"Result dataframe contains {self._result_df.columns} "
361
+ f"columns and can't order by {order_by}"
362
+ )
363
+ self._order_by(order_by_active)
364
+
365
+ self._write_to_offline_target()
366
+ return OfflineVectorResponse(self)
367
+
368
def init_online_vector_service(
    self, entity_keys, fixed_window_type, update_stats=False
):
    """
    Set up the `OnlineVectorService`.

    This implementation is a placeholder: it performs no work and always raises.

    :param entity_keys:         list of the feature_vector indexes.
    :param fixed_window_type:   determines how to query the fixed window values which were previously
                                inserted by ingest
    :param update_stats:        update features statistics from the requested feature sets on the vector.
                                Default: False.

    :return:                    `OnlineVectorService`

    :raises NotImplementedError: always, in this implementation.
    """
    raise NotImplementedError()
170
383
 
171
384
  def _unpersist_df(self, df):
172
385
  pass
@@ -178,7 +391,6 @@ class BaseMerger(abc.ABC):
178
391
  featuresets: list,
179
392
  featureset_dfs: list,
180
393
  keys: list = None,
181
- all_columns: list = None,
182
394
  ):
183
395
  """join the entities and feature set features into a result dataframe"""
184
396
  merged_df = entity_df
@@ -190,10 +402,6 @@ class BaseMerger(abc.ABC):
190
402
  else:
191
403
  # keys can be multiple keys on each side of the join
192
404
  keys = [[[], []]] * len(featureset_dfs)
193
- if all_columns is not None:
194
- all_columns.pop(0)
195
- else:
196
- all_columns = [[]] * len(featureset_dfs)
197
405
  entity_timestamp_column = (
198
406
  entity_timestamp_column or featureset.spec.timestamp_key
199
407
  )
@@ -203,16 +411,9 @@ class BaseMerger(abc.ABC):
203
411
  # and it can join only by the entities of the first `featureset`
204
412
  keys[0][0] = keys[0][1] = list(featuresets[0].spec.entities.keys())
205
413
 
206
- for featureset, featureset_df, lr_key, columns in zip(
207
- featuresets, featureset_dfs, keys, all_columns
208
- ):
209
- if featureset.spec.timestamp_key:
414
+ for featureset, featureset_df, lr_key in zip(featuresets, featureset_dfs, keys):
415
+ if featureset.spec.timestamp_key and entity_timestamp_column:
210
416
  merge_func = self._asof_join
211
- if self._join_type != "inner":
212
- logger.warn(
213
- "Merge all the features with as_of_join and don't "
214
- "take into account the join_type that was given"
215
- )
216
417
  else:
217
418
  merge_func = self._join
218
419
 
@@ -223,7 +424,9 @@ class BaseMerger(abc.ABC):
223
424
  featureset_df,
224
425
  lr_key[0],
225
426
  lr_key[1],
226
- columns,
427
+ )
428
+ entity_timestamp_column = (
429
+ entity_timestamp_column or featureset.spec.timestamp_key
227
430
  )
228
431
 
229
432
  # unpersist as required by the implementation (e.g. spark) and delete references
@@ -232,8 +435,8 @@ class BaseMerger(abc.ABC):
232
435
  del featureset_df
233
436
 
234
437
  self._result_df = merged_df
438
+ return entity_timestamp_column
235
439
 
236
- @abc.abstractmethod
237
440
  def _asof_join(
238
441
  self,
239
442
  entity_df,
@@ -242,11 +445,9 @@ class BaseMerger(abc.ABC):
242
445
  featureset_df,
243
446
  left_keys: list,
244
447
  right_keys: list,
245
- columns: list,
246
448
  ):
247
449
  raise NotImplementedError("_asof_join() operation not implemented in class")
248
450
 
249
- @abc.abstractmethod
250
451
  def _join(
251
452
  self,
252
453
  entity_df,
@@ -255,7 +456,6 @@ class BaseMerger(abc.ABC):
255
456
  featureset_df,
256
457
  left_keys: list,
257
458
  right_keys: list,
258
- columns: list,
259
459
  ):
260
460
  raise NotImplementedError("_join() operation not implemented in class")
261
461
 
@@ -267,6 +467,7 @@ class BaseMerger(abc.ABC):
267
467
 
268
468
  def get_df(self, to_pandas=True):
269
469
  """return the result as a dataframe (pandas by default)"""
470
+ self._set_indexes(self._result_df)
270
471
  return self._result_df
271
472
 
272
473
  def to_parquet(self, target_path, **kw):
@@ -293,6 +494,9 @@ class BaseMerger(abc.ABC):
293
494
  def __eq__(self, other):
294
495
  return self.name == other.name
295
496
 
497
def __copy__(self):
    """
    Build a new node with the same name and order and a copy of `data` made with `data.copy()`.

    NOTE(review): `data` is presumably a dict, in which case the copy is shallow and the values
    stored inside it are shared with this node - confirm against the `_Node` constructor.
    """
    copied_data = self.data.copy()
    return BaseMerger._Node(self.name, self.order, copied_data)
499
+
296
500
  class _LinkedList:
297
501
  def __init__(self, head=None):
298
502
  self.head = head
@@ -313,6 +517,19 @@ class BaseMerger(abc.ABC):
313
517
  yield node
314
518
  node = node.next
315
519
 
520
def __copy__(self):
    """
    Build a new linked list whose nodes are copies (via each node's `__copy__`) of this
    list's nodes, chained in the same order, and whose `len` equals this list's `len`.
    """
    duplicate = BaseMerger._LinkedList()
    tail = None
    for original_node in self:
        clone = original_node.__copy__()
        if tail is None:
            # first node seen becomes the head of the new list
            duplicate.head = clone
        else:
            tail.next = clone
        tail = clone
    duplicate.len = self.len
    return duplicate
532
+
316
533
  def add_first(self, node):
317
534
  node.next = self.head
318
535
  self.head = node
@@ -325,7 +542,9 @@ class BaseMerger(abc.ABC):
325
542
  for current_node in self:
326
543
  pass
327
544
  current_node.next = node
328
- self.len += 1
545
+ while node:
546
+ self.len += 1
547
+ node = node.next
329
548
 
330
549
  def add_after(self, target_node, new_node):
331
550
  new_node.next = target_node.next
@@ -346,7 +565,9 @@ class BaseMerger(abc.ABC):
346
565
  node = self.find_node(other_head.name)
347
566
  if node is None:
348
567
  return
349
- node.data["save_cols"] += other_head.data["save_cols"]
568
+ for col in other_head.data["save_cols"]:
569
+ if col not in node.data["save_cols"]:
570
+ node.data["save_cols"].append(col)
350
571
  for other_node in other_iter:
351
572
  if self.find_node(other_node.name) is None:
352
573
  while node is not None and other_node.order > node.order:
@@ -357,10 +578,11 @@ class BaseMerger(abc.ABC):
357
578
  self.add_last(other_node)
358
579
  node = other_node
359
580
 
360
- @staticmethod
361
- def _create_linked_relation_list(feature_set_objects, feature_set_fields):
581
+ def _create_linked_relation_list(
582
+ self, feature_set_objects, feature_set_fields, entity_rows_keys=None
583
+ ):
362
584
  feature_set_names = list(feature_set_fields.keys())
363
- if len(feature_set_names) == 1:
585
+ if len(feature_set_names) == 1 and not entity_rows_keys:
364
586
  return BaseMerger._LinkedList(
365
587
  head=BaseMerger._Node(
366
588
  name=feature_set_names[0],
@@ -420,10 +642,9 @@ class BaseMerger(abc.ABC):
420
642
  )
421
643
  )
422
644
 
423
- # checking if feature_set have relation with feature_set_in
424
- relation_wise = all(curr_col_relation_list)
425
-
426
- if relation_wise:
645
+ if all(
646
+ curr_col_relation_list
647
+ ): # checking if feature_set have relation with feature_set_in
427
648
  # add to the link list feature set according to the defined relation
428
649
  linked_list_relation.add_last(
429
650
  BaseMerger._Node(
@@ -437,8 +658,8 @@ class BaseMerger(abc.ABC):
437
658
  order=name_in_order,
438
659
  )
439
660
  )
440
- linked_list_relation.head.data["save_cols"].append(
441
- *curr_col_relation_list
661
+ linked_list_relation.head.data["save_cols"].extend(
662
+ curr_col_relation_list
442
663
  )
443
664
  elif name_in_order > head_order and sorted(
444
665
  feature_set_in_entity_list_names
@@ -460,26 +681,127 @@ class BaseMerger(abc.ABC):
460
681
  linked_list_relation.head.data["save_index"] = keys
461
682
  return linked_list_relation
462
683
 
684
def _build_entity_rows_relation(entity_rows_relation, fs_name, fs_order):
    """
    Append feature set `fs_name` to `entity_rows_relation` when all of its entities
    appear in `entity_rows_keys`; otherwise leave the list untouched.

    :param entity_rows_relation: linked list to extend
    :param fs_name:              name of the feature set to link
    :param fs_order:             order value stored on the new node
    """
    entity_names = list(feature_set_entity_list_dict[fs_name].keys())
    if not all(entity in entity_rows_keys for entity in entity_names):
        return

    # add to the link list feature set according to indexes match,
    # only if all entities in the feature set exist in the entity rows
    # (the same list object is used for the left keys, right keys and save_index)
    new_node = BaseMerger._Node(
        fs_name,
        data={
            "left_keys": entity_names,
            "right_keys": entity_names,
            "save_cols": [],
            "save_index": entity_names,
        },
        order=fs_order,
    )
    entity_rows_relation.add_last(new_node)
    entity_rows_relation.head.data["save_index"] = entity_names
705
+
706
+ if entity_rows_keys is not None:
707
+ entity_rows_linked_relation = _create_relation(
708
+ self._entity_rows_node_name, -1
709
+ )
710
+ relation_linked_lists.append(entity_rows_linked_relation)
711
+ linked_list_len_goal = len(feature_set_objects) + 1
712
+ else:
713
+ entity_rows_linked_relation = None
714
+ linked_list_len_goal = len(feature_set_objects)
715
+
463
716
  for i, name in enumerate(feature_set_names):
464
717
  linked_relation = _create_relation(name, i)
718
+ if entity_rows_linked_relation is not None:
719
+ _build_entity_rows_relation(entity_rows_linked_relation, name, i)
465
720
  for j, name_in in enumerate(feature_set_names):
466
721
  if name != name_in:
467
722
  linked_relation = _build_relation(name_in, j, linked_relation, i)
468
723
  relation_linked_lists.append(linked_relation)
469
724
 
470
725
  # concat all the link lists to one, for the merging process
471
- link_list_iter = iter(relation_linked_lists)
472
- return_relation = next(link_list_iter)
473
- for relation_list in link_list_iter:
474
- return_relation.concat(relation_list)
475
- if return_relation.len != len(feature_set_objects):
476
- raise mlrun.errors.MLRunRuntimeError("Failed to merge")
726
+ for i in range(len(relation_linked_lists)):
727
+ return_relation = relation_linked_lists[i].__copy__()
728
+ for relation_list in relation_linked_lists:
729
+ return_relation.concat(relation_list)
730
+ if return_relation.len == linked_list_len_goal:
731
+ return return_relation
477
732
 
478
- return return_relation
733
+ raise mlrun.errors.MLRunRuntimeError("Failed to merge")
479
734
 
480
- @classmethod
481
735
  def get_default_image(cls, kind):
482
736
  return mlrun.mlconf.feature_store.default_job_image
483
737
 
484
738
  def _reset_index(self, _result_df):
485
739
  raise NotImplementedError
740
+
741
+ def _create_engine_env(self):
742
+ """
743
+ initialize engine env if needed
744
+ """
745
+ raise NotImplementedError
746
+
747
+ def _get_engine_df(
748
+ self,
749
+ feature_set: FeatureSet,
750
+ feature_set_name: typing.List[str],
751
+ column_names: typing.List[str] = None,
752
+ start_time: typing.Union[str, datetime] = None,
753
+ end_time: typing.Union[str, datetime] = None,
754
+ time_column: typing.Optional[str] = None,
755
+ ):
756
+ """
757
+ Return the feature_set data frame according to the args
758
+
759
+ :param feature_set: current feature_set to extract from the data frame
760
+ :param feature_set_name: the name of the current feature_set
761
+ :param column_names: list of columns to select (if not all)
762
+ :param start_time: filter by start time
763
+ :param end_time: filter by end time
764
+ :param time_column: specify the time column name to filter on
765
+
766
+ :return: Data frame of the current engine
767
+ """
768
+ raise NotImplementedError
769
+
770
+ def _rename_columns_and_select(
771
+ self,
772
+ df,
773
+ rename_col_dict: typing.Dict[str, str],
774
+ columns: typing.List[str] = None,
775
+ ):
776
+ """
777
+ rename the columns of the df according to rename_col_dict, and select only `columns` if it is not none
778
+
779
+ :param df: the data frame to change
780
+ :param rename_col_dict: the renaming dictionary - {<current_column_name>: <new_column_name>, ...}
781
+ :param columns: list of columns to select (if not all)
782
+
783
+ :return: the data frame after the transformation or None if the transformation were preformed inplace
784
+ """
785
+ raise NotImplementedError
786
+
787
+ def _drop_columns_from_result(self):
788
+ """
789
+ drop `self._drop_columns` from `self._result_df`
790
+ """
791
+ raise NotImplementedError
792
+
793
+ def _filter(self, query: str):
794
+ """
795
+ filter `self._result_df` by `query`
796
+
797
+ :param query: The query string used to filter rows
798
+ """
799
+ raise NotImplementedError
800
+
801
+ def _order_by(self, order_by_active: typing.List[str]):
802
+ """
803
+ Order by `order_by_active` along all axis.
804
+
805
+ :param order_by_active: list of names to sort by.
806
+ """
807
+ raise NotImplementedError