mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444)
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3rc1.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -20,151 +20,18 @@ from dask.distributed import Client
20
20
 
21
21
  import mlrun
22
22
 
23
- from ..feature_vector import OfflineVectorResponse
24
23
  from .base import BaseMerger
25
24
 
26
25
 
27
26
  class DaskFeatureMerger(BaseMerger):
28
27
  engine = "dask"
28
+ support_offline = True
29
29
 
30
30
  def __init__(self, vector, **engine_args):
31
31
  super().__init__(vector, **engine_args)
32
32
  self.client = engine_args.get("dask_client")
33
33
  self._dask_cluster_uri = engine_args.get("dask_cluster_uri")
34
34
 
35
- def _generate_vector(
36
- self,
37
- entity_rows,
38
- entity_timestamp_column,
39
- feature_set_objects,
40
- feature_set_fields,
41
- start_time=None,
42
- end_time=None,
43
- query=None,
44
- ):
45
- if "index" not in self._index_columns:
46
- self._append_drop_column("index")
47
-
48
- # init the dask client if needed
49
- if not self.client:
50
- if self._dask_cluster_uri:
51
- function = mlrun.import_function(self._dask_cluster_uri)
52
- self.client = function.client
53
- else:
54
- self.client = Client()
55
-
56
- # load dataframes
57
- feature_sets = []
58
- dfs = []
59
- keys = (
60
- []
61
- ) # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
62
- # featureset is connected to the previous one, and within each record the left keys are indicated in index 0
63
- # and the right keys in index 1, this keys will be the keys that will be used in this join
64
- all_columns = []
65
-
66
- fs_link_list = self._create_linked_relation_list(
67
- feature_set_objects, feature_set_fields
68
- )
69
-
70
- for node in fs_link_list:
71
- name = node.name
72
- feature_set = feature_set_objects[name]
73
- feature_sets.append(feature_set)
74
- columns = feature_set_fields[name]
75
- column_names = [name for name, alias in columns]
76
-
77
- for col in node.data["save_cols"]:
78
- if col not in column_names:
79
- self._append_drop_column(col)
80
- column_names += node.data["save_cols"]
81
-
82
- df = feature_set.to_dataframe(
83
- columns=column_names,
84
- df_module=dd,
85
- start_time=start_time,
86
- end_time=end_time,
87
- time_column=entity_timestamp_column,
88
- index=False,
89
- )
90
-
91
- df = df.reset_index()
92
- column_names += node.data["save_index"]
93
- node.data["save_cols"] += node.data["save_index"]
94
- entity_timestamp_column_list = (
95
- [entity_timestamp_column]
96
- if entity_timestamp_column
97
- else feature_set.spec.timestamp_key
98
- )
99
- if entity_timestamp_column_list:
100
- column_names += entity_timestamp_column_list
101
- node.data["save_cols"] += entity_timestamp_column_list
102
-
103
- df = df.persist()
104
-
105
- # rename columns to be unique for each feature set
106
- rename_col_dict = {
107
- col: f"{col}_{name}"
108
- for col in column_names
109
- if col not in node.data["save_cols"]
110
- }
111
- df = df.rename(
112
- columns=rename_col_dict,
113
- )
114
-
115
- dfs.append(df)
116
- del df
117
-
118
- keys.append([node.data["left_keys"], node.data["right_keys"]])
119
-
120
- # update alias according to the unique column name
121
- new_columns = []
122
- for col, alias in columns:
123
- if col in rename_col_dict and alias:
124
- new_columns.append((rename_col_dict[col], alias))
125
- elif col in rename_col_dict and not alias:
126
- new_columns.append((rename_col_dict[col], col))
127
- else:
128
- new_columns.append((col, alias))
129
- all_columns.append(new_columns)
130
- self._update_alias(
131
- dictionary={name: alias for name, alias in new_columns if alias}
132
- )
133
-
134
- self.merge(
135
- entity_df=entity_rows,
136
- entity_timestamp_column=entity_timestamp_column,
137
- featuresets=feature_sets,
138
- featureset_dfs=dfs,
139
- keys=keys,
140
- all_columns=all_columns,
141
- )
142
-
143
- self._result_df = self._result_df.drop(
144
- columns=self._drop_columns, errors="ignore"
145
- )
146
-
147
- # renaming all columns according to self._alias
148
- self._result_df = self._result_df.rename(
149
- columns=self._alias,
150
- )
151
-
152
- if self.vector.status.label_column:
153
- self._result_df = self._result_df.dropna(
154
- subset=[self.vector.status.label_column]
155
- )
156
- # filter joined data frame by the query param
157
- if query:
158
- self._result_df = self._result_df.query(query)
159
-
160
- if self._drop_indexes:
161
- self._result_df = self._reset_index(self._result_df)
162
- else:
163
- self._result_df = self._set_indexes(self._result_df)
164
- self._write_to_target()
165
-
166
- return OfflineVectorResponse(self)
167
-
168
35
  def _reset_index(self, df):
169
36
  to_drop = df.index.name is None
170
37
  df = df.reset_index(drop=to_drop)
@@ -178,27 +45,22 @@ class DaskFeatureMerger(BaseMerger):
178
45
  featureset_df,
179
46
  left_keys: list,
180
47
  right_keys: list,
181
- columns: list,
182
48
  ):
49
+ def sort_partition(partition, timestamp):
50
+ return partition.sort_values(timestamp)
183
51
 
184
- entity_df = self._reset_index(entity_df)
185
- entity_df = (
186
- entity_df
187
- if entity_timestamp_column not in entity_df
188
- else entity_df.set_index(entity_timestamp_column, drop=True)
52
+ entity_df = entity_df.map_partitions(
53
+ sort_partition, timestamp=entity_timestamp_column
189
54
  )
190
- featureset_df = self._reset_index(featureset_df)
191
- featureset_df = (
192
- featureset_df
193
- if entity_timestamp_column not in featureset_df
194
- else featureset_df.set_index(entity_timestamp_column, drop=True)
55
+ featureset_df = featureset_df.map_partitions(
56
+ sort_partition, timestamp=featureset.spec.timestamp_key
195
57
  )
196
58
 
197
59
  merged_df = merge_asof(
198
60
  entity_df,
199
61
  featureset_df,
200
- left_index=True,
201
- right_index=True,
62
+ left_on=entity_timestamp_column,
63
+ right_on=featureset.spec.timestamp_key,
202
64
  left_by=left_keys or None,
203
65
  right_by=right_keys or None,
204
66
  suffixes=("", f"_{featureset.metadata.name}_"),
@@ -217,7 +79,6 @@ class DaskFeatureMerger(BaseMerger):
217
79
  featureset_df,
218
80
  left_keys: list,
219
81
  right_keys: list,
220
- columns: list,
221
82
  ):
222
83
 
223
84
  fs_name = featureset.metadata.name
@@ -241,5 +102,56 @@ class DaskFeatureMerger(BaseMerger):
241
102
 
242
103
  def get_df(self, to_pandas=True):
243
104
  if to_pandas and hasattr(self._result_df, "dask"):
244
- return self._result_df.compute()
245
- return self._result_df
105
+ df = self._result_df.compute()
106
+ else:
107
+ df = self._result_df
108
+ self._set_indexes(df)
109
+ return df
110
+
111
+ def _create_engine_env(self):
112
+ if "index" not in self._index_columns:
113
+ self._append_drop_column("index")
114
+
115
+ # init the dask client if needed
116
+ if not self.client:
117
+ if self._dask_cluster_uri:
118
+ function = mlrun.import_function(self._dask_cluster_uri)
119
+ self.client = function.client
120
+ else:
121
+ self.client = Client()
122
+
123
+ def _get_engine_df(
124
+ self,
125
+ feature_set,
126
+ feature_set_name,
127
+ column_names=None,
128
+ start_time=None,
129
+ end_time=None,
130
+ time_column=None,
131
+ ):
132
+ df = feature_set.to_dataframe(
133
+ columns=column_names,
134
+ df_module=dd,
135
+ start_time=start_time,
136
+ end_time=end_time,
137
+ time_column=time_column,
138
+ index=False,
139
+ )
140
+
141
+ return self._reset_index(df).persist()
142
+
143
+ def _rename_columns_and_select(self, df, rename_col_dict, columns=None):
144
+ return df.rename(
145
+ columns=rename_col_dict,
146
+ )
147
+
148
+ def _drop_columns_from_result(self):
149
+ self._result_df = self._result_df.drop(
150
+ columns=self._drop_columns, errors="ignore"
151
+ )
152
+
153
+ def _filter(self, query):
154
+ self._result_df = self._result_df.query(query)
155
+
156
+ def _order_by(self, order_by_active):
157
+ self._result_df.sort_values(by=order_by_active)
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -33,12 +33,15 @@ def run_merge_job(
33
33
  engine_args: dict,
34
34
  spark_service: str = None,
35
35
  entity_rows=None,
36
- timestamp_column=None,
36
+ entity_timestamp_column=None,
37
37
  run_config=None,
38
38
  drop_columns=None,
39
39
  with_indexes=None,
40
40
  query=None,
41
- join_type="inner",
41
+ order_by=None,
42
+ start_time=None,
43
+ end_time=None,
44
+ timestamp_for_filtering=None,
42
45
  ):
43
46
  name = vector.metadata.name
44
47
  if not target or not hasattr(target, "to_dict"):
@@ -92,20 +95,27 @@ def run_merge_job(
92
95
  set_default_resources(
93
96
  function.spec.executor_resources, function.with_executor_requests
94
97
  )
98
+ if start_time and not isinstance(start_time, str):
99
+ start_time = start_time.isoformat()
100
+ if end_time and not isinstance(end_time, str):
101
+ end_time = end_time.isoformat()
95
102
 
96
103
  task = new_task(
97
104
  name=name,
98
105
  params={
99
106
  "vector_uri": vector.uri,
100
107
  "target": target.to_dict(),
101
- "timestamp_column": timestamp_column,
108
+ "entity_timestamp_column": entity_timestamp_column,
102
109
  "drop_columns": drop_columns,
103
110
  "with_indexes": with_indexes,
104
111
  "query": query,
105
- "join_type": join_type,
112
+ "order_by": order_by,
113
+ "start_time": start_time,
114
+ "end_time": end_time,
115
+ "timestamp_for_filtering": timestamp_for_filtering,
106
116
  "engine_args": engine_args,
107
117
  },
108
- inputs={"entity_rows": entity_rows},
118
+ inputs={"entity_rows": entity_rows} if entity_rows is not None else {},
109
119
  )
110
120
  task.spec.secret_sources = run_config.secret_sources
111
121
  task.set_label("job-type", "feature-merge").set_label("feature-vector", vector.uri)
@@ -120,15 +130,16 @@ def run_merge_job(
120
130
  watch=run_config.watch,
121
131
  )
122
132
  logger.info(f"feature vector merge job started, run id = {run.uid()}")
123
- return RemoteVectorResponse(vector, run)
133
+ return RemoteVectorResponse(vector, run, with_indexes)
124
134
 
125
135
 
126
136
  class RemoteVectorResponse:
127
137
  """get_offline_features response object"""
128
138
 
129
- def __init__(self, vector, run):
139
+ def __init__(self, vector, run, with_indexes=False):
130
140
  self.run = run
131
141
  self.vector = vector
142
+ self.with_indexes = with_indexes or self.vector.spec.with_indexes
132
143
 
133
144
  @property
134
145
  def status(self):
@@ -147,12 +158,18 @@ class RemoteVectorResponse:
147
158
  :param df_module: optional, py module used to create the DataFrame (e.g. pd, dd, cudf, ..)
148
159
  :param kwargs: extended DataItem.as_df() args
149
160
  """
161
+
150
162
  file_format = kwargs.get("format")
151
163
  if not file_format:
152
164
  file_format = self.run.status.results["target"]["kind"]
153
- return mlrun.get_dataitem(self.target_uri).as_df(
165
+ df = mlrun.get_dataitem(self.target_uri).as_df(
154
166
  columns=columns, df_module=df_module, format=file_format, **kwargs
155
167
  )
168
+ if self.with_indexes:
169
+ df.set_index(
170
+ list(self.vector.spec.entity_fields.keys()), inplace=True, drop=True
171
+ )
172
+ return df
156
173
 
157
174
  @property
158
175
  def target_uri(self):
@@ -166,17 +183,18 @@ import mlrun
166
183
  import mlrun.feature_store.retrieval
167
184
  from mlrun.datastore.targets import get_target_driver
168
185
  def merge_handler(context, vector_uri, target, entity_rows=None,
169
- timestamp_column=None, drop_columns=None, with_indexes=None, query=None, join_type='inner', engine_args=None):
186
+ entity_timestamp_column=None, drop_columns=None, with_indexes=None, query=None,
187
+ engine_args=None, order_by=None, start_time=None, end_time=None, timestamp_for_filtering=None):
170
188
  vector = context.get_store_resource(vector_uri)
171
189
  store_target = get_target_driver(target, vector)
172
- entity_timestamp_column = timestamp_column or vector.spec.timestamp_field
173
190
  if entity_rows:
174
191
  entity_rows = entity_rows.as_df()
175
192
 
176
193
  context.logger.info(f"starting vector merge task to {vector.uri}")
177
194
  merger = mlrun.feature_store.retrieval.{{{engine}}}(vector, **(engine_args or {}))
178
195
  merger.start(entity_rows, entity_timestamp_column, store_target, drop_columns, with_indexes=with_indexes,
179
- query=query, join_type=join_type)
196
+ query=query, order_by=order_by, start_time=start_time, end_time=end_time,
197
+ timestamp_for_filtering=timestamp_for_filtering)
180
198
 
181
199
  target = vector.status.targets[store_target.name].to_dict()
182
200
  context.log_result('feature_vector', vector.uri)
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -16,143 +16,16 @@ import re
16
16
 
17
17
  import pandas as pd
18
18
 
19
- from ..feature_vector import OfflineVectorResponse
20
19
  from .base import BaseMerger
21
20
 
22
21
 
23
22
  class LocalFeatureMerger(BaseMerger):
24
23
  engine = "local"
24
+ support_offline = True
25
25
 
26
26
  def __init__(self, vector, **engine_args):
27
27
  super().__init__(vector, **engine_args)
28
28
 
29
- def _generate_vector(
30
- self,
31
- entity_rows,
32
- entity_timestamp_column,
33
- feature_set_objects,
34
- feature_set_fields,
35
- start_time=None,
36
- end_time=None,
37
- query=None,
38
- ):
39
-
40
- feature_sets = []
41
- dfs = []
42
- keys = (
43
- []
44
- ) # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
45
- # featureset is connected to the previous one, and within each record the left keys are indicated in index 0
46
- # and the right keys in index 1, this keys will be the keys that will be used in this join
47
- all_columns = []
48
-
49
- fs_link_list = self._create_linked_relation_list(
50
- feature_set_objects, feature_set_fields
51
- )
52
-
53
- for node in fs_link_list:
54
- name = node.name
55
- feature_set = feature_set_objects[name]
56
- feature_sets.append(feature_set)
57
- columns = feature_set_fields[name]
58
- column_names = [name for name, alias in columns]
59
-
60
- for col in node.data["save_cols"]:
61
- if col not in column_names:
62
- self._append_drop_column(col)
63
- column_names += node.data["save_cols"]
64
-
65
- # handling case where there are multiple feature sets and user creates vector where entity_timestamp_
66
- # column is from a specific feature set (can't be entity timestamp)
67
- if (
68
- entity_timestamp_column in column_names
69
- or feature_set.spec.timestamp_key == entity_timestamp_column
70
- ):
71
- df = feature_set.to_dataframe(
72
- columns=column_names,
73
- start_time=start_time,
74
- end_time=end_time,
75
- time_column=entity_timestamp_column,
76
- )
77
- else:
78
- df = feature_set.to_dataframe(
79
- columns=column_names,
80
- time_column=entity_timestamp_column,
81
- )
82
- if df.index.names[0]:
83
- df.reset_index(inplace=True)
84
- column_names += node.data["save_index"]
85
- node.data["save_cols"] += node.data["save_index"]
86
- entity_timestamp_column_list = (
87
- [entity_timestamp_column]
88
- if entity_timestamp_column
89
- else feature_set.spec.timestamp_key
90
- )
91
- if entity_timestamp_column_list:
92
- column_names += entity_timestamp_column_list
93
- node.data["save_cols"] += entity_timestamp_column_list
94
- # rename columns to be unique for each feature set
95
- rename_col_dict = {
96
- col: f"{col}_{name}"
97
- for col in column_names
98
- if col not in node.data["save_cols"]
99
- }
100
- df.rename(
101
- columns=rename_col_dict,
102
- inplace=True,
103
- )
104
-
105
- dfs.append(df)
106
- keys.append([node.data["left_keys"], node.data["right_keys"]])
107
-
108
- # update alias according to the unique column name
109
- new_columns = []
110
- for col, alias in columns:
111
- if col in rename_col_dict and alias:
112
- new_columns.append((rename_col_dict[col], alias))
113
- elif col in rename_col_dict and not alias:
114
- new_columns.append((rename_col_dict[col], col))
115
- else:
116
- new_columns.append((col, alias))
117
- all_columns.append(new_columns)
118
- self._update_alias(
119
- dictionary={name: alias for name, alias in new_columns if alias}
120
- )
121
-
122
- self.merge(
123
- entity_df=entity_rows,
124
- entity_timestamp_column=entity_timestamp_column,
125
- featuresets=feature_sets,
126
- featureset_dfs=dfs,
127
- keys=keys,
128
- all_columns=all_columns,
129
- )
130
-
131
- self._result_df.drop(columns=self._drop_columns, inplace=True, errors="ignore")
132
-
133
- # renaming all columns according to self._alias
134
- self._result_df.rename(
135
- columns=self._alias,
136
- inplace=True,
137
- )
138
- if self.vector.status.label_column:
139
- self._result_df.dropna(
140
- subset=[self.vector.status.label_column],
141
- inplace=True,
142
- )
143
- # filter joined data frame by the query param
144
- if query:
145
- self._result_df.query(query, inplace=True)
146
-
147
- if self._drop_indexes:
148
- self._result_df.reset_index(drop=True, inplace=True)
149
- else:
150
- self._set_indexes(self._result_df)
151
-
152
- self._write_to_target()
153
-
154
- return OfflineVectorResponse(self)
155
-
156
29
  def _asof_join(
157
30
  self,
158
31
  entity_df,
@@ -161,7 +34,6 @@ class LocalFeatureMerger(BaseMerger):
161
34
  featureset_df,
162
35
  left_keys: list,
163
36
  right_keys: list,
164
- columns: list,
165
37
  ):
166
38
 
167
39
  indexes = None
@@ -176,7 +48,7 @@ class LocalFeatureMerger(BaseMerger):
176
48
  featureset_df[featureset.spec.timestamp_key]
177
49
  )
178
50
  entity_df.sort_values(by=entity_timestamp_column, inplace=True)
179
- featureset_df.sort_values(by=entity_timestamp_column, inplace=True)
51
+ featureset_df.sort_values(by=featureset.spec.timestamp_key, inplace=True)
180
52
 
181
53
  merged_df = pd.merge_asof(
182
54
  entity_df,
@@ -191,7 +63,6 @@ class LocalFeatureMerger(BaseMerger):
191
63
  for col in merged_df.columns:
192
64
  if re.findall(f"_{featureset.metadata.name}_$", col):
193
65
  self._append_drop_column(col)
194
-
195
66
  # Undo indexing tricks for asof merge
196
67
  # to return the correct indexes and not
197
68
  # overload `index` columns
@@ -213,7 +84,6 @@ class LocalFeatureMerger(BaseMerger):
213
84
  featureset_df,
214
85
  left_keys: list,
215
86
  right_keys: list,
216
- columns: list,
217
87
  ):
218
88
  fs_name = featureset.metadata.name
219
89
  merged_df = pd.merge(
@@ -228,3 +98,40 @@ class LocalFeatureMerger(BaseMerger):
228
98
  if re.findall(f"_{fs_name}_$", col):
229
99
  self._append_drop_column(col)
230
100
  return merged_df
101
+
102
+ def _create_engine_env(self):
103
+ pass
104
+
105
+ def _get_engine_df(
106
+ self,
107
+ feature_set,
108
+ feature_set_name,
109
+ column_names=None,
110
+ start_time=None,
111
+ end_time=None,
112
+ time_column=None,
113
+ ):
114
+ df = feature_set.to_dataframe(
115
+ columns=column_names,
116
+ start_time=start_time,
117
+ end_time=end_time,
118
+ time_column=time_column,
119
+ )
120
+ if df.index.names[0]:
121
+ df.reset_index(inplace=True)
122
+ return df
123
+
124
+ def _rename_columns_and_select(self, df, rename_col_dict, columns=None):
125
+ df.rename(
126
+ columns=rename_col_dict,
127
+ inplace=True,
128
+ )
129
+
130
+ def _drop_columns_from_result(self):
131
+ self._result_df.drop(columns=self._drop_columns, inplace=True, errors="ignore")
132
+
133
+ def _filter(self, query):
134
+ self._result_df.query(query, inplace=True)
135
+
136
+ def _order_by(self, order_by_active):
137
+ self._result_df.sort_values(by=order_by_active, ignore_index=True, inplace=True)