mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444)
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3rc1.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -12,10 +12,12 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import copy
15
+ import importlib.util
16
+ import pathlib
17
+ import sys
15
18
  import warnings
16
19
  from datetime import datetime
17
- from typing import List, Optional, Union
18
- from urllib.parse import urlparse
20
+ from typing import Any, Dict, List, Optional, Union
19
21
 
20
22
  import pandas as pd
21
23
 
@@ -28,7 +30,6 @@ from ..datastore.store_resources import parse_store_uri
28
30
  from ..datastore.targets import (
29
31
  BaseStoreTarget,
30
32
  get_default_prefix_for_source,
31
- get_default_targets,
32
33
  get_target_driver,
33
34
  kind_to_driver,
34
35
  validate_target_list,
@@ -39,7 +40,7 @@ from ..model import DataSource, DataTargetBase
39
40
  from ..runtimes import RuntimeKinds
40
41
  from ..runtimes.function_reference import FunctionReference
41
42
  from ..serving.server import Response
42
- from ..utils import get_caller_globals, logger, normalize_name, str_to_timestamp
43
+ from ..utils import get_caller_globals, logger, normalize_name
43
44
  from .common import (
44
45
  RunConfig,
45
46
  get_feature_set_by_uri,
@@ -61,7 +62,7 @@ from .ingestion import (
61
62
  run_ingestion_job,
62
63
  run_spark_graph,
63
64
  )
64
- from .retrieval import get_merger, init_feature_vector_graph, run_merge_job
65
+ from .retrieval import get_merger, run_merge_job
65
66
 
66
67
  _v3iofs = None
67
68
  spark_transform_handler = "transform"
@@ -77,7 +78,7 @@ def _features_to_vector_and_check_permissions(features, update_stats):
77
78
  "feature vector name must be specified"
78
79
  )
79
80
  verify_feature_vector_permissions(
80
- vector, mlrun.api.schemas.AuthorizationAction.update
81
+ vector, mlrun.common.schemas.AuthorizationAction.update
81
82
  )
82
83
 
83
84
  vector.save()
@@ -102,8 +103,9 @@ def get_offline_features(
102
103
  engine: str = None,
103
104
  engine_args: dict = None,
104
105
  query: str = None,
105
- join_type: str = "inner",
106
+ order_by: Union[str, List[str]] = None,
106
107
  spark_service: str = None,
108
+ timestamp_for_filtering: Union[str, Dict[str, str]] = None,
107
109
  ) -> OfflineVectorResponse:
108
110
  """retrieve offline feature vector results
109
111
 
@@ -133,35 +135,44 @@ def get_offline_features(
133
135
  print(vector.get_stats_table())
134
136
  resp.to_parquet("./out.parquet")
135
137
 
136
- :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires update
137
- permissions
138
- :param entity_rows: dataframe with entity rows to join with
139
- :param target: where to write the results to
140
- :param drop_columns: list of columns to drop from the final result
141
- :param entity_timestamp_column: timestamp column name in the entity rows dataframe
142
- :param run_config: function and/or run configuration
143
- see :py:class:`~mlrun.feature_store.RunConfig`
144
- :param start_time: datetime, low limit of time needed to be filtered. Optional.
145
- entity_timestamp_column must be passed when using time filtering.
146
- :param end_time: datetime, high limit of time needed to be filtered. Optional.
147
- entity_timestamp_column must be passed when using time filtering.
148
- :param with_indexes: return vector with index columns and timestamp_key from the feature sets (default False)
149
- :param update_stats: update features statistics from the requested feature sets on the vector. Default is False.
150
- :param engine: processing engine kind ("local", "dask", or "spark")
151
- :param engine_args: kwargs for the processing engine
152
- :param query: The query string used to filter rows
153
- :param spark_service: Name of the spark service to be used (when using a remote-spark runtime)
154
- :param join_type: {'left', 'right', 'outer', 'inner'}, default 'inner'
155
- Supported retrieval engines: "dask", "local"
156
- This parameter is in use when entity_timestamp_column and
157
- feature_vector.spec.timestamp_field are None, if one of them
158
- isn't none we're preforming as_of join.
159
- Possible values :
160
- * left: use only keys from left frame (SQL: left outer join)
161
- * right: use only keys from right frame (SQL: right outer join)
162
- * outer: use union of keys from both frames (SQL: full outer join)
163
- * inner: use intersection of keys from both frames (SQL: inner join).
138
+ :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires
139
+ update permissions
140
+ :param entity_rows: dataframe with entity rows to join with
141
+ :param target: where to write the results to
142
+ :param drop_columns: list of columns to drop from the final result
143
+ :param entity_timestamp_column: timestamp column name in the entity rows dataframe. can be specified
144
+ only if param entity_rows was specified.
145
+ :param run_config: function and/or run configuration
146
+ see :py:class:`~mlrun.feature_store.RunConfig`
147
+ :param start_time: datetime, low limit of time needed to be filtered. Optional.
148
+ :param end_time: datetime, high limit of time needed to be filtered. Optional.
149
+ :param with_indexes: Return vector with/without the entities and the timestamp_key of the feature sets
150
+ and with/without entity_timestamp_column and timestamp_for_filtering columns.
151
+ This property can be specified also in the feature vector spec
152
+ (feature_vector.spec.with_indexes)
153
+ (default False)
154
+ :param update_stats: update features statistics from the requested feature sets on the vector.
155
+ (default False).
156
+ :param engine: processing engine kind ("local", "dask", or "spark")
157
+ :param engine_args: kwargs for the processing engine
158
+ :param query: The query string used to filter rows on the output
159
+ :param spark_service: Name of the spark service to be used (when using a remote-spark runtime)
160
+ :param order_by: Name or list of names to order by. The name or the names in the list can be the
161
+ feature name or the alias of the feature you pass in the feature list.
162
+ :param timestamp_for_filtering: name of the column to filter by, can be str for all the feature sets or a
163
+ dictionary ({<feature set name>: <timestamp column name>, ...})
164
+ that indicates the timestamp column name for each feature set. Optional.
165
+ By default, the filter executes on the timestamp_key of each feature set.
166
+ Note: the time filtering is performed on each feature set before the
167
+ merge process using start_time and end_time params.
168
+
164
169
  """
170
+ if entity_rows is None and entity_timestamp_column is not None:
171
+ raise mlrun.errors.MLRunInvalidArgumentError(
172
+ "entity_timestamp_column param "
173
+ "can not be specified without entity_rows param"
174
+ )
175
+
165
176
  if isinstance(feature_vector, FeatureVector):
166
177
  update_stats = True
167
178
 
@@ -184,23 +195,17 @@ def get_offline_features(
184
195
  engine_args,
185
196
  spark_service,
186
197
  entity_rows,
187
- timestamp_column=entity_timestamp_column,
198
+ entity_timestamp_column=entity_timestamp_column,
188
199
  run_config=run_config,
189
200
  drop_columns=drop_columns,
190
201
  with_indexes=with_indexes,
191
202
  query=query,
192
- join_type=join_type,
203
+ order_by=order_by,
204
+ start_time=start_time,
205
+ end_time=end_time,
206
+ timestamp_for_filtering=timestamp_for_filtering,
193
207
  )
194
208
 
195
- start_time = str_to_timestamp(start_time)
196
- end_time = str_to_timestamp(end_time)
197
- if (start_time or end_time) and not entity_timestamp_column:
198
- raise TypeError(
199
- "entity_timestamp_column or feature_vector.spec.timestamp_field is required when passing start/end time"
200
- )
201
- if start_time and not end_time:
202
- # if end_time is not specified set it to now()
203
- end_time = pd.Timestamp.now()
204
209
  merger = merger_engine(feature_vector, **(engine_args or {}))
205
210
  return merger.start(
206
211
  entity_rows,
@@ -209,10 +214,11 @@ def get_offline_features(
209
214
  drop_columns=drop_columns,
210
215
  start_time=start_time,
211
216
  end_time=end_time,
217
+ timestamp_for_filtering=timestamp_for_filtering,
212
218
  with_indexes=with_indexes,
213
219
  update_stats=update_stats,
214
220
  query=query,
215
- join_type=join_type,
221
+ order_by=order_by,
216
222
  )
217
223
 
218
224
 
@@ -222,6 +228,7 @@ def get_online_feature_service(
222
228
  fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
223
229
  impute_policy: dict = None,
224
230
  update_stats: bool = False,
231
+ entity_keys: List[str] = None,
225
232
  ) -> OnlineVectorService:
226
233
  """initialize and return online feature vector service api,
227
234
  returns :py:class:`~mlrun.feature_store.OnlineVectorService`
@@ -241,14 +248,15 @@ def get_online_feature_service(
241
248
 
242
249
  Example with imputing::
243
250
 
244
- with get_online_feature_service(vector_uri, impute_policy={"*": "$mean", "amount": 0)) as svc:
251
+ with get_online_feature_service(vector_uri, entity_keys=['id'],
252
+ impute_policy={"*": "$mean", "amount": 0}) as svc:
245
253
  resp = svc.get([{"id": "C123487"}])
246
254
 
247
255
  2. as a simple function; note that with this option you need to close the session.
248
256
 
249
257
  Example::
250
258
 
251
- svc = get_online_feature_service(vector_uri)
259
+ svc = get_online_feature_service(vector_uri, entity_keys=['ticker'])
252
260
  try:
253
261
  resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
254
262
  print(resp)
@@ -260,7 +268,8 @@ def get_online_feature_service(
260
268
 
261
269
  Example with imputing::
262
270
 
263
- svc = get_online_feature_service(vector_uri, impute_policy={"*": "$mean", "amount": 0))
271
+ svc = get_online_feature_service(vector_uri, entity_keys=['id'],
272
+ impute_policy={"*": "$mean", "amount": 0})
264
273
  try:
265
274
  resp = svc.get([{"id": "C123487"}])
266
275
  except Exception as e:
@@ -268,15 +277,21 @@ def get_online_feature_service(
268
277
  finally:
269
278
  svc.close()
270
279
 
271
- :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires update
272
- permissions
273
- :param run_config: function and/or run configuration for remote jobs/services
274
- :param impute_policy: a dict with `impute_policy` per feature, the dict key is the feature name and the dict
275
- value indicate which value will be used in case the feature is NaN/empty, the replaced
276
- value can be fixed number for constants or $mean, $max, $min, $std, $count for statistical
277
- values. "*" is used to specify the default for all features, example: `{"*": "$mean"}`
278
- :param fixed_window_type: determines how to query the fixed window values which were previously inserted by ingest
279
- :param update_stats: update features statistics from the requested feature sets on the vector. Default: False.
280
+ :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires update
281
+ permissions.
282
+ :param run_config: function and/or run configuration for remote jobs/services
283
+ :param impute_policy: a dict with `impute_policy` per feature, the dict key is the feature name and the dict
284
+ value indicates which value will be used in case the feature is NaN/empty, the replaced
285
+ value can be fixed number for constants or $mean, $max, $min, $std, $count
286
+ for statistical
287
+ values. "*" is used to specify the default for all features, example: `{"*": "$mean"}`
288
+ :param fixed_window_type: determines how to query the fixed window values which were previously inserted by ingest
289
+ :param update_stats: update features statistics from the requested feature sets on the vector.
290
+ Default: False.
291
+ :param entity_keys: Entity list of the first feature_set in the vector.
292
+ The indexes that are used to query the online service.
293
+ :return: The initialized `OnlineVectorService`.
294
+ Will be used in subclasses where `support_online=True`.
280
295
  """
281
296
  if isinstance(feature_vector, FeatureVector):
282
297
  update_stats = True
@@ -288,17 +303,15 @@ def get_online_feature_service(
288
303
  if impute_policy and not feature_vector.status.stats:
289
304
  update_stats = True
290
305
 
291
- graph, index_columns = init_feature_vector_graph(
292
- feature_vector, fixed_window_type, update_stats=update_stats
293
- )
294
- service = OnlineVectorService(
295
- feature_vector, graph, index_columns, impute_policy=impute_policy
296
- )
297
- service.initialize()
298
-
306
+ engine_args = {"impute_policy": impute_policy}
307
+ merger_engine = get_merger("storey")
299
308
  # todo: support remote service (using remote nuclio/mlrun function if run_config)
300
309
 
301
- return service
310
+ merger = merger_engine(feature_vector, **engine_args)
311
+
312
+ return merger.init_online_vector_service(
313
+ entity_keys, fixed_window_type, update_stats=update_stats
314
+ )
302
315
 
303
316
 
304
317
  def _rename_source_dataframe_columns(df):
@@ -322,6 +335,21 @@ def _rename_source_dataframe_columns(df):
322
335
  return df
323
336
 
324
337
 
338
+ def _get_namespace(run_config: RunConfig) -> Dict[str, Any]:
339
+ # if running locally, we need to import the file dynamically to get its namespace
340
+ if run_config and run_config.local and run_config.function:
341
+ filename = run_config.function.spec.filename
342
+ if filename:
343
+ module_name = pathlib.Path(filename).name.rsplit(".", maxsplit=1)[0]
344
+ spec = importlib.util.spec_from_file_location(module_name, filename)
345
+ module = importlib.util.module_from_spec(spec)
346
+ sys.modules[module_name] = module
347
+ spec.loader.exec_module(module)
348
+ return vars(__import__(module_name))
349
+ else:
350
+ return get_caller_globals()
351
+
352
+
325
353
  def ingest(
326
354
  featureset: Union[FeatureSet, str] = None,
327
355
  source=None,
@@ -367,7 +395,8 @@ def ingest(
367
395
  :param targets: optional list of data target objects
368
396
  :param namespace: namespace or module containing graph classes
369
397
  :param return_df: indicate if to return a dataframe with the graph results
370
- :param infer_options: schema and stats infer options (:py:class:`~mlrun.feature_store.InferOptions`)
398
+ :param infer_options: schema (for discovery of entities, features in featureset), index, stats,
399
+ histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
371
400
  :param run_config: function and/or run configuration for remote jobs,
372
401
  see :py:class:`~mlrun.feature_store.RunConfig`
373
402
  :param mlrun_context: mlrun context (when running as a job), for internal use !
@@ -405,6 +434,15 @@ def ingest(
405
434
  raise mlrun.errors.MLRunInvalidArgumentError(
406
435
  "feature set and source must be specified"
407
436
  )
437
+ if (
438
+ not mlrun_context
439
+ and not targets
440
+ and not (featureset.spec.targets or featureset.spec.with_default_targets)
441
+ and (run_config is not None and not run_config.local)
442
+ ):
443
+ raise mlrun.errors.MLRunInvalidArgumentError(
444
+ f"Feature set {featureset.metadata.name} is remote ingested with no targets defined, aborting"
445
+ )
408
446
 
409
447
  if featureset is not None:
410
448
  featureset.validate_steps(namespace=namespace)
@@ -416,7 +454,7 @@ def ingest(
416
454
  )
417
455
  # remote job execution
418
456
  verify_feature_set_permissions(
419
- featureset, mlrun.api.schemas.AuthorizationAction.update
457
+ featureset, mlrun.common.schemas.AuthorizationAction.update
420
458
  )
421
459
  run_config = run_config.copy() if run_config else RunConfig()
422
460
  source, run_config.parameters = set_task_params(
@@ -448,7 +486,7 @@ def ingest(
448
486
 
449
487
  featureset.validate_steps(namespace=namespace)
450
488
  verify_feature_set_permissions(
451
- featureset, mlrun.api.schemas.AuthorizationAction.update
489
+ featureset, mlrun.common.schemas.AuthorizationAction.update
452
490
  )
453
491
  if not source:
454
492
  raise mlrun.errors.MLRunInvalidArgumentError(
@@ -477,19 +515,21 @@ def ingest(
477
515
  f"Source.end_time is {str(source.end_time)}"
478
516
  )
479
517
 
480
- if mlrun_context:
481
- mlrun_context.logger.info(
482
- f"starting ingestion task to {featureset.uri}.{filter_time_string}"
483
- )
518
+ if mlrun_context:
519
+ mlrun_context.logger.info(
520
+ f"starting ingestion task to {featureset.uri}.{filter_time_string}"
521
+ )
522
+
484
523
  return_df = False
485
524
 
486
525
  if featureset.spec.passthrough:
487
526
  featureset.spec.source = source
488
527
  featureset.spec.validate_no_processing_for_passthrough()
489
528
 
490
- namespace = namespace or get_caller_globals()
529
+ if not namespace:
530
+ namespace = _get_namespace(run_config)
491
531
 
492
- targets_to_ingest = targets or featureset.spec.targets or get_default_targets()
532
+ targets_to_ingest = targets or featureset.spec.targets
493
533
  targets_to_ingest = copy.deepcopy(targets_to_ingest)
494
534
 
495
535
  validate_target_paths_for_engine(targets_to_ingest, featureset.spec.engine, source)
@@ -633,10 +673,14 @@ def preview(
633
673
  :param entity_columns: list of entity (index) column names
634
674
  :param timestamp_key: DEPRECATED. Use FeatureSet parameter.
635
675
  :param namespace: namespace or module containing graph classes
636
- :param options: schema and stats infer options (:py:class:`~mlrun.feature_store.InferOptions`)
676
+ :param options: schema (for discovery of entities, features in featureset), index, stats,
677
+ histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
637
678
  :param verbose: verbose log
638
679
  :param sample_size: num of rows to sample from the dataset (for large datasets)
639
680
  """
681
+ if isinstance(source, pd.DataFrame):
682
+ source = _rename_source_dataframe_columns(source)
683
+
640
684
  # preview reads the source as a pandas df, which is not fully compatible with spark
641
685
  if featureset.spec.engine == "spark":
642
686
  raise mlrun.errors.MLRunInvalidArgumentError(
@@ -661,7 +705,7 @@ def preview(
661
705
  source = mlrun.store_manager.object(url=source).as_df()
662
706
 
663
707
  verify_feature_set_permissions(
664
- featureset, mlrun.api.schemas.AuthorizationAction.update
708
+ featureset, mlrun.common.schemas.AuthorizationAction.update
665
709
  )
666
710
 
667
711
  featureset.spec.validate_no_processing_for_passthrough()
@@ -686,7 +730,9 @@ def preview(
686
730
  )
687
731
  # reduce the size of the ingestion if we do not infer stats
688
732
  rows_limit = (
689
- 0 if InferOptions.get_common_options(options, InferOptions.Stats) else 1000
733
+ None
734
+ if InferOptions.get_common_options(options, InferOptions.Stats)
735
+ else 1000
690
736
  )
691
737
  source = init_featureset_graph(
692
738
  source,
@@ -757,7 +803,7 @@ def deploy_ingestion_service(
757
803
  featureset = get_feature_set_by_uri(featureset)
758
804
 
759
805
  verify_feature_set_permissions(
760
- featureset, mlrun.api.schemas.AuthorizationAction.update
806
+ featureset, mlrun.common.schemas.AuthorizationAction.update
761
807
  )
762
808
 
763
809
  verify_feature_set_exists(featureset)
@@ -770,7 +816,7 @@ def deploy_ingestion_service(
770
816
  name=featureset.metadata.name,
771
817
  )
772
818
 
773
- targets_to_ingest = targets or featureset.spec.targets or get_default_targets()
819
+ targets_to_ingest = targets or featureset.spec.targets
774
820
  targets_to_ingest = copy.deepcopy(targets_to_ingest)
775
821
  featureset.update_targets_for_ingest(targets_to_ingest)
776
822
 
@@ -832,7 +878,11 @@ def _ingest_with_spark(
832
878
  f"{featureset.metadata.project}-{featureset.metadata.name}"
833
879
  )
834
880
 
835
- spark = pyspark.sql.SparkSession.builder.appName(session_name).getOrCreate()
881
+ spark = (
882
+ pyspark.sql.SparkSession.builder.appName(session_name)
883
+ .config("spark.sql.session.timeZone", "UTC")
884
+ .getOrCreate()
885
+ )
836
886
  created_spark_context = True
837
887
 
838
888
  timestamp_key = featureset.spec.timestamp_key
@@ -843,12 +893,14 @@ def _ingest_with_spark(
843
893
  df = source
844
894
  else:
845
895
  df = source.to_spark_df(spark, time_field=timestamp_key)
846
- df = source.filter_df_start_end_time(df, timestamp_key)
847
896
  if featureset.spec.graph and featureset.spec.graph.steps:
848
897
  df = run_spark_graph(df, featureset, namespace, spark)
849
898
 
850
899
  if isinstance(df, Response) and df.status_code != 0:
851
900
  mlrun.errors.raise_for_status_code(df.status_code, df.body.split(": ")[1])
901
+
902
+ df.persist()
903
+
852
904
  _infer_from_static_df(df, featureset, options=infer_options)
853
905
 
854
906
  key_columns = list(featureset.spec.entities.keys())
@@ -863,14 +915,6 @@ def _ingest_with_spark(
863
915
  target.set_resource(featureset)
864
916
  if featureset.spec.passthrough and target.is_offline:
865
917
  continue
866
- if target.path and urlparse(target.path).scheme == "":
867
- if mlrun_context:
868
- mlrun_context.logger.error(
869
- "Paths for spark ingest must contain schema, i.e v3io, s3, az"
870
- )
871
- raise mlrun.errors.MLRunInvalidArgumentError(
872
- "Paths for spark ingest must contain schema, i.e v3io, s3, az"
873
- )
874
918
  spark_options = target.get_spark_options(
875
919
  key_columns, timestamp_key, overwrite
876
920
  )
@@ -957,11 +1001,15 @@ def _infer_from_static_df(
957
1001
  ):
958
1002
  """infer feature-set schema & stats from static dataframe (without pipeline)"""
959
1003
  if hasattr(df, "to_dataframe"):
1004
+ if hasattr(df, "time_field"):
1005
+ time_field = df.time_field or featureset.spec.timestamp_key
1006
+ else:
1007
+ time_field = featureset.spec.timestamp_key
960
1008
  if df.is_iterator():
961
1009
  # todo: describe over multiple chunks
962
- df = next(df.to_dataframe())
1010
+ df = next(df.to_dataframe(time_field=time_field))
963
1011
  else:
964
- df = df.to_dataframe()
1012
+ df = df.to_dataframe(time_field=time_field)
965
1013
  inferer = get_infer_interface(df)
966
1014
  if InferOptions.get_common_options(options, InferOptions.schema()):
967
1015
  featureset.spec.timestamp_key = inferer.infer_schema(
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -16,11 +16,11 @@ from copy import copy
16
16
 
17
17
  import mlrun
18
18
  import mlrun.errors
19
- from mlrun.api.schemas import AuthorizationVerificationInput
19
+ from mlrun.common.schemas import AuthorizationVerificationInput
20
20
  from mlrun.runtimes import BaseRuntime
21
21
  from mlrun.runtimes.function_reference import FunctionReference
22
22
  from mlrun.runtimes.utils import enrich_function_from_dict
23
- from mlrun.utils import StorePrefix, logger, mlconf, parse_versioned_object_uri
23
+ from mlrun.utils import StorePrefix, logger, parse_versioned_object_uri
24
24
 
25
25
  from ..config import config
26
26
 
@@ -86,13 +86,13 @@ def get_feature_set_by_uri(uri, project=None):
86
86
  db = mlrun.get_run_db()
87
87
  project, name, tag, uid = parse_feature_set_uri(uri, project)
88
88
  resource = (
89
- mlrun.api.schemas.AuthorizationResourceTypes.feature_set.to_resource_string(
89
+ mlrun.common.schemas.AuthorizationResourceTypes.feature_set.to_resource_string(
90
90
  project, "feature-set"
91
91
  )
92
92
  )
93
93
 
94
94
  auth_input = AuthorizationVerificationInput(
95
- resource=resource, action=mlrun.api.schemas.AuthorizationAction.read
95
+ resource=resource, action=mlrun.common.schemas.AuthorizationAction.read
96
96
  )
97
97
  db.verify_authorization(auth_input)
98
98
 
@@ -115,19 +115,17 @@ def get_feature_vector_by_uri(uri, project=None, update=True):
115
115
 
116
116
  project, name, tag, uid = parse_versioned_object_uri(uri, default_project)
117
117
 
118
- resource = (
119
- mlrun.api.schemas.AuthorizationResourceTypes.feature_vector.to_resource_string(
120
- project, "feature-vector"
121
- )
118
+ resource = mlrun.common.schemas.AuthorizationResourceTypes.feature_vector.to_resource_string(
119
+ project, "feature-vector"
122
120
  )
123
121
 
124
122
  if update:
125
123
  auth_input = AuthorizationVerificationInput(
126
- resource=resource, action=mlrun.api.schemas.AuthorizationAction.update
124
+ resource=resource, action=mlrun.common.schemas.AuthorizationAction.update
127
125
  )
128
126
  else:
129
127
  auth_input = AuthorizationVerificationInput(
130
- resource=resource, action=mlrun.api.schemas.AuthorizationAction.read
128
+ resource=resource, action=mlrun.common.schemas.AuthorizationAction.read
131
129
  )
132
130
 
133
131
  db.verify_authorization(auth_input)
@@ -136,12 +134,12 @@ def get_feature_vector_by_uri(uri, project=None, update=True):
136
134
 
137
135
 
138
136
  def verify_feature_set_permissions(
139
- feature_set, action: mlrun.api.schemas.AuthorizationAction
137
+ feature_set, action: mlrun.common.schemas.AuthorizationAction
140
138
  ):
141
139
  project, _, _, _ = parse_feature_set_uri(feature_set.uri)
142
140
 
143
141
  resource = (
144
- mlrun.api.schemas.AuthorizationResourceTypes.feature_set.to_resource_string(
142
+ mlrun.common.schemas.AuthorizationResourceTypes.feature_set.to_resource_string(
145
143
  project, "feature-set"
146
144
  )
147
145
  )
@@ -164,14 +162,12 @@ def verify_feature_set_exists(feature_set):
164
162
 
165
163
 
166
164
  def verify_feature_vector_permissions(
167
- feature_vector, action: mlrun.api.schemas.AuthorizationAction
165
+ feature_vector, action: mlrun.common.schemas.AuthorizationAction
168
166
  ):
169
- project = feature_vector._metadata.project or mlconf.default_project
167
+ project = feature_vector._metadata.project or config.default_project
170
168
 
171
- resource = (
172
- mlrun.api.schemas.AuthorizationResourceTypes.feature_vector.to_resource_string(
173
- project, "feature-vector"
174
- )
169
+ resource = mlrun.common.schemas.AuthorizationResourceTypes.feature_vector.to_resource_string(
170
+ project, "feature-vector"
175
171
  )
176
172
 
177
173
  db = mlrun.get_run_db()
@@ -218,7 +214,7 @@ class RunConfig:
218
214
  config = RunConfig("mycode.py", image="mlrun/mlrun", requirements=["spacy"])
219
215
 
220
216
  # config for using function object
221
- function = mlrun.import_function("hub://some_function")
217
+ function = mlrun.import_function("hub://some-function")
222
218
  config = RunConfig(function)
223
219
 
224
220
  :param function: this can be function uri or function object or path to function code (.py/.ipynb)