mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (444) hide show
  1. mlrun/__init__.py +3 -3
  2. mlrun/__main__.py +79 -37
  3. mlrun/api/__init__.py +1 -1
  4. mlrun/api/api/__init__.py +1 -1
  5. mlrun/api/api/api.py +4 -4
  6. mlrun/api/api/deps.py +10 -21
  7. mlrun/api/api/endpoints/__init__.py +1 -1
  8. mlrun/api/api/endpoints/artifacts.py +64 -36
  9. mlrun/api/api/endpoints/auth.py +4 -4
  10. mlrun/api/api/endpoints/background_tasks.py +11 -11
  11. mlrun/api/api/endpoints/client_spec.py +5 -5
  12. mlrun/api/api/endpoints/clusterization_spec.py +6 -4
  13. mlrun/api/api/endpoints/feature_store.py +124 -115
  14. mlrun/api/api/endpoints/files.py +22 -14
  15. mlrun/api/api/endpoints/frontend_spec.py +28 -21
  16. mlrun/api/api/endpoints/functions.py +142 -87
  17. mlrun/api/api/endpoints/grafana_proxy.py +89 -442
  18. mlrun/api/api/endpoints/healthz.py +20 -7
  19. mlrun/api/api/endpoints/hub.py +320 -0
  20. mlrun/api/api/endpoints/internal/__init__.py +1 -1
  21. mlrun/api/api/endpoints/internal/config.py +1 -1
  22. mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
  23. mlrun/api/api/endpoints/logs.py +11 -11
  24. mlrun/api/api/endpoints/model_endpoints.py +74 -70
  25. mlrun/api/api/endpoints/operations.py +13 -9
  26. mlrun/api/api/endpoints/pipelines.py +93 -88
  27. mlrun/api/api/endpoints/projects.py +35 -35
  28. mlrun/api/api/endpoints/runs.py +69 -27
  29. mlrun/api/api/endpoints/runtime_resources.py +28 -28
  30. mlrun/api/api/endpoints/schedules.py +98 -41
  31. mlrun/api/api/endpoints/secrets.py +37 -32
  32. mlrun/api/api/endpoints/submit.py +12 -12
  33. mlrun/api/api/endpoints/tags.py +20 -22
  34. mlrun/api/api/utils.py +251 -42
  35. mlrun/api/constants.py +1 -1
  36. mlrun/api/crud/__init__.py +18 -15
  37. mlrun/api/crud/artifacts.py +10 -10
  38. mlrun/api/crud/client_spec.py +4 -4
  39. mlrun/api/crud/clusterization_spec.py +3 -3
  40. mlrun/api/crud/feature_store.py +54 -46
  41. mlrun/api/crud/functions.py +3 -3
  42. mlrun/api/crud/hub.py +312 -0
  43. mlrun/api/crud/logs.py +11 -9
  44. mlrun/api/crud/model_monitoring/__init__.py +3 -3
  45. mlrun/api/crud/model_monitoring/grafana.py +435 -0
  46. mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
  47. mlrun/api/crud/notifications.py +149 -0
  48. mlrun/api/crud/pipelines.py +67 -52
  49. mlrun/api/crud/projects.py +51 -23
  50. mlrun/api/crud/runs.py +7 -5
  51. mlrun/api/crud/runtime_resources.py +13 -13
  52. mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
  53. mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
  54. mlrun/api/crud/runtimes/nuclio/function.py +505 -0
  55. mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
  56. mlrun/api/crud/secrets.py +88 -46
  57. mlrun/api/crud/tags.py +5 -5
  58. mlrun/api/db/__init__.py +1 -1
  59. mlrun/api/db/base.py +102 -54
  60. mlrun/api/db/init_db.py +2 -3
  61. mlrun/api/db/session.py +4 -12
  62. mlrun/api/db/sqldb/__init__.py +1 -1
  63. mlrun/api/db/sqldb/db.py +439 -196
  64. mlrun/api/db/sqldb/helpers.py +1 -1
  65. mlrun/api/db/sqldb/models/__init__.py +3 -3
  66. mlrun/api/db/sqldb/models/models_mysql.py +82 -64
  67. mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
  68. mlrun/api/db/sqldb/session.py +27 -20
  69. mlrun/api/initial_data.py +82 -24
  70. mlrun/api/launcher.py +196 -0
  71. mlrun/api/main.py +91 -22
  72. mlrun/api/middlewares.py +6 -5
  73. mlrun/api/migrations_mysql/env.py +1 -1
  74. mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
  75. mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
  76. mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
  77. mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
  78. mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
  79. mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
  80. mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
  81. mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
  82. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
  83. mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
  84. mlrun/api/migrations_sqlite/env.py +1 -1
  85. mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
  86. mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
  87. mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
  88. mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
  89. mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
  90. mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
  91. mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
  92. mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
  93. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
  94. mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
  95. mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
  96. mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
  97. mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
  98. mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
  99. mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
  100. mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
  101. mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
  102. mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
  103. mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
  104. mlrun/api/schemas/__init__.py +216 -138
  105. mlrun/api/utils/__init__.py +1 -1
  106. mlrun/api/utils/asyncio.py +1 -1
  107. mlrun/api/utils/auth/__init__.py +1 -1
  108. mlrun/api/utils/auth/providers/__init__.py +1 -1
  109. mlrun/api/utils/auth/providers/base.py +7 -7
  110. mlrun/api/utils/auth/providers/nop.py +6 -7
  111. mlrun/api/utils/auth/providers/opa.py +17 -17
  112. mlrun/api/utils/auth/verifier.py +36 -34
  113. mlrun/api/utils/background_tasks.py +24 -24
  114. mlrun/{builder.py → api/utils/builder.py} +216 -123
  115. mlrun/api/utils/clients/__init__.py +1 -1
  116. mlrun/api/utils/clients/chief.py +19 -4
  117. mlrun/api/utils/clients/iguazio.py +106 -60
  118. mlrun/api/utils/clients/log_collector.py +1 -1
  119. mlrun/api/utils/clients/nuclio.py +23 -23
  120. mlrun/api/utils/clients/protocols/grpc.py +2 -2
  121. mlrun/api/utils/db/__init__.py +1 -1
  122. mlrun/api/utils/db/alembic.py +1 -1
  123. mlrun/api/utils/db/backup.py +1 -1
  124. mlrun/api/utils/db/mysql.py +24 -25
  125. mlrun/api/utils/db/sql_collation.py +1 -1
  126. mlrun/api/utils/db/sqlite_migration.py +2 -2
  127. mlrun/api/utils/events/__init__.py +14 -0
  128. mlrun/api/utils/events/base.py +57 -0
  129. mlrun/api/utils/events/events_factory.py +41 -0
  130. mlrun/api/utils/events/iguazio.py +217 -0
  131. mlrun/api/utils/events/nop.py +55 -0
  132. mlrun/api/utils/helpers.py +16 -13
  133. mlrun/api/utils/memory_reports.py +1 -1
  134. mlrun/api/utils/periodic.py +6 -3
  135. mlrun/api/utils/projects/__init__.py +1 -1
  136. mlrun/api/utils/projects/follower.py +33 -33
  137. mlrun/api/utils/projects/leader.py +36 -34
  138. mlrun/api/utils/projects/member.py +27 -27
  139. mlrun/api/utils/projects/remotes/__init__.py +1 -1
  140. mlrun/api/utils/projects/remotes/follower.py +13 -13
  141. mlrun/api/utils/projects/remotes/leader.py +10 -10
  142. mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
  143. mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
  144. mlrun/api/utils/scheduler.py +140 -51
  145. mlrun/api/utils/singletons/__init__.py +1 -1
  146. mlrun/api/utils/singletons/db.py +9 -15
  147. mlrun/api/utils/singletons/k8s.py +677 -5
  148. mlrun/api/utils/singletons/logs_dir.py +1 -1
  149. mlrun/api/utils/singletons/project_member.py +1 -1
  150. mlrun/api/utils/singletons/scheduler.py +1 -1
  151. mlrun/artifacts/__init__.py +2 -2
  152. mlrun/artifacts/base.py +8 -2
  153. mlrun/artifacts/dataset.py +5 -3
  154. mlrun/artifacts/manager.py +7 -1
  155. mlrun/artifacts/model.py +15 -4
  156. mlrun/artifacts/plots.py +1 -1
  157. mlrun/common/__init__.py +1 -1
  158. mlrun/common/constants.py +15 -0
  159. mlrun/common/model_monitoring.py +209 -0
  160. mlrun/common/schemas/__init__.py +167 -0
  161. mlrun/{api → common}/schemas/artifact.py +13 -14
  162. mlrun/{api → common}/schemas/auth.py +10 -8
  163. mlrun/{api → common}/schemas/background_task.py +3 -3
  164. mlrun/{api → common}/schemas/client_spec.py +1 -1
  165. mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
  166. mlrun/{api → common}/schemas/constants.py +21 -8
  167. mlrun/common/schemas/events.py +36 -0
  168. mlrun/{api → common}/schemas/feature_store.py +2 -1
  169. mlrun/{api → common}/schemas/frontend_spec.py +7 -6
  170. mlrun/{api → common}/schemas/function.py +5 -5
  171. mlrun/{api → common}/schemas/http.py +3 -3
  172. mlrun/common/schemas/hub.py +134 -0
  173. mlrun/{api → common}/schemas/k8s.py +3 -3
  174. mlrun/{api → common}/schemas/memory_reports.py +1 -1
  175. mlrun/common/schemas/model_endpoints.py +342 -0
  176. mlrun/common/schemas/notification.py +57 -0
  177. mlrun/{api → common}/schemas/object.py +6 -6
  178. mlrun/{api → common}/schemas/pipeline.py +3 -3
  179. mlrun/{api → common}/schemas/project.py +6 -5
  180. mlrun/common/schemas/regex.py +24 -0
  181. mlrun/common/schemas/runs.py +30 -0
  182. mlrun/{api → common}/schemas/runtime_resource.py +3 -3
  183. mlrun/{api → common}/schemas/schedule.py +19 -7
  184. mlrun/{api → common}/schemas/secret.py +3 -3
  185. mlrun/{api → common}/schemas/tag.py +2 -2
  186. mlrun/common/types.py +25 -0
  187. mlrun/config.py +152 -20
  188. mlrun/data_types/__init__.py +7 -2
  189. mlrun/data_types/data_types.py +4 -2
  190. mlrun/data_types/infer.py +1 -1
  191. mlrun/data_types/spark.py +10 -3
  192. mlrun/datastore/__init__.py +10 -3
  193. mlrun/datastore/azure_blob.py +1 -1
  194. mlrun/datastore/base.py +185 -53
  195. mlrun/datastore/datastore.py +1 -1
  196. mlrun/datastore/filestore.py +1 -1
  197. mlrun/datastore/google_cloud_storage.py +1 -1
  198. mlrun/datastore/inmem.py +4 -1
  199. mlrun/datastore/redis.py +1 -1
  200. mlrun/datastore/s3.py +1 -1
  201. mlrun/datastore/sources.py +192 -70
  202. mlrun/datastore/spark_udf.py +44 -0
  203. mlrun/datastore/store_resources.py +4 -4
  204. mlrun/datastore/targets.py +115 -45
  205. mlrun/datastore/utils.py +127 -5
  206. mlrun/datastore/v3io.py +1 -1
  207. mlrun/datastore/wasbfs/__init__.py +1 -1
  208. mlrun/datastore/wasbfs/fs.py +1 -1
  209. mlrun/db/__init__.py +7 -5
  210. mlrun/db/base.py +112 -68
  211. mlrun/db/httpdb.py +445 -277
  212. mlrun/db/nopdb.py +491 -0
  213. mlrun/db/sqldb.py +112 -65
  214. mlrun/errors.py +6 -1
  215. mlrun/execution.py +44 -22
  216. mlrun/feature_store/__init__.py +1 -1
  217. mlrun/feature_store/api.py +143 -95
  218. mlrun/feature_store/common.py +16 -20
  219. mlrun/feature_store/feature_set.py +42 -12
  220. mlrun/feature_store/feature_vector.py +32 -21
  221. mlrun/feature_store/ingestion.py +9 -12
  222. mlrun/feature_store/retrieval/__init__.py +3 -2
  223. mlrun/feature_store/retrieval/base.py +388 -66
  224. mlrun/feature_store/retrieval/dask_merger.py +63 -151
  225. mlrun/feature_store/retrieval/job.py +30 -12
  226. mlrun/feature_store/retrieval/local_merger.py +40 -133
  227. mlrun/feature_store/retrieval/spark_merger.py +129 -127
  228. mlrun/feature_store/retrieval/storey_merger.py +173 -0
  229. mlrun/feature_store/steps.py +132 -15
  230. mlrun/features.py +8 -3
  231. mlrun/frameworks/__init__.py +1 -1
  232. mlrun/frameworks/_common/__init__.py +1 -1
  233. mlrun/frameworks/_common/artifacts_library.py +1 -1
  234. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  235. mlrun/frameworks/_common/model_handler.py +1 -1
  236. mlrun/frameworks/_common/plan.py +1 -1
  237. mlrun/frameworks/_common/producer.py +1 -1
  238. mlrun/frameworks/_common/utils.py +1 -1
  239. mlrun/frameworks/_dl_common/__init__.py +1 -1
  240. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
  241. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  242. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  243. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  244. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  245. mlrun/frameworks/_dl_common/utils.py +1 -1
  246. mlrun/frameworks/_ml_common/__init__.py +1 -1
  247. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  248. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
  249. mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
  250. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  251. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  252. mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
  253. mlrun/frameworks/_ml_common/plan.py +1 -1
  254. mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
  255. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
  256. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
  257. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  258. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  259. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  260. mlrun/frameworks/_ml_common/producer.py +1 -1
  261. mlrun/frameworks/_ml_common/utils.py +1 -1
  262. mlrun/frameworks/auto_mlrun/__init__.py +1 -1
  263. mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
  264. mlrun/frameworks/huggingface/__init__.py +1 -1
  265. mlrun/frameworks/huggingface/model_server.py +1 -1
  266. mlrun/frameworks/lgbm/__init__.py +1 -1
  267. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
  268. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  269. mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
  270. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
  271. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
  272. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  273. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  274. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  275. mlrun/frameworks/lgbm/model_handler.py +1 -1
  276. mlrun/frameworks/lgbm/model_server.py +1 -1
  277. mlrun/frameworks/lgbm/utils.py +1 -1
  278. mlrun/frameworks/onnx/__init__.py +1 -1
  279. mlrun/frameworks/onnx/dataset.py +1 -1
  280. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  281. mlrun/frameworks/onnx/model_handler.py +1 -1
  282. mlrun/frameworks/onnx/model_server.py +1 -1
  283. mlrun/frameworks/parallel_coordinates.py +1 -1
  284. mlrun/frameworks/pytorch/__init__.py +1 -1
  285. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
  286. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  287. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  288. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  289. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  290. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  291. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  292. mlrun/frameworks/pytorch/model_handler.py +1 -1
  293. mlrun/frameworks/pytorch/model_server.py +1 -1
  294. mlrun/frameworks/pytorch/utils.py +1 -1
  295. mlrun/frameworks/sklearn/__init__.py +1 -1
  296. mlrun/frameworks/sklearn/estimator.py +1 -1
  297. mlrun/frameworks/sklearn/metric.py +1 -1
  298. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  299. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  300. mlrun/frameworks/sklearn/model_handler.py +1 -1
  301. mlrun/frameworks/sklearn/utils.py +1 -1
  302. mlrun/frameworks/tf_keras/__init__.py +1 -1
  303. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
  304. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  305. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  306. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  307. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  308. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  309. mlrun/frameworks/tf_keras/model_server.py +1 -1
  310. mlrun/frameworks/tf_keras/utils.py +1 -1
  311. mlrun/frameworks/xgboost/__init__.py +1 -1
  312. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  313. mlrun/frameworks/xgboost/model_handler.py +1 -1
  314. mlrun/frameworks/xgboost/utils.py +1 -1
  315. mlrun/k8s_utils.py +14 -765
  316. mlrun/kfpops.py +14 -17
  317. mlrun/launcher/__init__.py +13 -0
  318. mlrun/launcher/base.py +406 -0
  319. mlrun/launcher/client.py +159 -0
  320. mlrun/launcher/factory.py +50 -0
  321. mlrun/launcher/local.py +276 -0
  322. mlrun/launcher/remote.py +178 -0
  323. mlrun/lists.py +10 -2
  324. mlrun/mlutils/__init__.py +1 -1
  325. mlrun/mlutils/data.py +1 -1
  326. mlrun/mlutils/models.py +1 -1
  327. mlrun/mlutils/plots.py +1 -1
  328. mlrun/model.py +252 -14
  329. mlrun/model_monitoring/__init__.py +41 -0
  330. mlrun/model_monitoring/features_drift_table.py +1 -1
  331. mlrun/model_monitoring/helpers.py +123 -38
  332. mlrun/model_monitoring/model_endpoint.py +144 -0
  333. mlrun/model_monitoring/model_monitoring_batch.py +310 -259
  334. mlrun/model_monitoring/stores/__init__.py +106 -0
  335. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
  336. mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
  337. mlrun/model_monitoring/stores/models/__init__.py +23 -0
  338. mlrun/model_monitoring/stores/models/base.py +18 -0
  339. mlrun/model_monitoring/stores/models/mysql.py +100 -0
  340. mlrun/model_monitoring/stores/models/sqlite.py +98 -0
  341. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
  342. mlrun/model_monitoring/stream_processing_fs.py +239 -271
  343. mlrun/package/__init__.py +163 -0
  344. mlrun/package/context_handler.py +325 -0
  345. mlrun/package/errors.py +47 -0
  346. mlrun/package/packager.py +298 -0
  347. mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
  348. mlrun/package/packagers/default_packager.py +422 -0
  349. mlrun/package/packagers/numpy_packagers.py +612 -0
  350. mlrun/package/packagers/pandas_packagers.py +968 -0
  351. mlrun/package/packagers/python_standard_library_packagers.py +616 -0
  352. mlrun/package/packagers_manager.py +786 -0
  353. mlrun/package/utils/__init__.py +53 -0
  354. mlrun/package/utils/_archiver.py +226 -0
  355. mlrun/package/utils/_formatter.py +211 -0
  356. mlrun/package/utils/_pickler.py +234 -0
  357. mlrun/package/utils/_supported_format.py +71 -0
  358. mlrun/package/utils/log_hint_utils.py +93 -0
  359. mlrun/package/utils/type_hint_utils.py +298 -0
  360. mlrun/platforms/__init__.py +1 -1
  361. mlrun/platforms/iguazio.py +34 -2
  362. mlrun/platforms/other.py +1 -1
  363. mlrun/projects/__init__.py +1 -1
  364. mlrun/projects/operations.py +14 -9
  365. mlrun/projects/pipelines.py +31 -13
  366. mlrun/projects/project.py +762 -238
  367. mlrun/render.py +49 -19
  368. mlrun/run.py +57 -326
  369. mlrun/runtimes/__init__.py +3 -9
  370. mlrun/runtimes/base.py +247 -784
  371. mlrun/runtimes/constants.py +1 -1
  372. mlrun/runtimes/daskjob.py +45 -41
  373. mlrun/runtimes/funcdoc.py +43 -7
  374. mlrun/runtimes/function.py +66 -656
  375. mlrun/runtimes/function_reference.py +1 -1
  376. mlrun/runtimes/generators.py +1 -1
  377. mlrun/runtimes/kubejob.py +99 -116
  378. mlrun/runtimes/local.py +59 -66
  379. mlrun/runtimes/mpijob/__init__.py +1 -1
  380. mlrun/runtimes/mpijob/abstract.py +13 -15
  381. mlrun/runtimes/mpijob/v1.py +3 -1
  382. mlrun/runtimes/mpijob/v1alpha1.py +1 -1
  383. mlrun/runtimes/nuclio.py +1 -1
  384. mlrun/runtimes/pod.py +51 -26
  385. mlrun/runtimes/remotesparkjob.py +3 -1
  386. mlrun/runtimes/serving.py +12 -4
  387. mlrun/runtimes/sparkjob/__init__.py +1 -2
  388. mlrun/runtimes/sparkjob/abstract.py +44 -31
  389. mlrun/runtimes/sparkjob/spark3job.py +11 -9
  390. mlrun/runtimes/utils.py +61 -42
  391. mlrun/secrets.py +16 -18
  392. mlrun/serving/__init__.py +3 -2
  393. mlrun/serving/merger.py +1 -1
  394. mlrun/serving/remote.py +1 -1
  395. mlrun/serving/routers.py +39 -42
  396. mlrun/serving/server.py +23 -13
  397. mlrun/serving/serving_wrapper.py +1 -1
  398. mlrun/serving/states.py +172 -39
  399. mlrun/serving/utils.py +1 -1
  400. mlrun/serving/v1_serving.py +1 -1
  401. mlrun/serving/v2_serving.py +29 -21
  402. mlrun/utils/__init__.py +1 -2
  403. mlrun/utils/async_http.py +8 -1
  404. mlrun/utils/azure_vault.py +1 -1
  405. mlrun/utils/clones.py +2 -2
  406. mlrun/utils/condition_evaluator.py +65 -0
  407. mlrun/utils/db.py +52 -0
  408. mlrun/utils/helpers.py +188 -13
  409. mlrun/utils/http.py +89 -54
  410. mlrun/utils/logger.py +48 -8
  411. mlrun/utils/model_monitoring.py +132 -100
  412. mlrun/utils/notifications/__init__.py +1 -1
  413. mlrun/utils/notifications/notification/__init__.py +8 -6
  414. mlrun/utils/notifications/notification/base.py +20 -14
  415. mlrun/utils/notifications/notification/console.py +7 -4
  416. mlrun/utils/notifications/notification/git.py +36 -19
  417. mlrun/utils/notifications/notification/ipython.py +10 -8
  418. mlrun/utils/notifications/notification/slack.py +18 -13
  419. mlrun/utils/notifications/notification_pusher.py +377 -56
  420. mlrun/utils/regex.py +6 -1
  421. mlrun/utils/singleton.py +1 -1
  422. mlrun/utils/v3io_clients.py +1 -1
  423. mlrun/utils/vault.py +270 -269
  424. mlrun/utils/version/__init__.py +1 -1
  425. mlrun/utils/version/version.json +2 -2
  426. mlrun/utils/version/version.py +1 -1
  427. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
  428. mlrun-1.4.0.dist-info/RECORD +434 -0
  429. mlrun/api/api/endpoints/marketplace.py +0 -257
  430. mlrun/api/crud/marketplace.py +0 -221
  431. mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
  432. mlrun/api/db/filedb/db.py +0 -518
  433. mlrun/api/schemas/marketplace.py +0 -128
  434. mlrun/api/schemas/model_endpoints.py +0 -185
  435. mlrun/db/filedb.py +0 -891
  436. mlrun/feature_store/retrieval/online.py +0 -92
  437. mlrun/model_monitoring/constants.py +0 -67
  438. mlrun/runtimes/package/context_handler.py +0 -711
  439. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  440. mlrun-1.3.3rc1.dist-info/RECORD +0 -381
  441. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
  442. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
  443. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
  444. {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/config.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -48,6 +48,10 @@ default_env_file = os.getenv("MLRUN_DEFAULT_ENV_FILE", "~/.mlrun.env")
48
48
 
49
49
  default_config = {
50
50
  "namespace": "", # default kubernetes namespace
51
+ "kubernetes": {
52
+ "kubeconfig_path": "", # local path to kubeconfig file (for development purposes),
53
+ # empty by default, as the API is already running inside the k8s cluster
54
+ },
51
55
  "dbpath": "", # db/api url
52
56
  # url to nuclio dashboard api (can be with user & token, e.g. https://username:password@dashboard-url.com)
53
57
  "nuclio_dashboard_url": "",
@@ -74,9 +78,10 @@ default_config = {
74
78
  "spark_app_image_tag": "", # image tag to use for spark operator app runtime
75
79
  "spark_history_server_path": "", # spark logs directory for spark history server
76
80
  "spark_operator_version": "spark-3", # the version of the spark operator in use
77
- "builder_alpine_image": "alpine:3.13.1", # builder alpine image (as kaniko's initContainer)
78
81
  "package_path": "mlrun", # mlrun pip package
79
82
  "default_base_image": "mlrun/mlrun", # default base image when doing .deploy()
83
+ # template for project default image name. Parameter {name} will be replaced with project name
84
+ "default_project_image_name": ".mlrun-project-image-{name}",
80
85
  "default_project": "default", # default project name
81
86
  "default_archive": "", # default remote archive URL (for build tar.gz)
82
87
  "mpijob_crd_version": "", # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
@@ -155,7 +160,7 @@ default_config = {
155
160
  # default security context to be applied to all functions - json string base64 encoded format
156
161
  # in camelCase format: {"runAsUser": 1000, "runAsGroup": 3000}
157
162
  "default": "e30=", # encoded empty dict
158
- # see mlrun.api.schemas.function.SecurityContextEnrichmentModes for available options
163
+ # see mlrun.common.schemas.function.SecurityContextEnrichmentModes for available options
159
164
  "enrichment_mode": "disabled",
160
165
  # default 65534 (nogroup), set to -1 to use the user unix id or
161
166
  # function.spec.security_context.pipelines.kfp_pod_user_unix_id for kfp pods
@@ -178,7 +183,7 @@ default_config = {
178
183
  "mpijob": "mlrun/ml-models",
179
184
  },
180
185
  # see enrich_function_preemption_spec for more info,
181
- # and mlrun.api.schemas.function.PreemptionModes for available options
186
+ # and mlrun.common.schemas.function.PreemptionModes for available options
182
187
  "preemption_mode": "prevent",
183
188
  },
184
189
  "httpdb": {
@@ -219,7 +224,7 @@ default_config = {
219
224
  "allowed_file_paths": "s3://,gcs://,gs://,az://",
220
225
  "db_type": "sqldb",
221
226
  "max_workers": 64,
222
- # See mlrun.api.schemas.APIStates for options
227
+ # See mlrun.common.schemas.APIStates for options
223
228
  "state": "online",
224
229
  "retry_api_call_on_exception": "enabled",
225
230
  "http_connection_timeout_keep_alive": 11,
@@ -230,10 +235,10 @@ default_config = {
230
235
  "conflict_retry_interval": None,
231
236
  # Whether to perform data migrations on initialization. enabled or disabled
232
237
  "data_migrations_mode": "enabled",
233
- # Whether or not to perform database migration from sqlite to mysql on initialization
238
+ # Whether to perform database migration from sqlite to mysql on initialization
234
239
  "database_migration_mode": "enabled",
235
240
  "backup": {
236
- # Whether or not to use db backups on initialization
241
+ # Whether to use db backups on initialization
237
242
  "mode": "enabled",
238
243
  "file_format": "db_backup_%Y%m%d%H%M.db",
239
244
  "use_rotation": True,
@@ -244,6 +249,14 @@ default_config = {
244
249
  # None will set this to be equal to the httpdb.max_workers
245
250
  "connections_pool_size": None,
246
251
  "connections_pool_max_overflow": None,
252
+ # below is a db-specific configuration
253
+ "mysql": {
254
+ # comma separated mysql modes (globally) to set on runtime
255
+ # optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
256
+ #
257
+ # if set to "nil" or "none", nothing would be set
258
+ "modes": "STRICT_TRANS_TABLES",
259
+ },
247
260
  },
248
261
  "jobs": {
249
262
  # whether to allow to run local runtimes in the API - configurable to allow the scheduler testing to work
@@ -357,9 +370,12 @@ default_config = {
357
370
  # git+https://github.com/mlrun/mlrun@development. by default uses the version
358
371
  "mlrun_version_specifier": "",
359
372
  "kaniko_image": "gcr.io/kaniko-project/executor:v1.8.0", # kaniko builder image
360
- "kaniko_init_container_image": "alpine:3.13.1",
373
+ "kaniko_init_container_image": "alpine:3.18",
361
374
  # image for kaniko init container when docker registry is ECR
362
375
  "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
376
+ # kaniko sometimes fails to get filesystem from image, this is a workaround to retry the process
377
+ # a known issue in Kaniko - https://github.com/GoogleContainerTools/kaniko/issues/1717
378
+ "kaniko_image_fs_extraction_retries": "3",
363
379
  # additional docker build args in json encoded base64 format
364
380
  "build_args": "",
365
381
  "pip_ca_secret_name": "",
@@ -372,18 +388,37 @@ default_config = {
372
388
  },
373
389
  "v3io_api": "",
374
390
  "v3io_framesd": "",
391
+ # If running from sdk and MLRUN_DBPATH is not set, the db will fall back to a nop db which will not perform any
392
+ # run db operations.
393
+ "nop_db": {
394
+ # if set to true, will raise an error for trying to use run db functionality
395
+ # if set to false, will use a nop db which will not perform any run db operations
396
+ "raise_error": False,
397
+ # if set to true, will log a warning for trying to use run db functionality while in nop db mode
398
+ "verbose": True,
399
+ },
375
400
  },
376
401
  "model_endpoint_monitoring": {
377
402
  "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
378
403
  "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
404
+ # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
405
+ # stream, and endpoints.
379
406
  "store_prefixes": {
380
407
  "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
381
408
  "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
409
+ "stream": "",
382
410
  },
411
+ # Offline storage path can be either relative or a full path. This path is used for general offline data
412
+ # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
413
+ "offline_storage_path": "model-endpoints/{kind}",
414
+ # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
415
+ # when the user is working in CE environment and has not provided any stream path.
416
+ "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
383
417
  "batch_processing_function_branch": "master",
384
418
  "parquet_batching_max_events": 10000,
385
- # See mlrun.api.schemas.ModelEndpointStoreType for available options
386
- "store_type": "kv",
419
+ # See mlrun.common.schemas.ModelEndpointStoreType for available options
420
+ "store_type": "v3io-nosql",
421
+ "endpoint_store_connection": "",
387
422
  },
388
423
  "secret_stores": {
389
424
  "vault": {
@@ -427,15 +462,16 @@ default_config = {
427
462
  "projects_prefix": "projects", # The UI link prefix for projects
428
463
  "url": "", # remote/external mlrun UI url (for hyperlinks)
429
464
  },
430
- "marketplace": {
431
- "k8s_secrets_project_name": "-marketplace-secrets",
465
+ "hub": {
466
+ "k8s_secrets_project_name": "-hub-secrets",
432
467
  "catalog_filename": "catalog.json",
433
468
  "default_source": {
434
- # Set to false to avoid creating a global source (for example in a dark site)
469
+ # Set false to avoid creating a global source (for example in a dark site)
435
470
  "create": True,
436
471
  "name": "mlrun_global_hub",
437
472
  "description": "MLRun global function hub",
438
- "url": "https://raw.githubusercontent.com/mlrun/marketplace",
473
+ "url": "https://raw.githubusercontent.com/mlrun/marketplace/master",
474
+ "object_type": "functions",
439
475
  "channel": "master",
440
476
  },
441
477
  },
@@ -507,6 +543,27 @@ default_config = {
507
543
  # interval for stopping log collection for runs which are in a terminal state
508
544
  "stop_logs_interval": 3600,
509
545
  },
546
+ # Configurations for the `mlrun.package` sub-package involving packagers - logging returned outputs and parsing
547
+ # inputs data items:
548
+ "packagers": {
549
+ # Whether to enable packagers. True will wrap each run in the `mlrun.package.handler` decorator to log and parse
550
+ # using packagers.
551
+ "enabled": True,
552
+ # Whether to treat returned tuples from functions as a tuple and not as multiple returned items. If True, all
553
+ # returned values will be packaged together as the tuple they are returned in. Default is False to enable
554
+ # logging multiple returned items.
555
+ "pack_tuples": False,
556
+ },
557
+ # Events are currently used only to audit changes and to record access to MLRun entities (such as secrets)
558
+ "events": {
559
+ # supported modes "enabled", "disabled".
560
+ # "enabled" - events are emitted.
561
+ # "disabled" - a no-op client is used (i.e. no events are emitted).
562
+ "mode": "disabled",
563
+ "verbose": False,
564
+ # used for igz client when emitting events
565
+ "access_key": "",
566
+ },
510
567
  }
511
568
 
512
569
  _is_running_as_api = None
@@ -517,8 +574,7 @@ def is_running_as_api():
517
574
  global _is_running_as_api
518
575
 
519
576
  if _is_running_as_api is None:
520
- # os.getenv will load the env var as string, and json.loads will convert it to a bool
521
- _is_running_as_api = json.loads(os.getenv("MLRUN_IS_API_SERVER", "false"))
577
+ _is_running_as_api = os.getenv("MLRUN_IS_API_SERVER", "false").lower() == "true"
522
578
 
523
579
  return _is_running_as_api
524
580
 
@@ -926,6 +982,68 @@ class Config:
926
982
  # Get v3io access key from the environment
927
983
  return os.environ.get("V3IO_ACCESS_KEY")
928
984
 
985
+ def get_model_monitoring_file_target_path(
986
+ self,
987
+ project: str = "",
988
+ kind: str = "",
989
+ target: str = "online",
990
+ artifact_path: str = None,
991
+ ) -> str:
992
+ """Get the full path from the configuration based on the provided project and kind.
993
+
994
+ :param project: Project name.
995
+ :param kind: Kind of target path (e.g. events, log_stream, endpoints, etc.)
996
+ :param target: Can be either online or offline. If the target is online, then we try to get a specific
997
+ path for the provided kind. If it doesn't exist, use the default path.
998
+ If the target path is offline and the offline path is already a full path in the
999
+ configuration, then the result will be that path as-is. If the offline path is a
1000
+ relative path, then the result will be based on the project artifact path and the offline
1001
+ relative path. If project artifact path wasn't provided, then we use MLRun artifact
1002
+ path instead.
1003
+ :param artifact_path: Optional artifact path that will be used as a relative path. If not provided, the
1004
+ relative artifact path will be taken from the global MLRun artifact path.
1005
+
1006
+ :return: Full configured path for the provided kind.
1007
+ """
1008
+
1009
+ if target != "offline":
1010
+ store_prefix_dict = (
1011
+ mlrun.mlconf.model_endpoint_monitoring.store_prefixes.to_dict()
1012
+ )
1013
+ if store_prefix_dict.get(kind):
1014
+ # Target exists in the store prefixes and has a non-empty string value
1015
+ return store_prefix_dict[kind].format(project=project)
1016
+ return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
1017
+ project=project, kind=kind
1018
+ )
1019
+
1020
+ # Get the current offline path from the configuration
1021
+ file_path = mlrun.mlconf.model_endpoint_monitoring.offline_storage_path.format(
1022
+ project=project, kind=kind
1023
+ )
1024
+
1025
+ # Absolute path
1026
+ if any(value in file_path for value in ["://", ":///"]) or os.path.isabs(
1027
+ file_path
1028
+ ):
1029
+ return file_path
1030
+
1031
+ # Relative path
1032
+ else:
1033
+ artifact_path = artifact_path or config.artifact_path
1034
+ if artifact_path[-1] != "/":
1035
+ artifact_path += "/"
1036
+
1037
+ return mlrun.utils.helpers.fill_artifact_path_template(
1038
+ artifact_path=artifact_path + file_path, project=project
1039
+ )
1040
+
1041
+ def is_ce_mode(self) -> bool:
1042
+ # True if the setup is in CE environment
1043
+ return isinstance(mlrun.mlconf.ce, mlrun.config.Config) and any(
1044
+ ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
1045
+ )
1046
+
929
1047
 
930
1048
  # Global configuration
931
1049
  config = Config.from_dict(default_config)
@@ -946,7 +1064,7 @@ def _populate(skip_errors=False):
946
1064
  def _do_populate(env=None, skip_errors=False):
947
1065
  global config
948
1066
 
949
- if not os.environ.get("MLRUN_IGNORE_ENV_FILE") and not is_running_as_api():
1067
+ if not os.environ.get("MLRUN_IGNORE_ENV_FILE"):
950
1068
  if "MLRUN_ENV_FILE" in os.environ:
951
1069
  env_file = os.path.expanduser(os.environ["MLRUN_ENV_FILE"])
952
1070
  dotenv.load_dotenv(env_file, override=True)
@@ -983,12 +1101,10 @@ def _do_populate(env=None, skip_errors=False):
983
1101
 
984
1102
 
985
1103
  def _validate_config(config):
986
- import mlrun.k8s_utils
987
-
988
1104
  try:
989
1105
  limits_gpu = config.default_function_pod_resources.limits.gpu
990
1106
  requests_gpu = config.default_function_pod_resources.requests.gpu
991
- mlrun.k8s_utils.verify_gpu_requests_and_limits(
1107
+ _verify_gpu_requests_and_limits(
992
1108
  requests_gpu=requests_gpu,
993
1109
  limits_gpu=limits_gpu,
994
1110
  )
@@ -998,6 +1114,19 @@ def _validate_config(config):
998
1114
  config.verify_security_context_enrichment_mode_is_allowed()
999
1115
 
1000
1116
 
1117
+ def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str = None):
1118
+ # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
1119
+ if requests_gpu and not limits_gpu:
1120
+ raise mlrun.errors.MLRunConflictError(
1121
+ "You cannot specify GPU requests without specifying limits"
1122
+ )
1123
+ if requests_gpu and limits_gpu and requests_gpu != limits_gpu:
1124
+ raise mlrun.errors.MLRunConflictError(
1125
+ f"When specifying both GPU requests and limits these two values must be equal, "
1126
+ f"requests_gpu={requests_gpu}, limits_gpu={limits_gpu}"
1127
+ )
1128
+
1129
+
1001
1130
  def _convert_resources_to_str(config: dict = None):
1002
1131
  resources_types = ["cpu", "memory", "gpu"]
1003
1132
  resource_requirements = ["requests", "limits"]
@@ -1048,15 +1177,18 @@ def read_env(env=None, prefix=env_prefix):
1048
1177
  cfg[path[0]] = value
1049
1178
 
1050
1179
  env_dbpath = env.get("MLRUN_DBPATH", "")
1180
+ # expected format: https://mlrun-api.tenant.default-tenant.app.some-system.some-namespace.com
1051
1181
  is_remote_mlrun = (
1052
1182
  env_dbpath.startswith("https://mlrun-api.") and "tenant." in env_dbpath
1053
1183
  )
1184
+
1054
1185
  # It's already a standard to set this env var to configure the v3io api, so we're supporting it (instead
1055
1186
  # of MLRUN_V3IO_API), in remote usage this can be auto detected from the DBPATH
1056
1187
  v3io_api = env.get("V3IO_API")
1057
1188
  if v3io_api:
1058
1189
  config["v3io_api"] = v3io_api
1059
1190
  elif is_remote_mlrun:
1191
+ # in remote mlrun we can't use http, so we'll use https
1060
1192
  config["v3io_api"] = env_dbpath.replace("https://mlrun-api.", "https://webapi.")
1061
1193
 
1062
1194
  # It's already a standard to set this env var to configure the v3io framesd, so we're supporting it (instead
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -14,7 +14,12 @@
14
14
  #
15
15
  # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
16
 
17
- from .data_types import InferOptions, ValueType, pd_schema_to_value_type
17
+ from .data_types import (
18
+ InferOptions,
19
+ ValueType,
20
+ pd_schema_to_value_type,
21
+ python_type_to_value_type,
22
+ )
18
23
  from .infer import DFDataInfer
19
24
 
20
25
 
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -79,7 +79,9 @@ def pa_type_to_value_type(type_):
79
79
 
80
80
 
81
81
  def python_type_to_value_type(value_type):
82
- type_name = value_type.__name__
82
+ type_name = (
83
+ value_type.__name__ if hasattr(value_type, "__name__") else str(value_type)
84
+ )
83
85
  type_map = {
84
86
  "int": ValueType.INT64,
85
87
  "str": ValueType.STRING,
mlrun/data_types/infer.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
mlrun/data_types/spark.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -16,6 +16,8 @@ from datetime import datetime
16
16
  from os import environ
17
17
 
18
18
  import numpy as np
19
+ import pytz
20
+ from pyspark.sql.functions import to_utc_timestamp
19
21
  from pyspark.sql.types import BooleanType, DoubleType, TimestampType
20
22
 
21
23
  from mlrun.utils import logger
@@ -143,6 +145,9 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
143
145
  is_timestamp = isinstance(field.dataType, TimestampType)
144
146
  is_boolean = isinstance(field.dataType, BooleanType)
145
147
  if is_timestamp:
148
+ df_after_type_casts = df_after_type_casts.withColumn(
149
+ field.name, to_utc_timestamp(df_after_type_casts[field.name], "UTC")
150
+ )
146
151
  timestamp_columns.add(field.name)
147
152
  if is_boolean:
148
153
  boolean_columns.add(field.name)
@@ -210,11 +215,13 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
210
215
  if col in timestamp_columns:
211
216
  for stat, val in stats.items():
212
217
  if stat == "mean" or stat in original_type_stats:
213
- stats[stat] = datetime.fromtimestamp(val).isoformat()
218
+ stats[stat] = datetime.fromtimestamp(val, tz=pytz.UTC).isoformat()
214
219
  elif stat == "hist":
215
220
  values = stats[stat][1]
216
221
  for i in range(len(values)):
217
- values[i] = datetime.fromtimestamp(values[i]).isoformat()
222
+ values[i] = datetime.fromtimestamp(
223
+ values[i], tz=pytz.UTC
224
+ ).isoformat()
218
225
  # for boolean values, keep mean and histogram values numeric (0 to 1 representation)
219
226
  if col in boolean_columns:
220
227
  for stat, val in stats.items():
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -33,7 +33,12 @@ __all__ = [
33
33
 
34
34
  import mlrun.datastore.wasbfs
35
35
 
36
- from ..platforms.iguazio import KafkaOutputStream, OutputStream, parse_path
36
+ from ..platforms.iguazio import (
37
+ HTTPOutputStream,
38
+ KafkaOutputStream,
39
+ OutputStream,
40
+ parse_path,
41
+ )
37
42
  from ..utils import logger
38
43
  from .base import DataItem
39
44
  from .datastore import StoreManager, in_memory_store, uri_to_ipython
@@ -69,7 +74,7 @@ def get_in_memory_items():
69
74
 
70
75
 
71
76
  def get_stream_pusher(stream_path: str, **kwargs):
72
- """get a stream pusher object from URL, currently only support v3io stream
77
+ """get a stream pusher object from URL.
73
78
 
74
79
  common kwargs::
75
80
 
@@ -87,6 +92,8 @@ def get_stream_pusher(stream_path: str, **kwargs):
87
92
  return KafkaOutputStream(
88
93
  topic, bootstrap_servers, kwargs.get("kafka_producer_options")
89
94
  )
95
+ elif stream_path.startswith("http://") or stream_path.startswith("https://"):
96
+ return HTTPOutputStream(stream_path=stream_path)
90
97
  elif "://" not in stream_path:
91
98
  return OutputStream(stream_path, **kwargs)
92
99
  elif stream_path.startswith("v3io"):
@@ -1,4 +1,4 @@
1
- # Copyright 2018 Iguazio
1
+ # Copyright 2023 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.