mage-ai 0.9.69__py3-none-any.whl → 0.9.71__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mage-ai might be problematic. Click here for more details.

Files changed (624) [hide / show]
  1. mage_ai/ai/utils/xgboost.py +222 -0
  2. mage_ai/api/errors.py +37 -25
  3. mage_ai/api/operations/base.py +13 -1
  4. mage_ai/api/parsers/PipelineScheduleParser.py +1 -1
  5. mage_ai/api/policies/BackfillPolicy.py +1 -0
  6. mage_ai/api/policies/BlockOutputPolicy.py +40 -17
  7. mage_ai/api/policies/GlobalDataProductPolicy.py +91 -41
  8. mage_ai/api/policies/KernelPolicy.py +55 -32
  9. mage_ai/api/policies/KernelProcessPolicy.py +56 -0
  10. mage_ai/api/policies/OutputPolicy.py +73 -41
  11. mage_ai/api/policies/PipelinePolicy.py +206 -137
  12. mage_ai/api/policies/WorkspacePolicy.py +1 -0
  13. mage_ai/api/presenters/BackfillPresenter.py +1 -0
  14. mage_ai/api/presenters/BlockLayoutItemPresenter.py +9 -7
  15. mage_ai/api/presenters/BlockPresenter.py +1 -1
  16. mage_ai/api/presenters/GlobalDataProductPresenter.py +6 -1
  17. mage_ai/api/presenters/KernelPresenter.py +5 -26
  18. mage_ai/api/presenters/KernelProcessPresenter.py +28 -0
  19. mage_ai/api/presenters/PipelinePresenter.py +18 -5
  20. mage_ai/api/presenters/StatusPresenter.py +2 -0
  21. mage_ai/api/presenters/SyncPresenter.py +25 -0
  22. mage_ai/api/resources/AutocompleteItemResource.py +1 -1
  23. mage_ai/api/resources/BlockLayoutItemResource.py +90 -44
  24. mage_ai/api/resources/BlockOutputResource.py +42 -9
  25. mage_ai/api/resources/BlockResource.py +4 -3
  26. mage_ai/api/resources/BlockRunResource.py +27 -22
  27. mage_ai/api/resources/ClusterResource.py +4 -1
  28. mage_ai/api/resources/CustomTemplateResource.py +34 -14
  29. mage_ai/api/resources/DataProviderResource.py +1 -1
  30. mage_ai/api/resources/ExecutionStateResource.py +3 -1
  31. mage_ai/api/resources/FileContentResource.py +8 -2
  32. mage_ai/api/resources/FileResource.py +10 -4
  33. mage_ai/api/resources/FileVersionResource.py +3 -1
  34. mage_ai/api/resources/GitBranchResource.py +101 -31
  35. mage_ai/api/resources/GitCustomBranchResource.py +29 -1
  36. mage_ai/api/resources/GlobalDataProductResource.py +44 -7
  37. mage_ai/api/resources/GlobalHookResource.py +4 -1
  38. mage_ai/api/resources/IntegrationDestinationResource.py +6 -2
  39. mage_ai/api/resources/IntegrationSourceResource.py +8 -4
  40. mage_ai/api/resources/IntegrationSourceStreamResource.py +6 -2
  41. mage_ai/api/resources/KernelProcessResource.py +44 -0
  42. mage_ai/api/resources/KernelResource.py +25 -3
  43. mage_ai/api/resources/OauthResource.py +1 -1
  44. mage_ai/api/resources/OutputResource.py +33 -11
  45. mage_ai/api/resources/PageBlockLayoutResource.py +34 -23
  46. mage_ai/api/resources/PipelineInteractionResource.py +31 -15
  47. mage_ai/api/resources/PipelineResource.py +258 -125
  48. mage_ai/api/resources/PipelineRunResource.py +52 -7
  49. mage_ai/api/resources/PipelineScheduleResource.py +11 -2
  50. mage_ai/api/resources/PipelineTriggerResource.py +6 -1
  51. mage_ai/api/resources/ProjectResource.py +18 -7
  52. mage_ai/api/resources/PullRequestResource.py +6 -4
  53. mage_ai/api/resources/SecretResource.py +1 -1
  54. mage_ai/api/resources/SeedResource.py +8 -1
  55. mage_ai/api/resources/StatusResource.py +21 -6
  56. mage_ai/api/resources/SyncResource.py +6 -8
  57. mage_ai/api/resources/VariableResource.py +46 -26
  58. mage_ai/api/resources/VersionControlProjectResource.py +9 -2
  59. mage_ai/api/resources/WidgetResource.py +1 -1
  60. mage_ai/api/resources/WorkspaceResource.py +6 -5
  61. mage_ai/api/views.py +47 -40
  62. mage_ai/authentication/permissions/seed.py +16 -2
  63. mage_ai/authentication/providers/oidc.py +21 -1
  64. mage_ai/autocomplete/utils.py +13 -9
  65. mage_ai/cache/base.py +1 -1
  66. mage_ai/cache/block.py +18 -12
  67. mage_ai/cache/block_action_object/__init__.py +33 -5
  68. mage_ai/cache/file.py +22 -19
  69. mage_ai/cache/pipeline.py +18 -12
  70. mage_ai/cli/main.py +1 -0
  71. mage_ai/cluster_manager/aws/emr_cluster_manager.py +9 -5
  72. mage_ai/cluster_manager/config.py +2 -2
  73. mage_ai/cluster_manager/kubernetes/workload_manager.py +52 -1
  74. mage_ai/cluster_manager/manage.py +1 -1
  75. mage_ai/cluster_manager/workspace/base.py +7 -1
  76. mage_ai/cluster_manager/workspace/kubernetes.py +22 -1
  77. mage_ai/command_center/applications/factory.py +10 -7
  78. mage_ai/command_center/applications/utils.py +2 -2
  79. mage_ai/command_center/files/factory.py +17 -15
  80. mage_ai/command_center/presenters/text.py +1 -1
  81. mage_ai/command_center/utils.py +25 -13
  82. mage_ai/data/__init__.py +0 -0
  83. mage_ai/data/constants.py +45 -0
  84. mage_ai/data/models/__init__.py +0 -0
  85. mage_ai/data/models/base.py +119 -0
  86. mage_ai/data/models/constants.py +1 -0
  87. mage_ai/data/models/generator.py +115 -0
  88. mage_ai/data/models/manager.py +168 -0
  89. mage_ai/data/models/pyarrow/__init__.py +0 -0
  90. mage_ai/data/models/pyarrow/record_batch.py +55 -0
  91. mage_ai/data/models/pyarrow/shared.py +21 -0
  92. mage_ai/data/models/pyarrow/table.py +8 -0
  93. mage_ai/data/models/reader.py +103 -0
  94. mage_ai/data/models/utils.py +59 -0
  95. mage_ai/data/models/writer.py +91 -0
  96. mage_ai/data/tabular/__init__.py +0 -0
  97. mage_ai/data/tabular/constants.py +23 -0
  98. mage_ai/data/tabular/mocks.py +19 -0
  99. mage_ai/data/tabular/models.py +126 -0
  100. mage_ai/data/tabular/reader.py +602 -0
  101. mage_ai/data/tabular/utils.py +102 -0
  102. mage_ai/data/tabular/writer.py +266 -0
  103. mage_ai/data/variables/__init__.py +0 -0
  104. mage_ai/data/variables/wrapper.py +54 -0
  105. mage_ai/data_cleaner/analysis/charts.py +61 -39
  106. mage_ai/data_cleaner/column_types/column_type_detector.py +53 -31
  107. mage_ai/data_cleaner/estimators/encoders.py +5 -2
  108. mage_ai/data_integrations/utils/scheduler.py +16 -11
  109. mage_ai/data_preparation/decorators.py +1 -0
  110. mage_ai/data_preparation/executors/block_executor.py +237 -155
  111. mage_ai/data_preparation/executors/k8s_block_executor.py +30 -7
  112. mage_ai/data_preparation/executors/k8s_pipeline_executor.py +30 -7
  113. mage_ai/data_preparation/executors/streaming_pipeline_executor.py +2 -2
  114. mage_ai/data_preparation/git/__init__.py +77 -29
  115. mage_ai/data_preparation/git/api.py +69 -8
  116. mage_ai/data_preparation/git/utils.py +64 -34
  117. mage_ai/data_preparation/logging/logger_manager.py +4 -3
  118. mage_ai/data_preparation/models/block/__init__.py +1562 -879
  119. mage_ai/data_preparation/models/block/data_integration/mixins.py +4 -3
  120. mage_ai/data_preparation/models/block/dynamic/__init__.py +17 -6
  121. mage_ai/data_preparation/models/block/dynamic/child.py +41 -102
  122. mage_ai/data_preparation/models/block/dynamic/constants.py +1 -0
  123. mage_ai/data_preparation/models/block/dynamic/counter.py +296 -0
  124. mage_ai/data_preparation/models/block/dynamic/data.py +16 -0
  125. mage_ai/data_preparation/models/block/dynamic/factory.py +163 -0
  126. mage_ai/data_preparation/models/block/dynamic/models.py +19 -0
  127. mage_ai/data_preparation/models/block/dynamic/shared.py +92 -0
  128. mage_ai/data_preparation/models/block/dynamic/utils.py +295 -167
  129. mage_ai/data_preparation/models/block/dynamic/variables.py +384 -144
  130. mage_ai/data_preparation/models/block/dynamic/wrappers.py +77 -0
  131. mage_ai/data_preparation/models/block/extension/utils.py +10 -1
  132. mage_ai/data_preparation/models/block/global_data_product/__init__.py +35 -3
  133. mage_ai/data_preparation/models/block/integration/__init__.py +6 -2
  134. mage_ai/data_preparation/models/block/outputs.py +722 -0
  135. mage_ai/data_preparation/models/block/platform/mixins.py +7 -8
  136. mage_ai/data_preparation/models/block/r/__init__.py +56 -38
  137. mage_ai/data_preparation/models/block/remote/__init__.py +0 -0
  138. mage_ai/data_preparation/models/block/remote/models.py +58 -0
  139. mage_ai/data_preparation/models/block/settings/__init__.py +0 -0
  140. mage_ai/data_preparation/models/block/settings/dynamic/__init__.py +0 -0
  141. mage_ai/data_preparation/models/block/settings/dynamic/constants.py +7 -0
  142. mage_ai/data_preparation/models/block/settings/dynamic/mixins.py +118 -0
  143. mage_ai/data_preparation/models/block/settings/dynamic/models.py +31 -0
  144. mage_ai/data_preparation/models/block/settings/global_data_products/__init__.py +0 -0
  145. mage_ai/data_preparation/models/block/settings/global_data_products/mixins.py +20 -0
  146. mage_ai/data_preparation/models/block/settings/global_data_products/models.py +46 -0
  147. mage_ai/data_preparation/models/block/settings/variables/__init__.py +0 -0
  148. mage_ai/data_preparation/models/block/settings/variables/mixins.py +74 -0
  149. mage_ai/data_preparation/models/block/settings/variables/models.py +49 -0
  150. mage_ai/data_preparation/models/block/spark/mixins.py +2 -1
  151. mage_ai/data_preparation/models/block/sql/__init__.py +30 -5
  152. mage_ai/data_preparation/models/block/sql/utils/shared.py +21 -3
  153. mage_ai/data_preparation/models/block/utils.py +164 -69
  154. mage_ai/data_preparation/models/constants.py +21 -14
  155. mage_ai/data_preparation/models/custom_templates/custom_block_template.py +18 -13
  156. mage_ai/data_preparation/models/custom_templates/custom_pipeline_template.py +33 -16
  157. mage_ai/data_preparation/models/custom_templates/utils.py +1 -1
  158. mage_ai/data_preparation/models/file.py +41 -28
  159. mage_ai/data_preparation/models/global_data_product/__init__.py +100 -58
  160. mage_ai/data_preparation/models/global_hooks/models.py +1 -0
  161. mage_ai/data_preparation/models/interfaces.py +29 -0
  162. mage_ai/data_preparation/models/pipeline.py +374 -185
  163. mage_ai/data_preparation/models/pipelines/integration_pipeline.py +1 -2
  164. mage_ai/data_preparation/models/pipelines/seed.py +1 -1
  165. mage_ai/data_preparation/models/project/__init__.py +66 -18
  166. mage_ai/data_preparation/models/project/constants.py +2 -0
  167. mage_ai/data_preparation/models/triggers/__init__.py +124 -26
  168. mage_ai/data_preparation/models/utils.py +467 -17
  169. mage_ai/data_preparation/models/variable.py +1028 -137
  170. mage_ai/data_preparation/models/variables/__init__.py +0 -0
  171. mage_ai/data_preparation/models/variables/cache.py +149 -0
  172. mage_ai/data_preparation/models/variables/constants.py +72 -0
  173. mage_ai/data_preparation/models/variables/summarizer.py +336 -0
  174. mage_ai/data_preparation/models/variables/utils.py +77 -0
  175. mage_ai/data_preparation/models/widget/__init__.py +63 -41
  176. mage_ai/data_preparation/models/widget/charts.py +40 -27
  177. mage_ai/data_preparation/models/widget/constants.py +2 -0
  178. mage_ai/data_preparation/models/widget/utils.py +3 -3
  179. mage_ai/data_preparation/preferences.py +3 -3
  180. mage_ai/data_preparation/repo_manager.py +55 -21
  181. mage_ai/data_preparation/storage/base_storage.py +2 -2
  182. mage_ai/data_preparation/storage/gcs_storage.py +7 -4
  183. mage_ai/data_preparation/storage/local_storage.py +18 -9
  184. mage_ai/data_preparation/storage/s3_storage.py +5 -2
  185. mage_ai/data_preparation/templates/data_exporters/streaming/oracledb.yaml +8 -0
  186. mage_ai/data_preparation/variable_manager.py +281 -76
  187. mage_ai/io/base.py +3 -2
  188. mage_ai/io/bigquery.py +1 -0
  189. mage_ai/io/redshift.py +7 -5
  190. mage_ai/kernels/__init__.py +0 -0
  191. mage_ai/kernels/models.py +188 -0
  192. mage_ai/kernels/utils.py +169 -0
  193. mage_ai/orchestration/concurrency.py +6 -2
  194. mage_ai/orchestration/db/__init__.py +1 -0
  195. mage_ai/orchestration/db/migrations/versions/0227396a216c_add_userproject_table.py +38 -0
  196. mage_ai/orchestration/db/migrations/versions/42a14d6143f1_update_token_column_type.py +54 -0
  197. mage_ai/orchestration/db/models/dynamic/__init__.py +0 -0
  198. mage_ai/orchestration/db/models/dynamic/controller.py +67 -0
  199. mage_ai/orchestration/db/models/oauth.py +12 -18
  200. mage_ai/orchestration/db/models/projects.py +10 -0
  201. mage_ai/orchestration/db/models/schedules.py +225 -187
  202. mage_ai/orchestration/db/models/schedules_project_platform.py +18 -12
  203. mage_ai/orchestration/db/models/utils.py +46 -5
  204. mage_ai/orchestration/metrics/pipeline_run.py +8 -9
  205. mage_ai/orchestration/notification/sender.py +38 -15
  206. mage_ai/orchestration/pipeline_scheduler_original.py +64 -33
  207. mage_ai/orchestration/pipeline_scheduler_project_platform.py +1 -1
  208. mage_ai/orchestration/run_status_checker.py +11 -4
  209. mage_ai/orchestration/triggers/api.py +41 -2
  210. mage_ai/orchestration/triggers/global_data_product.py +9 -4
  211. mage_ai/orchestration/triggers/utils.py +10 -1
  212. mage_ai/orchestration/utils/resources.py +3 -0
  213. mage_ai/presenters/charts/data_sources/base.py +4 -2
  214. mage_ai/presenters/charts/data_sources/block.py +15 -9
  215. mage_ai/presenters/charts/data_sources/chart_code.py +8 -5
  216. mage_ai/presenters/charts/data_sources/constants.py +1 -0
  217. mage_ai/presenters/charts/data_sources/system_metrics.py +22 -0
  218. mage_ai/presenters/interactions/models.py +11 -7
  219. mage_ai/presenters/pages/loaders/pipelines.py +5 -3
  220. mage_ai/presenters/pages/models/page_components/pipeline_schedules.py +3 -1
  221. mage_ai/presenters/utils.py +2 -0
  222. mage_ai/server/api/blocks.py +2 -1
  223. mage_ai/server/api/downloads.py +9 -2
  224. mage_ai/server/api/runs.py +151 -0
  225. mage_ai/server/api/triggers.py +3 -1
  226. mage_ai/server/constants.py +1 -1
  227. mage_ai/server/frontend_dist/404.html +8 -8
  228. mage_ai/server/frontend_dist/_next/static/UZLabyPgcxtZvp0O0EUUS/_buildManifest.js +1 -0
  229. mage_ai/server/frontend_dist/_next/static/chunks/1376-22de38b4ad008d8a.js +1 -0
  230. mage_ai/server/frontend_dist/_next/static/chunks/1557-25a7d985d5564fd3.js +1 -0
  231. mage_ai/server/frontend_dist/_next/static/chunks/1668-30b4619b9534519b.js +1 -0
  232. mage_ai/server/frontend_dist/_next/static/chunks/1799-c42db95a015689ee.js +1 -0
  233. mage_ai/server/frontend_dist/_next/static/chunks/2996-2108b53b9d371d8d.js +1 -0
  234. mage_ai/server/frontend_dist/_next/static/chunks/{3548-fa0792ddb88f4646.js → 3548-9d26185b3fb663b1.js} +1 -1
  235. mage_ai/server/frontend_dist/_next/static/chunks/{3763-61b542dafdbf5754.js → 3763-40780c6d1e4b261d.js} +1 -1
  236. mage_ai/server/frontend_dist/_next/static/chunks/3782-129dd2a2448a2e36.js +1 -0
  237. mage_ai/server/frontend_dist/_next/static/chunks/3958-bcdfa414ccfa1eb2.js +1 -0
  238. mage_ai/server/frontend_dist/_next/static/chunks/4168-97fd1578d1a38315.js +1 -0
  239. mage_ai/server/frontend_dist/_next/static/chunks/4982-fa5a238b139fbdd2.js +1 -0
  240. mage_ai/server/frontend_dist/_next/static/chunks/5699-176f445e1313f001.js +1 -0
  241. mage_ai/server/frontend_dist/_next/static/chunks/7162-7dd03f0f605de721.js +1 -0
  242. mage_ai/server/frontend_dist/_next/static/chunks/7779-68d2b72a90c5f925.js +1 -0
  243. mage_ai/server/frontend_dist/_next/static/chunks/7966-5446a8e43711e2f9.js +1 -0
  244. mage_ai/server/frontend_dist/_next/static/chunks/8023-6c2f172f48dcb99b.js +1 -0
  245. mage_ai/server/frontend_dist/_next/static/chunks/8095-c351b8a735d73e0c.js +1 -0
  246. mage_ai/server/frontend_dist/_next/static/chunks/9624-8b8e100079ab69e1.js +1 -0
  247. mage_ai/server/frontend_dist/_next/static/chunks/{main-77fe248a6fbd12d8.js → main-b99d4e30a88d9dc7.js} +1 -1
  248. mage_ai/server/frontend_dist/_next/static/chunks/pages/_app-9fe2d9d07c94e968.js +1 -0
  249. mage_ai/server/frontend_dist/_next/static/chunks/pages/{block-layout-14f952f66964022f.js → block-layout-7f4b735c67115df5.js} +1 -1
  250. mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products/[...slug]-e7d48e6b0c3068ac.js +1 -0
  251. mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products-b943f31f050fc3a4.js +1 -0
  252. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-4bfc84ff07d7656f.js +1 -0
  253. mage_ai/server/frontend_dist/_next/static/chunks/pages/{overview-597b74828bf105db.js → overview-9f1ac4ec003884f3.js} +1 -1
  254. mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipeline-runs-3edc6270c5b0e962.js → frontend_dist/_next/static/chunks/pages/pipeline-runs-6d183f91a2ff6668.js} +1 -1
  255. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/[...slug]-7e737f6fc7e83e9b.js +1 -0
  256. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-38e1fbcfbfc1014e.js +1 -0
  257. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-d94488e3f2eeef36.js +1 -0
  258. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-cc641a7fa8473796.js +1 -0
  259. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-a5c0362763a21fa8.js → block-runs-284309877f3c5a5a.js} +1 -1
  260. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-26250e5335194ade.js +1 -0
  261. mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors-7acc7afc00df17c2.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors-5f4c8128b2413fd8.js} +1 -1
  262. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-4ebfc8e400315dda.js +1 -0
  263. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/settings-e5e0150a256aadb3.js +1 -0
  264. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-eb11c5390c982b49.js +1 -0
  265. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-1bdfda8edc9cf4a8.js → triggers-4612d15a65c35912.js} +1 -1
  266. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/account/{profile-3f0df3decc856ee9.js → profile-3ae43c932537b254.js} +1 -1
  267. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/platform/preferences-b603d7fe4b175256.js +1 -0
  268. mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/settings-c2e9ef989c8bfa73.js → frontend_dist/_next/static/chunks/pages/settings/platform/settings-319ddbabc239e91b.js} +1 -1
  269. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/permissions/{[...slug]-47b64ced27c24985.js → [...slug]-5c360f72e4498855.js} +1 -1
  270. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{permissions-e5a4d3d815cec25d.js → permissions-fb29fa6c2bd90bb0.js} +1 -1
  271. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-3b76fa959ffa09d3.js +1 -0
  272. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles/{[...slug]-379e1ee292504842.js → [...slug]-3b787b42f1093b1f.js} +1 -1
  273. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles-0b83fbdd39e85f5b.js +1 -0
  274. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-a1e6950974d643a8.js +1 -0
  275. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users/{[...slug]-2af9afbe727d88aa.js → [...slug]-0aa019d87db8b0b8.js} +1 -1
  276. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{users-a4db8710f703c729.js → users-88c694d19207f2ec.js} +1 -1
  277. mage_ai/server/frontend_dist/_next/static/chunks/pages/{triggers-9cba3211434a8966.js → triggers-a599c6ac89be8c8d.js} +1 -1
  278. mage_ai/server/frontend_dist/_next/static/chunks/pages/version-control-31d0d50f7f30462b.js +1 -0
  279. mage_ai/server/frontend_dist/_next/static/chunks/{webpack-d079359c241db804.js → webpack-ac7fdc472bedf682.js} +1 -1
  280. mage_ai/server/frontend_dist/block-layout.html +3 -3
  281. mage_ai/server/frontend_dist/compute.html +6 -6
  282. mage_ai/server/frontend_dist/files.html +6 -6
  283. mage_ai/server/frontend_dist/global-data-products/[...slug].html +6 -6
  284. mage_ai/server/frontend_dist/global-data-products.html +6 -6
  285. mage_ai/server/frontend_dist/global-hooks/[...slug].html +6 -6
  286. mage_ai/server/frontend_dist/global-hooks.html +6 -6
  287. mage_ai/server/frontend_dist/index.html +3 -3
  288. mage_ai/server/frontend_dist/manage/files.html +6 -6
  289. mage_ai/server/frontend_dist/manage/settings.html +6 -6
  290. mage_ai/server/frontend_dist/manage/users/[user].html +6 -6
  291. mage_ai/server/frontend_dist/manage/users/new.html +6 -6
  292. mage_ai/server/frontend_dist/manage/users.html +6 -6
  293. mage_ai/server/frontend_dist/manage.html +6 -6
  294. mage_ai/server/frontend_dist/oauth.html +5 -5
  295. mage_ai/server/frontend_dist/overview.html +6 -6
  296. mage_ai/server/frontend_dist/pipeline-runs.html +6 -6
  297. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +6 -6
  298. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +6 -6
  299. mage_ai/server/frontend_dist/pipelines/[pipeline]/dashboard.html +6 -6
  300. mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +3 -3
  301. mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +6 -6
  302. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +6 -6
  303. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +6 -6
  304. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +6 -6
  305. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +6 -6
  306. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +6 -6
  307. mage_ai/server/frontend_dist/pipelines/[pipeline]/settings.html +6 -6
  308. mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +6 -6
  309. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +6 -6
  310. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +6 -6
  311. mage_ai/server/frontend_dist/pipelines/[pipeline].html +3 -3
  312. mage_ai/server/frontend_dist/pipelines.html +6 -6
  313. mage_ai/server/frontend_dist/platform/global-hooks/[...slug].html +6 -6
  314. mage_ai/server/frontend_dist/platform/global-hooks.html +6 -6
  315. mage_ai/server/frontend_dist/settings/account/profile.html +6 -6
  316. mage_ai/server/frontend_dist/settings/platform/preferences.html +6 -6
  317. mage_ai/server/frontend_dist/settings/platform/settings.html +6 -6
  318. mage_ai/server/frontend_dist/settings/workspace/permissions/[...slug].html +6 -6
  319. mage_ai/server/frontend_dist/settings/workspace/permissions.html +6 -6
  320. mage_ai/server/frontend_dist/settings/workspace/preferences.html +6 -6
  321. mage_ai/server/frontend_dist/settings/workspace/roles/[...slug].html +6 -6
  322. mage_ai/server/frontend_dist/settings/workspace/roles.html +6 -6
  323. mage_ai/server/frontend_dist/settings/workspace/sync-data.html +6 -6
  324. mage_ai/server/frontend_dist/settings/workspace/users/[...slug].html +6 -6
  325. mage_ai/server/frontend_dist/settings/workspace/users.html +6 -6
  326. mage_ai/server/frontend_dist/settings.html +3 -3
  327. mage_ai/server/frontend_dist/sign-in.html +15 -15
  328. mage_ai/server/frontend_dist/templates/[...slug].html +6 -6
  329. mage_ai/server/frontend_dist/templates.html +6 -6
  330. mage_ai/server/frontend_dist/terminal.html +6 -6
  331. mage_ai/server/frontend_dist/test.html +3 -3
  332. mage_ai/server/frontend_dist/triggers.html +6 -6
  333. mage_ai/server/frontend_dist/version-control.html +6 -6
  334. mage_ai/server/frontend_dist_base_path_template/404.html +8 -8
  335. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1376-22de38b4ad008d8a.js +1 -0
  336. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1557-25a7d985d5564fd3.js +1 -0
  337. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1668-30b4619b9534519b.js +1 -0
  338. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1799-c42db95a015689ee.js +1 -0
  339. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/2996-2108b53b9d371d8d.js +1 -0
  340. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{3548-fa0792ddb88f4646.js → 3548-9d26185b3fb663b1.js} +1 -1
  341. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{3763-61b542dafdbf5754.js → 3763-40780c6d1e4b261d.js} +1 -1
  342. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3782-129dd2a2448a2e36.js +1 -0
  343. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3958-bcdfa414ccfa1eb2.js +1 -0
  344. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/4168-97fd1578d1a38315.js +1 -0
  345. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/4982-fa5a238b139fbdd2.js +1 -0
  346. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5699-176f445e1313f001.js +1 -0
  347. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7162-7dd03f0f605de721.js +1 -0
  348. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7779-68d2b72a90c5f925.js +1 -0
  349. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7966-5446a8e43711e2f9.js +1 -0
  350. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8023-6c2f172f48dcb99b.js +1 -0
  351. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8095-c351b8a735d73e0c.js +1 -0
  352. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9624-8b8e100079ab69e1.js +1 -0
  353. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{main-70b78159c2bb3fe1.js → main-384298e9133cec76.js} +1 -1
  354. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/_app-13a578bce3b7f30c.js +1 -0
  355. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{block-layout-14f952f66964022f.js → block-layout-7f4b735c67115df5.js} +1 -1
  356. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products/[...slug]-e7d48e6b0c3068ac.js +1 -0
  357. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products-b943f31f050fc3a4.js +1 -0
  358. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage-4bfc84ff07d7656f.js +1 -0
  359. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{overview-597b74828bf105db.js → overview-9f1ac4ec003884f3.js} +1 -1
  360. mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipeline-runs-3edc6270c5b0e962.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipeline-runs-6d183f91a2ff6668.js} +1 -1
  361. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills/[...slug]-7e737f6fc7e83e9b.js +1 -0
  362. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills-38e1fbcfbfc1014e.js +1 -0
  363. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-d94488e3f2eeef36.js +1 -0
  364. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/edit-cc641a7fa8473796.js +1 -0
  365. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-a5c0362763a21fa8.js → block-runs-284309877f3c5a5a.js} +1 -1
  366. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-26250e5335194ade.js +1 -0
  367. mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors-7acc7afc00df17c2.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors-5f4c8128b2413fd8.js} +1 -1
  368. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-4ebfc8e400315dda.js +1 -0
  369. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/settings-e5e0150a256aadb3.js +1 -0
  370. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-eb11c5390c982b49.js +1 -0
  371. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-1bdfda8edc9cf4a8.js → triggers-4612d15a65c35912.js} +1 -1
  372. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/account/{profile-3f0df3decc856ee9.js → profile-3ae43c932537b254.js} +1 -1
  373. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/preferences-b603d7fe4b175256.js +1 -0
  374. mage_ai/server/{frontend_dist/_next/static/chunks/pages/settings/platform/settings-c2e9ef989c8bfa73.js → frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/settings-319ddbabc239e91b.js} +1 -1
  375. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/permissions/{[...slug]-47b64ced27c24985.js → [...slug]-5c360f72e4498855.js} +1 -1
  376. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{permissions-e5a4d3d815cec25d.js → permissions-fb29fa6c2bd90bb0.js} +1 -1
  377. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/preferences-3b76fa959ffa09d3.js +1 -0
  378. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles/{[...slug]-379e1ee292504842.js → [...slug]-3b787b42f1093b1f.js} +1 -1
  379. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles-0b83fbdd39e85f5b.js +1 -0
  380. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/sync-data-a1e6950974d643a8.js +1 -0
  381. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/users/{[...slug]-2af9afbe727d88aa.js → [...slug]-0aa019d87db8b0b8.js} +1 -1
  382. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{users-a4db8710f703c729.js → users-88c694d19207f2ec.js} +1 -1
  383. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{triggers-9cba3211434a8966.js → triggers-a599c6ac89be8c8d.js} +1 -1
  384. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/version-control-31d0d50f7f30462b.js +1 -0
  385. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{webpack-68c003fb6a175cd7.js → webpack-481689d9989710cd.js} +1 -1
  386. mage_ai/server/frontend_dist_base_path_template/_next/static/kcptwoOU-JJJg6Vwpkfmx/_buildManifest.js +1 -0
  387. mage_ai/server/frontend_dist_base_path_template/block-layout.html +3 -3
  388. mage_ai/server/frontend_dist_base_path_template/compute.html +6 -6
  389. mage_ai/server/frontend_dist_base_path_template/files.html +6 -6
  390. mage_ai/server/frontend_dist_base_path_template/global-data-products/[...slug].html +6 -6
  391. mage_ai/server/frontend_dist_base_path_template/global-data-products.html +6 -6
  392. mage_ai/server/frontend_dist_base_path_template/global-hooks/[...slug].html +6 -6
  393. mage_ai/server/frontend_dist_base_path_template/global-hooks.html +6 -6
  394. mage_ai/server/frontend_dist_base_path_template/index.html +3 -3
  395. mage_ai/server/frontend_dist_base_path_template/manage/files.html +6 -6
  396. mage_ai/server/frontend_dist_base_path_template/manage/settings.html +6 -6
  397. mage_ai/server/frontend_dist_base_path_template/manage/users/[user].html +6 -6
  398. mage_ai/server/frontend_dist_base_path_template/manage/users/new.html +6 -6
  399. mage_ai/server/frontend_dist_base_path_template/manage/users.html +6 -6
  400. mage_ai/server/frontend_dist_base_path_template/manage.html +6 -6
  401. mage_ai/server/frontend_dist_base_path_template/oauth.html +5 -5
  402. mage_ai/server/frontend_dist_base_path_template/overview.html +6 -6
  403. mage_ai/server/frontend_dist_base_path_template/pipeline-runs.html +6 -6
  404. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/backfills/[...slug].html +6 -6
  405. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/backfills.html +6 -6
  406. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/dashboard.html +6 -6
  407. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/edit.html +3 -3
  408. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/logs.html +6 -6
  409. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors/block-runs.html +6 -6
  410. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors/block-runtime.html +6 -6
  411. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors.html +6 -6
  412. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/runs/[run].html +6 -6
  413. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/runs.html +6 -6
  414. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/settings.html +6 -6
  415. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/syncs.html +6 -6
  416. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/triggers/[...slug].html +6 -6
  417. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/triggers.html +6 -6
  418. mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline].html +3 -3
  419. mage_ai/server/frontend_dist_base_path_template/pipelines.html +6 -6
  420. mage_ai/server/frontend_dist_base_path_template/platform/global-hooks/[...slug].html +6 -6
  421. mage_ai/server/frontend_dist_base_path_template/platform/global-hooks.html +6 -6
  422. mage_ai/server/frontend_dist_base_path_template/settings/account/profile.html +6 -6
  423. mage_ai/server/frontend_dist_base_path_template/settings/platform/preferences.html +6 -6
  424. mage_ai/server/frontend_dist_base_path_template/settings/platform/settings.html +6 -6
  425. mage_ai/server/frontend_dist_base_path_template/settings/workspace/permissions/[...slug].html +6 -6
  426. mage_ai/server/frontend_dist_base_path_template/settings/workspace/permissions.html +6 -6
  427. mage_ai/server/frontend_dist_base_path_template/settings/workspace/preferences.html +6 -6
  428. mage_ai/server/frontend_dist_base_path_template/settings/workspace/roles/[...slug].html +6 -6
  429. mage_ai/server/frontend_dist_base_path_template/settings/workspace/roles.html +6 -6
  430. mage_ai/server/frontend_dist_base_path_template/settings/workspace/sync-data.html +6 -6
  431. mage_ai/server/frontend_dist_base_path_template/settings/workspace/users/[...slug].html +6 -6
  432. mage_ai/server/frontend_dist_base_path_template/settings/workspace/users.html +6 -6
  433. mage_ai/server/frontend_dist_base_path_template/settings.html +3 -3
  434. mage_ai/server/frontend_dist_base_path_template/sign-in.html +15 -15
  435. mage_ai/server/frontend_dist_base_path_template/templates/[...slug].html +6 -6
  436. mage_ai/server/frontend_dist_base_path_template/templates.html +6 -6
  437. mage_ai/server/frontend_dist_base_path_template/terminal.html +6 -6
  438. mage_ai/server/frontend_dist_base_path_template/test.html +3 -3
  439. mage_ai/server/frontend_dist_base_path_template/triggers.html +6 -6
  440. mage_ai/server/frontend_dist_base_path_template/version-control.html +6 -6
  441. mage_ai/server/kernel_output_parser.py +4 -1
  442. mage_ai/server/scheduler_manager.py +12 -1
  443. mage_ai/server/server.py +69 -42
  444. mage_ai/server/utils/custom_output.py +284 -0
  445. mage_ai/server/utils/execute_custom_code.py +245 -0
  446. mage_ai/server/utils/output_display.py +123 -289
  447. mage_ai/server/websocket_server.py +116 -69
  448. mage_ai/services/aws/ecs/ecs.py +1 -0
  449. mage_ai/services/k8s/config.py +27 -4
  450. mage_ai/services/k8s/job_manager.py +6 -1
  451. mage_ai/services/k8s/utils.py +97 -0
  452. mage_ai/services/ssh/aws/emr/utils.py +8 -8
  453. mage_ai/settings/keys/auth.py +1 -0
  454. mage_ai/settings/platform/__init__.py +159 -38
  455. mage_ai/settings/platform/constants.py +5 -0
  456. mage_ai/settings/platform/utils.py +53 -10
  457. mage_ai/settings/repo.py +26 -12
  458. mage_ai/settings/server.py +128 -37
  459. mage_ai/shared/array.py +24 -1
  460. mage_ai/shared/complex.py +45 -0
  461. mage_ai/shared/config.py +2 -1
  462. mage_ai/shared/custom_logger.py +11 -0
  463. mage_ai/shared/dates.py +10 -6
  464. mage_ai/shared/files.py +63 -8
  465. mage_ai/shared/hash.py +33 -9
  466. mage_ai/shared/io.py +9 -5
  467. mage_ai/shared/models.py +82 -24
  468. mage_ai/shared/outputs.py +87 -0
  469. mage_ai/shared/parsers.py +144 -13
  470. mage_ai/shared/path_fixer.py +11 -7
  471. mage_ai/shared/singletons/__init__.py +0 -0
  472. mage_ai/shared/singletons/base.py +47 -0
  473. mage_ai/shared/singletons/memory.py +38 -0
  474. mage_ai/shared/strings.py +34 -1
  475. mage_ai/shared/yaml.py +24 -0
  476. mage_ai/streaming/sinks/oracledb.py +57 -0
  477. mage_ai/streaming/sinks/sink_factory.py +4 -0
  478. mage_ai/system/__init__.py +0 -0
  479. mage_ai/system/constants.py +14 -0
  480. mage_ai/system/memory/__init__.py +0 -0
  481. mage_ai/system/memory/constants.py +1 -0
  482. mage_ai/system/memory/manager.py +174 -0
  483. mage_ai/system/memory/presenters.py +158 -0
  484. mage_ai/system/memory/process.py +216 -0
  485. mage_ai/system/memory/samples.py +13 -0
  486. mage_ai/system/memory/utils.py +656 -0
  487. mage_ai/system/memory/wrappers.py +177 -0
  488. mage_ai/system/models.py +58 -0
  489. mage_ai/system/storage/__init__.py +0 -0
  490. mage_ai/system/storage/utils.py +29 -0
  491. mage_ai/tests/api/endpoints/mixins.py +2 -2
  492. mage_ai/tests/api/endpoints/test_blocks.py +2 -1
  493. mage_ai/tests/api/endpoints/test_custom_designs.py +4 -4
  494. mage_ai/tests/api/endpoints/test_pipeline_runs.py +2 -2
  495. mage_ai/tests/api/endpoints/test_projects.py +2 -1
  496. mage_ai/tests/api/operations/base/mixins.py +1 -1
  497. mage_ai/tests/api/operations/base/test_base.py +27 -27
  498. mage_ai/tests/api/operations/base/test_base_with_user_authentication.py +27 -27
  499. mage_ai/tests/api/operations/base/test_base_with_user_permissions.py +23 -23
  500. mage_ai/tests/api/operations/test_syncs.py +6 -4
  501. mage_ai/tests/api/resources/test_pipeline_resource.py +11 -4
  502. mage_ai/tests/authentication/oauth/test_utils.py +1 -1
  503. mage_ai/tests/authentication/providers/test_oidc.py +59 -0
  504. mage_ai/tests/base_test.py +2 -2
  505. mage_ai/tests/data/__init__.py +0 -0
  506. mage_ai/tests/data/models/__init__.py +0 -0
  507. mage_ai/tests/data_preparation/executors/test_block_executor.py +23 -16
  508. mage_ai/tests/data_preparation/git/test_git.py +4 -1
  509. mage_ai/tests/data_preparation/models/block/dynamic/test_combos.py +305 -0
  510. mage_ai/tests/data_preparation/models/block/dynamic/test_counter.py +212 -0
  511. mage_ai/tests/data_preparation/models/block/dynamic/test_factory.py +360 -0
  512. mage_ai/tests/data_preparation/models/block/dynamic/test_variables.py +332 -0
  513. mage_ai/tests/data_preparation/models/block/hook/test_hook_block.py +2 -2
  514. mage_ai/tests/data_preparation/models/block/platform/test_mixins.py +1 -1
  515. mage_ai/tests/data_preparation/models/block/sql/utils/test_shared.py +26 -1
  516. mage_ai/tests/data_preparation/models/block/test_global_data_product.py +5 -2
  517. mage_ai/tests/data_preparation/models/custom_templates/test_utils.py +5 -4
  518. mage_ai/tests/data_preparation/models/global_hooks/test_hook.py +3 -0
  519. mage_ai/tests/data_preparation/models/global_hooks/test_predicates.py +9 -3
  520. mage_ai/tests/data_preparation/models/test_block.py +115 -120
  521. mage_ai/tests/data_preparation/models/test_blocks_helper.py +114 -0
  522. mage_ai/tests/data_preparation/models/test_global_data_product.py +41 -24
  523. mage_ai/tests/data_preparation/models/test_pipeline.py +9 -6
  524. mage_ai/tests/data_preparation/models/test_project.py +4 -1
  525. mage_ai/tests/data_preparation/models/test_utils.py +80 -0
  526. mage_ai/tests/data_preparation/models/test_variable.py +242 -69
  527. mage_ai/tests/data_preparation/models/variables/__init__.py +0 -0
  528. mage_ai/tests/data_preparation/models/variables/test_summarizer.py +481 -0
  529. mage_ai/tests/data_preparation/storage/shared/__init__.py +0 -0
  530. mage_ai/tests/data_preparation/test_repo_manager.py +6 -7
  531. mage_ai/tests/data_preparation/test_variable_manager.py +57 -48
  532. mage_ai/tests/factory.py +64 -43
  533. mage_ai/tests/orchestration/db/models/test_schedules.py +3 -3
  534. mage_ai/tests/orchestration/db/models/test_schedules_dynamic_blocks.py +279 -0
  535. mage_ai/tests/orchestration/test_pipeline_scheduler.py +1 -0
  536. mage_ai/tests/orchestration/triggers/test_global_data_product.py +141 -138
  537. mage_ai/tests/orchestration/triggers/test_utils.py +3 -2
  538. mage_ai/tests/server/test_server.py +19 -0
  539. mage_ai/tests/services/k8s/test_job_manager.py +27 -6
  540. mage_ai/tests/streaming/sinks/test_oracledb.py +38 -0
  541. mage_ai/tests/test_shared.py +61 -0
  542. mage_ai/usage_statistics/logger.py +7 -2
  543. mage_ai/utils/code.py +33 -19
  544. mage_ai/version_control/branch/utils.py +2 -1
  545. mage_ai/version_control/models.py +3 -2
  546. {mage_ai-0.9.69.dist-info → mage_ai-0.9.71.dist-info}/METADATA +6 -3
  547. {mage_ai-0.9.69.dist-info → mage_ai-0.9.71.dist-info}/RECORD +555 -454
  548. mage_ai/data_preparation/models/global_data_product/constants.py +0 -6
  549. mage_ai/server/frontend_dist/_next/static/_krrrgup_C-dPOpX36S8I/_buildManifest.js +0 -1
  550. mage_ai/server/frontend_dist/_next/static/chunks/1557-df144fbd8b2208c3.js +0 -1
  551. mage_ai/server/frontend_dist/_next/static/chunks/2631-b9f9bea3f1cf906d.js +0 -1
  552. mage_ai/server/frontend_dist/_next/static/chunks/3782-ef4cd4f0b52072d0.js +0 -1
  553. mage_ai/server/frontend_dist/_next/static/chunks/4783-422429203610c318.js +0 -1
  554. mage_ai/server/frontend_dist/_next/static/chunks/5699-6d708c6b2153ea08.js +0 -1
  555. mage_ai/server/frontend_dist/_next/static/chunks/635-0d6b7c8804bcd2dc.js +0 -1
  556. mage_ai/server/frontend_dist/_next/static/chunks/7022-0d52dd8868621fb0.js +0 -1
  557. mage_ai/server/frontend_dist/_next/static/chunks/7361-8a23dd8360593e7a.js +0 -1
  558. mage_ai/server/frontend_dist/_next/static/chunks/7966-f07b2913f7326b50.js +0 -1
  559. mage_ai/server/frontend_dist/_next/static/chunks/8095-bdce03896ef9639a.js +0 -1
  560. mage_ai/server/frontend_dist/_next/static/chunks/8146-6bed4e7401e067e6.js +0 -1
  561. mage_ai/server/frontend_dist/_next/static/chunks/9265-d2a1aaec75ec69b8.js +0 -1
  562. mage_ai/server/frontend_dist/_next/static/chunks/9440-4069842b90d4b801.js +0 -1
  563. mage_ai/server/frontend_dist/_next/static/chunks/9624-59b2f803f9c88cd6.js +0 -1
  564. mage_ai/server/frontend_dist/_next/static/chunks/9832-67896490f6e8a014.js +0 -1
  565. mage_ai/server/frontend_dist/_next/static/chunks/pages/_app-d9c89527266296f7.js +0 -1
  566. mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products/[...slug]-591abd392dc50ed4.js +0 -1
  567. mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products-78e8e88f2a757a18.js +0 -1
  568. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-852d403c7bda21b3.js +0 -1
  569. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/[...slug]-ff4bd7a8ec3bab40.js +0 -1
  570. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-a8b61d8d239fd16f.js +0 -1
  571. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-95ffcd3e2b27e567.js +0 -1
  572. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-e1dd1ed71d26c10d.js +0 -1
  573. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-1ed9045b2f1dfd65.js +0 -1
  574. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-1417ad1c821d720a.js +0 -1
  575. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/settings-59aca25a5b1d3998.js +0 -1
  576. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-f028ef3880ed856c.js +0 -1
  577. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/platform/preferences-503049734a8b082f.js +0 -1
  578. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-5b26eeda8aed8a7b.js +0 -1
  579. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles-36fa165a48af586b.js +0 -1
  580. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-8b793b3b696a2cd3.js +0 -1
  581. mage_ai/server/frontend_dist/_next/static/chunks/pages/version-control-5753fac7c1bfdc88.js +0 -1
  582. mage_ai/server/frontend_dist_base_path_template/_next/static/KLL5mirre9d7_ZeEpaw3s/_buildManifest.js +0 -1
  583. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1557-df144fbd8b2208c3.js +0 -1
  584. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/2631-b9f9bea3f1cf906d.js +0 -1
  585. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3782-ef4cd4f0b52072d0.js +0 -1
  586. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/4783-422429203610c318.js +0 -1
  587. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5699-6d708c6b2153ea08.js +0 -1
  588. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/635-0d6b7c8804bcd2dc.js +0 -1
  589. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7022-0d52dd8868621fb0.js +0 -1
  590. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7361-8a23dd8360593e7a.js +0 -1
  591. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7966-f07b2913f7326b50.js +0 -1
  592. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8095-bdce03896ef9639a.js +0 -1
  593. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8146-6bed4e7401e067e6.js +0 -1
  594. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9265-d2a1aaec75ec69b8.js +0 -1
  595. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9440-4069842b90d4b801.js +0 -1
  596. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9624-59b2f803f9c88cd6.js +0 -1
  597. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9832-67896490f6e8a014.js +0 -1
  598. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/_app-d9c89527266296f7.js +0 -1
  599. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products/[...slug]-591abd392dc50ed4.js +0 -1
  600. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products-78e8e88f2a757a18.js +0 -1
  601. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage-852d403c7bda21b3.js +0 -1
  602. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills/[...slug]-ff4bd7a8ec3bab40.js +0 -1
  603. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills-a8b61d8d239fd16f.js +0 -1
  604. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-95ffcd3e2b27e567.js +0 -1
  605. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/edit-e1dd1ed71d26c10d.js +0 -1
  606. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-1ed9045b2f1dfd65.js +0 -1
  607. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-1417ad1c821d720a.js +0 -1
  608. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/settings-59aca25a5b1d3998.js +0 -1
  609. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-f028ef3880ed856c.js +0 -1
  610. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/preferences-503049734a8b082f.js +0 -1
  611. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/preferences-5b26eeda8aed8a7b.js +0 -1
  612. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles-36fa165a48af586b.js +0 -1
  613. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/sync-data-8b793b3b696a2cd3.js +0 -1
  614. mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/version-control-5753fac7c1bfdc88.js +0 -1
  615. mage_ai/shared/memory.py +0 -90
  616. mage_ai/tests/data_preparation/models/block/dynamic/test_dynamic_helpers.py +0 -48
  617. /mage_ai/{tests/data_preparation/shared → ai/utils}/__init__.py +0 -0
  618. /mage_ai/server/frontend_dist/_next/static/{_krrrgup_C-dPOpX36S8I → UZLabyPgcxtZvp0O0EUUS}/_ssgManifest.js +0 -0
  619. /mage_ai/server/frontend_dist_base_path_template/_next/static/{KLL5mirre9d7_ZeEpaw3s → kcptwoOU-JJJg6Vwpkfmx}/_ssgManifest.js +0 -0
  620. /mage_ai/tests/data_preparation/{shared → storage/shared}/test_secrets.py +0 -0
  621. {mage_ai-0.9.69.dist-info → mage_ai-0.9.71.dist-info}/LICENSE +0 -0
  622. {mage_ai-0.9.69.dist-info → mage_ai-0.9.71.dist-info}/WHEEL +0 -0
  623. {mage_ai-0.9.69.dist-info → mage_ai-0.9.71.dist-info}/entry_points.txt +0 -0
  624. {mage_ai-0.9.69.dist-info → mage_ai-0.9.71.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,22 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
2
4
  import traceback
3
5
  from contextlib import contextmanager
4
- from enum import Enum
5
- from typing import Any, Dict, List
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional, Tuple, Union
6
8
 
7
9
  import numpy as np
8
10
  import pandas as pd
9
11
  import polars as pl
12
+ import scipy
10
13
  from pandas.api.types import infer_dtype, is_object_dtype
11
14
  from pandas.core.indexes.range import RangeIndex
12
15
 
16
+ from mage_ai.data.constants import InputDataType, VariableType
17
+ from mage_ai.data.models.manager import DataManager
18
+ from mage_ai.data.tabular.models import BatchSettings
19
+ from mage_ai.data.tabular.reader import read_metadata
13
20
  from mage_ai.data_cleaner.shared.utils import is_geo_dataframe, is_spark_dataframe
14
21
  from mage_ai.data_preparation.models.constants import (
15
22
  DATAFRAME_ANALYSIS_KEYS,
@@ -24,32 +31,40 @@ from mage_ai.data_preparation.models.utils import ( # dask_from_pandas,
24
31
  cast_column_types,
25
32
  cast_column_types_polars,
26
33
  deserialize_columns,
34
+ deserialize_complex,
35
+ infer_variable_type,
36
+ is_basic_iterable,
27
37
  serialize_columns,
38
+ serialize_complex,
28
39
  should_deserialize_pandas,
29
40
  should_serialize_pandas,
30
41
  )
42
+ from mage_ai.data_preparation.models.variables.constants import (
43
+ DATAFRAME_COLUMN_TYPES_FILE,
44
+ DATAFRAME_CSV_FILE,
45
+ DATAFRAME_PARQUET_FILE,
46
+ DATAFRAME_PARQUET_SAMPLE_FILE,
47
+ JOBLIB_FILE,
48
+ JOBLIB_OBJECT_FILE,
49
+ JSON_FILE,
50
+ JSON_SAMPLE_FILE,
51
+ METADATA_FILE,
52
+ RESOURCE_USAGE_FILE,
53
+ UBJSON_MODEL_FILENAME,
54
+ )
55
+ from mage_ai.data_preparation.models.variables.summarizer import get_part_uuids
31
56
  from mage_ai.data_preparation.storage.base_storage import BaseStorage
32
57
  from mage_ai.data_preparation.storage.local_storage import LocalStorage
33
- from mage_ai.shared.parsers import sample_output
58
+ from mage_ai.settings.repo import get_variables_dir
59
+ from mage_ai.shared.array import is_iterable
60
+ from mage_ai.shared.environments import is_debug
61
+ from mage_ai.shared.hash import flatten_dict
62
+ from mage_ai.shared.outputs import load_custom_object, save_custom_object
63
+ from mage_ai.shared.parsers import deserialize_matrix, sample_output, serialize_matrix
34
64
  from mage_ai.shared.utils import clean_name
35
-
36
- DATAFRAME_COLUMN_TYPES_FILE = 'data_column_types.json'
37
- DATAFRAME_PARQUET_FILE = 'data.parquet'
38
- DATAFRAME_PARQUET_SAMPLE_FILE = 'sample_data.parquet'
39
- DATAFRAME_CSV_FILE = 'data.csv'
40
-
41
- METADATA_FILE = 'type.json'
42
-
43
- JSON_FILE = 'data.json'
44
- JSON_SAMPLE_FILE = 'sample_data.json'
45
-
46
-
47
- class VariableType(str, Enum):
48
- DATAFRAME = 'dataframe'
49
- DATAFRAME_ANALYSIS = 'dataframe_analysis'
50
- GEO_DATAFRAME = 'geo_dataframe'
51
- POLARS_DATAFRAME = 'polars_dataframe'
52
- SPARK_DATAFRAME = 'spark_dataframe'
65
+ from mage_ai.system.memory.manager import MemoryManager
66
+ from mage_ai.system.models import ResourceUsage
67
+ from mage_ai.system.storage.utils import size_of_path
53
68
 
54
69
 
55
70
  class Variable:
@@ -58,19 +73,31 @@ class Variable:
58
73
  uuid: str,
59
74
  pipeline_path: str,
60
75
  block_uuid: str,
61
- partition: str = None,
62
- spark=None,
63
- storage: BaseStorage = None,
64
- variable_type: VariableType = None,
76
+ partition: Optional[str] = None,
77
+ skip_check_variable_type: Optional[bool] = None,
78
+ spark: Optional[Any] = None,
79
+ storage: Optional[BaseStorage] = None,
80
+ variable_type: Optional[VariableType] = None,
81
+ variable_types: Optional[List[VariableType]] = None,
65
82
  clean_block_uuid: bool = True,
83
+ validate_pipeline_path: bool = False,
84
+ input_data_types: Optional[List[InputDataType]] = None,
85
+ resource_usage: Optional[ResourceUsage] = None,
86
+ read_batch_settings: Optional[BatchSettings] = None,
87
+ read_chunks: Optional[List] = None,
88
+ variables_dir: Optional[str] = None,
89
+ write_batch_settings: Optional[BatchSettings] = None,
90
+ write_chunks: Optional[List] = None,
66
91
  ) -> None:
67
92
  self.uuid = uuid
68
93
  if storage is None:
69
94
  self.storage = LocalStorage()
70
95
  else:
71
96
  self.storage = storage
72
- # if not self.storage.path_exists(pipeline_path):
73
- # raise Exception(f'Pipeline path {pipeline_path} does not exist.')
97
+
98
+ if validate_pipeline_path and not self.storage.path_exists(pipeline_path):
99
+ raise Exception(f'Pipeline path {pipeline_path} does not exist.')
100
+
74
101
  self.pipeline_path = pipeline_path
75
102
  self.block_uuid = block_uuid
76
103
  self.block_dir_name = clean_name(self.block_uuid) if clean_block_uuid else self.block_uuid
@@ -84,8 +111,27 @@ class Variable:
84
111
  if not self.storage.path_exists(self.variable_dir_path):
85
112
  self.storage.makedirs(self.variable_dir_path)
86
113
 
114
+ self._data_manager = None
115
+ self._part_uuids = None
116
+ self._parts = None
117
+ self._resource_usage = resource_usage
118
+
119
+ self.input_data_types = input_data_types
120
+ self.read_batch_settings = read_batch_settings
121
+ self.read_chunks = read_chunks
122
+ self.variables_dir = variables_dir or get_variables_dir()
123
+ self.write_batch_settings = write_batch_settings
124
+ self.write_chunks = write_chunks
125
+
87
126
  self.variable_type = variable_type
88
- self.check_variable_type(spark=spark)
127
+ self.variable_types = variable_types or []
128
+
129
+ if not skip_check_variable_type:
130
+ self.check_variable_type(spark=spark)
131
+
132
+ @classmethod
133
+ def dir_path(cls, pipeline_path, block_uuid):
134
+ return os.path.join(pipeline_path, VARIABLE_DIR, clean_name(block_uuid))
89
135
 
90
136
  @property
91
137
  def variable_path(self):
@@ -95,11 +141,71 @@ class Variable:
95
141
  def metadata_path(self):
96
142
  return os.path.join(self.variable_path, METADATA_FILE)
97
143
 
98
- @classmethod
99
- def dir_path(self, pipeline_path, block_uuid):
100
- return os.path.join(pipeline_path, VARIABLE_DIR, clean_name(block_uuid))
144
+ def resource_usage_path(self, index: Optional[int] = None) -> str:
145
+ return os.path.join(
146
+ self.variable_path, str(index) if index is not None else '', RESOURCE_USAGE_FILE
147
+ )
101
148
 
102
- def check_variable_type(self, spark=None):
149
+ @property
150
+ def data_manager(self) -> Optional[DataManager]:
151
+ if self._data_manager is None:
152
+ self._data_manager = DataManager(
153
+ input_data_types=self.input_data_types,
154
+ read_batch_settings=self.read_batch_settings,
155
+ read_chunks=self.read_chunks,
156
+ storage=self.storage,
157
+ uuid=self.__scope_uuid(),
158
+ variable_dir_path=self.variable_dir_path,
159
+ variable_path=self.variable_path,
160
+ variables_dir=self.variables_dir,
161
+ variable_type=self.variable_type,
162
+ variable_types=self.variable_types,
163
+ write_batch_settings=self.write_batch_settings,
164
+ write_chunks=self.write_chunks,
165
+ )
166
+ return self._data_manager
167
+
168
+ @property
169
+ def resource_usage(self) -> ResourceUsage:
170
+ if self._resource_usage is None:
171
+ self._resource_usage = ResourceUsage()
172
+ return self._resource_usage
173
+
174
+ @property
175
+ def part_uuids(self) -> Optional[List[str]]:
176
+ if self._part_uuids is not None:
177
+ return self._part_uuids
178
+
179
+ self._part_uuids = get_part_uuids(self)
180
+ if self._part_uuids is not None:
181
+ self._part_uuids = sorted(self._part_uuids)
182
+
183
+ return self._part_uuids
184
+
185
+ def get_resource_usage(self, index: Optional[int] = None) -> Optional[ResourceUsage]:
186
+ if self.storage.path_exists(self.resource_usage_path(index)):
187
+ try:
188
+ data = self.storage.read_json_file(
189
+ self.resource_usage_path(index),
190
+ default_value={},
191
+ raise_exception=False,
192
+ )
193
+ if data:
194
+ self._resource_usage = ResourceUsage.load(**{
195
+ **self.resource_usage.to_dict(),
196
+ **data,
197
+ })
198
+ except Exception as err:
199
+ print(f'[ERROR] Variable.resource_usage: {err}')
200
+ return self.resource_usage
201
+
202
+ def get_analysis(self, index: Optional[int] = None) -> Dict[str, Dict]:
203
+ return self.__read_dataframe_analysis(
204
+ dataframe_analysis_keys=['statistics'],
205
+ index=index,
206
+ )
207
+
208
+ def check_variable_type(self, spark: Optional[Any] = None) -> Optional[VariableType]:
103
209
  """
104
210
  If the variable has a metadata file, read the variable type from the metadata file.
105
211
  Fallback to inferring variable type based on data in the storage.
@@ -107,8 +213,39 @@ class Variable:
107
213
  if self.variable_type is None:
108
214
  try:
109
215
  if self.storage.path_exists(self.metadata_path):
110
- metadata = self.storage.read_json_file(self.metadata_path, raise_exception=True)
216
+ metadata = self.storage.read_json_file(
217
+ self.metadata_path, raise_exception=is_debug()
218
+ )
111
219
  self.variable_type = metadata.get('type')
220
+ if self.variable_type:
221
+ self.variable_type = VariableType(self.variable_type)
222
+ self.variable_types = metadata.get('types') or []
223
+ self.variable_types = [
224
+ VariableType(t) for t in (self.variable_types or []) if t is not None
225
+ ]
226
+ except Exception:
227
+ traceback.print_exc()
228
+
229
+ if (
230
+ self.variable_type is None
231
+ and not self.variable_types
232
+ and self.__memory_manager_v2_enabled
233
+ and self.part_uuids is not None
234
+ and len(self.part_uuids) >= 1
235
+ ):
236
+ try:
237
+ variable_types = []
238
+ for part_uuid in self.part_uuids:
239
+ path = os.path.join(self.variable_path, str(part_uuid), METADATA_FILE)
240
+ if self.storage.path_exists(path):
241
+ metadata = self.storage.read_json_file(path, raise_exception=is_debug())
242
+ var_type = metadata.get('type')
243
+ if var_type:
244
+ variable_types.append(var_type)
245
+ if len(variable_types) >= 1:
246
+ self.variable_type = VariableType.ITERABLE
247
+ self.variable_types = [VariableType(t) for t in (variable_types or [])]
248
+ self.write_metadata()
112
249
  except Exception:
113
250
  traceback.print_exc()
114
251
 
@@ -119,9 +256,7 @@ class Variable:
119
256
  self.variable_type = VariableType.DATAFRAME
120
257
  elif (
121
258
  self.variable_type == VariableType.DATAFRAME or self.variable_type is None
122
- ) and os.path.exists(
123
- os.path.join(self.variable_path, f'{self.uuid}', 'data.sh')
124
- ):
259
+ ) and os.path.exists(os.path.join(self.variable_path, f'{self.uuid}', 'data.sh')):
125
260
  self.variable_type = VariableType.GEO_DATAFRAME
126
261
  elif (
127
262
  self.variable_type is None
@@ -130,9 +265,11 @@ class Variable:
130
265
  ):
131
266
  self.variable_type = VariableType.SPARK_DATAFRAME
132
267
 
268
+ return self.variable_type
269
+
133
270
  def convert_parquet_to_csv(self):
134
271
  """
135
- For DATAFRAME variable, convert parquet files to csv files.
272
+ For DATAFRAME variable, convert parquet files to csv files. Used in R blocks.
136
273
  """
137
274
  if self.variable_type != VariableType.DATAFRAME:
138
275
  return
@@ -142,7 +279,7 @@ class Variable:
142
279
  df = self.__read_parquet()
143
280
  self.storage.write_csv(df, csv_file_path)
144
281
 
145
- def delete(self):
282
+ def delete(self) -> None:
146
283
  """
147
284
  Delete the variable data.
148
285
  """
@@ -155,19 +292,142 @@ class Variable:
155
292
  self.__delete_parquet()
156
293
  elif self.variable_type == VariableType.DATAFRAME_ANALYSIS:
157
294
  return self.__delete_dataframe_analysis()
295
+
296
+ # TODO (dangerous): How do we delete other variable types?
297
+
158
298
  return self.__delete_json()
159
299
 
300
+ def data_exists(self) -> bool:
301
+ path = self.__data_file_path()
302
+ num_rows = self.__parquet_num_rows(path)
303
+ parts = self.part_uuids
304
+ return (
305
+ (parts is not None and len(parts) >= 1)
306
+ or (num_rows is not None and num_rows >= 1)
307
+ or self.storage.path_exists(path)
308
+ )
309
+
310
+ def is_partial_data_readable(
311
+ self, part_uuid: Optional[Union[int, str]] = None, path: Optional[str] = None
312
+ ) -> bool:
313
+ """
314
+ We can only read partial data if 1 of the following criteria is met:
315
+ - The variable has parts: e.g. output_0/0, output_0/1, output_0/2, etc
316
+ - The variable is stored as a parquet file
317
+ """
318
+
319
+ return self.__memory_manager_v2_enabled and (
320
+ self.__is_part_readable(part_uuid) or self.__is_parquet_readable(path)
321
+ )
322
+
323
+ def read_partial_data(
324
+ self,
325
+ batch_settings: Optional[BatchSettings] = None,
326
+ chunks: Optional[List] = None,
327
+ input_data_types: Optional[List[InputDataType]] = None,
328
+ part_uuid: Optional[Union[int, str]] = None,
329
+ sample: bool = False,
330
+ sample_count: Optional[int] = None,
331
+ spark: Optional[Any] = None,
332
+ ) -> Any:
333
+ """
334
+ We can only read partial data if:
335
+ - The variable has parts: e.g. output_0/0, output_0/1, output_0/2, etc
336
+ - The variable is stored as a parquet file
337
+ """
338
+ if part_uuid is not None and self.__is_part_readable(part_uuid):
339
+ variable = self.__class__(
340
+ os.path.join(self.uuid, str(part_uuid)),
341
+ self.pipeline_path,
342
+ self.block_uuid,
343
+ clean_block_uuid=False,
344
+ input_data_types=input_data_types or self.input_data_types,
345
+ partition=self.partition,
346
+ read_batch_settings=batch_settings or self.read_batch_settings,
347
+ read_chunks=chunks or self.read_chunks,
348
+ resource_usage=self.resource_usage,
349
+ storage=self.storage,
350
+ validate_pipeline_path=False,
351
+ # DO NOT PASS variable_types
352
+ # this in or else the data_manager will add its own part to the path
353
+ # variable_type=self.variable_type,
354
+ # variable_types=self.variable_types,
355
+ variables_dir=self.variables_dir,
356
+ write_batch_settings=self.write_batch_settings,
357
+ write_chunks=self.write_chunks,
358
+ )
359
+
360
+ return variable.read_data()
361
+ elif self.__is_parquet_readable():
362
+ data_manager = self.__class__(
363
+ self.uuid,
364
+ self.pipeline_path,
365
+ self.block_uuid,
366
+ clean_block_uuid=False,
367
+ input_data_types=input_data_types or self.input_data_types,
368
+ partition=self.partition,
369
+ read_batch_settings=batch_settings or self.read_batch_settings,
370
+ read_chunks=chunks or self.read_chunks,
371
+ resource_usage=self.resource_usage,
372
+ storage=self.storage,
373
+ validate_pipeline_path=False,
374
+ variable_type=self.variable_type,
375
+ variable_types=self.variable_types,
376
+ variables_dir=self.variables_dir,
377
+ write_batch_settings=self.write_batch_settings,
378
+ write_chunks=self.write_chunks,
379
+ ).data_manager
380
+ if data_manager:
381
+ return data_manager.read_sync(
382
+ part=int(part_uuid) if part_uuid is not None else None
383
+ )
384
+
160
385
  def read_data(
161
386
  self,
162
- dataframe_analysis_keys: List[str] = None,
387
+ dataframe_analysis_keys: Optional[List[str]] = None,
163
388
  raise_exception: bool = False,
164
389
  sample: bool = False,
165
- sample_count: int = None,
166
- spark=None,
390
+ sample_count: Optional[int] = None,
391
+ spark: Optional[Any] = None,
167
392
  ) -> Any:
168
393
  """
169
- Read variable data.
394
+ Used by
395
+ block.get_outputs
396
+ WebSocker server sending block output to the IDE
397
+ fetch_input_variables
398
+ pipeline.get_block_variable
399
+ """
400
+
401
+ def __read(
402
+ dataframe_analysis_keys=dataframe_analysis_keys,
403
+ raise_exception=raise_exception,
404
+ sample=sample,
405
+ sample_count=sample_count,
406
+ spark=spark,
407
+ ):
408
+ return self.__read_data(
409
+ dataframe_analysis_keys=dataframe_analysis_keys,
410
+ raise_exception=raise_exception,
411
+ sample=sample,
412
+ sample_count=sample_count,
413
+ spark=spark,
414
+ )
415
+
416
+ # if self.__memory_manager_v2_enabled and False:
417
+ # with MemoryManager(scope_uuid=self.__scope_uuid(), process_uuid='variable.read_data'):
418
+ # return __read()
419
+ return __read()
170
420
 
421
+ def __read_data(
422
+ self,
423
+ dataframe_analysis_keys: Optional[List[str]] = None,
424
+ raise_exception: bool = False,
425
+ sample: bool = False,
426
+ sample_count: Optional[int] = None,
427
+ spark: Optional[Any] = None,
428
+ ) -> Any:
429
+ """
430
+ Read variable data.
171
431
  Args:
172
432
  dataframe_analysis_keys (List[str], optional): For DATAFRAME_ANALYSIS variable,
173
433
  only read the selected keys.
@@ -179,7 +439,44 @@ class Variable:
179
439
  DATAFRAME variable.
180
440
  spark (None, optional): Spark context, used to read SPARK_DATAFRAME variable.
181
441
  """
182
- if self.variable_type == VariableType.DATAFRAME:
442
+ if (
443
+ sample
444
+ and self.part_uuids is not None
445
+ and len(self.part_uuids) >= 1
446
+ and self.is_partial_data_readable(self.part_uuids[0])
447
+ ):
448
+ return self.read_partial_data(
449
+ part_uuid=self.part_uuids[0],
450
+ sample=sample,
451
+ sample_count=sample_count,
452
+ spark=spark,
453
+ )
454
+ elif self.data_manager and self.data_manager.readable():
455
+ try:
456
+ data = self.data_manager.read_sync(
457
+ sample=sample,
458
+ sample_count=sample_count,
459
+ )
460
+ except FileNotFoundError as err:
461
+ print(f'[ERROR] Variable.read_data: {err}\n{traceback.format_exc()}')
462
+ print(f'variable_type: {self.variable_type}')
463
+ print(f'variable_types: {self.variable_types}')
464
+ print(f'variable_uuid: {self.uuid}')
465
+ print(f'variable_dir_path: {self.variable_dir_path}')
466
+ print(f'variable_path: {self.variable_path}')
467
+ print('Data sources:')
468
+ for source in self.data_manager.data_source:
469
+ print(f' {source}')
470
+ print('\n')
471
+
472
+ traceback.print_exc()
473
+ return None
474
+ return data
475
+
476
+ if (
477
+ self.variable_type == VariableType.DATAFRAME
478
+ or self.variable_type == VariableType.SERIES_PANDAS
479
+ ):
183
480
  return self.__read_parquet(
184
481
  raise_exception=raise_exception,
185
482
  sample=sample,
@@ -197,15 +494,71 @@ class Variable:
197
494
  return self.__read_geo_dataframe(sample=sample, sample_count=sample_count)
198
495
  elif self.variable_type == VariableType.DATAFRAME_ANALYSIS:
199
496
  return self.__read_dataframe_analysis(dataframe_analysis_keys=dataframe_analysis_keys)
200
- return self.__read_json(raise_exception=raise_exception, sample=sample)
497
+ else:
498
+ data = self.__should_load_object()
499
+ if data is not None:
500
+ return data
501
+
502
+ data = self.__read_json(raise_exception=raise_exception, sample=sample)
503
+
504
+ if self.variable_type == VariableType.MATRIX_SPARSE:
505
+ data = self.__read_matrix_sparse(data, sample=sample, sample_count=sample_count)
506
+ elif (
507
+ VariableType.DICTIONARY_COMPLEX == self.variable_type
508
+ or VariableType.LIST_COMPLEX == self.variable_type
509
+ ):
510
+ data = self.__read_complex_object(data)
511
+
512
+ return data
201
513
 
202
514
  async def read_data_async(
203
515
  self,
204
- dataframe_analysis_keys: List[str] = None,
516
+ dataframe_analysis_keys: Optional[List[str]] = None,
205
517
  sample: bool = False,
206
- sample_count: int = None,
207
- spark=None,
208
- ):
518
+ sample_count: Optional[int] = None,
519
+ spark: Optional[Any] = None,
520
+ limit_parts: Optional[int] = None,
521
+ input_data_types: Optional[List[InputDataType]] = None,
522
+ ) -> Any:
523
+ """
524
+ Used by
525
+ block.to_dict_async
526
+ GET /pipelines/[:uuid]
527
+ """
528
+
529
+ async def __read(
530
+ dataframe_analysis_keys=dataframe_analysis_keys,
531
+ limit_parts=limit_parts,
532
+ sample=sample,
533
+ sample_count=sample_count,
534
+ spark=spark,
535
+ ):
536
+ return await self.__read_data_async(
537
+ dataframe_analysis_keys=dataframe_analysis_keys,
538
+ limit_parts=limit_parts,
539
+ sample=sample,
540
+ sample_count=sample_count,
541
+ spark=spark,
542
+ )
543
+
544
+ # if self.__memory_manager_v2_enabled and False:
545
+ # with MemoryManager(
546
+ # scope_uuid=self.__scope_uuid(), process_uuid='variable.read_data_async'
547
+ # ):
548
+ # data = await __read()
549
+ # else:
550
+ # data = await __read()
551
+
552
+ return await __read()
553
+
554
+ async def __read_data_async(
555
+ self,
556
+ dataframe_analysis_keys: Optional[List[str]] = None,
557
+ limit_parts: Optional[int] = None,
558
+ sample: bool = False,
559
+ sample_count: Optional[int] = None,
560
+ spark: Optional[Any] = None,
561
+ ) -> Any:
209
562
  """
210
563
  Read variable data asynchronously.
211
564
 
@@ -217,8 +570,50 @@ class Variable:
217
570
  sample_count (int, optional): The number of rows to sample, used for
218
571
  DATAFRAME variable.
219
572
  spark (None, optional): Spark context, used to read SPARK_DATAFRAME variable.
573
+
574
+ Used by
575
+ block.to_dict_async
576
+ GET /pipelines/[:uuid]
220
577
  """
221
- if self.variable_type == VariableType.DATAFRAME:
578
+ if (
579
+ sample
580
+ and self.part_uuids is not None
581
+ and len(self.part_uuids) >= 1
582
+ and self.is_partial_data_readable(self.part_uuids[0])
583
+ ):
584
+ return self.read_partial_data(
585
+ part_uuid=self.part_uuids[0],
586
+ sample=sample,
587
+ sample_count=sample_count,
588
+ spark=spark,
589
+ )
590
+ elif self.data_manager and self.data_manager.readable():
591
+ try:
592
+ data = await self.data_manager.read_async(
593
+ limit_parts=limit_parts,
594
+ sample=sample,
595
+ sample_count=sample_count,
596
+ )
597
+ return data
598
+ except FileNotFoundError as err:
599
+ print(f'[ERROR] Variable.read_data: {err}\n{traceback.format_exc()}')
600
+ print(f'variable_type: {self.variable_type}')
601
+ print(f'variable_types: {self.variable_types}')
602
+ print(f'variable_uuid: {self.uuid}')
603
+ print(f'variable_dir_path: {self.variable_dir_path}')
604
+ print(f'variable_path: {self.variable_path}')
605
+ print('Data sources:')
606
+ for source in self.data_manager.data_source:
607
+ print(f' {source}')
608
+ print('\n')
609
+
610
+ traceback.print_exc()
611
+ return None
612
+
613
+ if (
614
+ self.variable_type == VariableType.DATAFRAME
615
+ or self.variable_type == VariableType.SERIES_PANDAS
616
+ ):
222
617
  return self.__read_parquet(sample=sample, sample_count=sample_count)
223
618
  elif self.variable_type == VariableType.POLARS_DATAFRAME:
224
619
  return self.__read_polars_parquet(
@@ -231,108 +626,330 @@ class Variable:
231
626
  return await self.__read_dataframe_analysis_async(
232
627
  dataframe_analysis_keys=dataframe_analysis_keys,
233
628
  )
234
- return await self.__read_json_async(sample=sample)
629
+ else:
630
+ data = self.__should_load_object()
631
+ if data is not None:
632
+ return data
633
+
634
+ data = await self.__read_json_async(sample=sample)
635
+
636
+ if self.variable_type == VariableType.MATRIX_SPARSE:
637
+ data = self.__read_matrix_sparse(data, sample=sample, sample_count=sample_count)
638
+ elif (
639
+ VariableType.DICTIONARY_COMPLEX == self.variable_type
640
+ or VariableType.LIST_COMPLEX == self.variable_type
641
+ ):
642
+ data = self.__read_complex_object(data)
643
+
644
+ return data
645
+
646
+ def __read_complex_object(self, data: Union[Dict, List]) -> Union[Dict, List]:
647
+ column_types_filename = os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE)
648
+ if self.storage.path_exists(column_types_filename):
649
+ column_types = self.storage.read_json_file(column_types_filename)
650
+ data = deserialize_complex(
651
+ data,
652
+ column_types,
653
+ unflatten=isinstance(data, dict),
654
+ )
655
+
656
+ return data
657
+
658
+ def __save_complex_object(self, data: Union[Dict, List]) -> Union[Dict, List]:
659
+ data, column_types = serialize_complex(
660
+ flatten_dict(data) if isinstance(data, dict) else data,
661
+ save_path=self.variable_path,
662
+ )
663
+
664
+ self.storage.write_json_file(
665
+ os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE), column_types
666
+ )
667
+ self.resource_usage.update_attributes(
668
+ directory=self.variable_path,
669
+ size=size_of_path(self.variable_path),
670
+ )
671
+
672
+ return data
673
+
674
+ async def __save_complex_object_async(self, data: Union[Dict, List]) -> Union[Dict, List]:
675
+ data, column_types = serialize_complex(
676
+ flatten_dict(data) if isinstance(data, dict) else data,
677
+ save_path=self.variable_path,
678
+ )
679
+ await self.storage.write_json_file_async(
680
+ os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE),
681
+ column_types,
682
+ )
683
+
684
+ self.resource_usage.update_attributes(
685
+ directory=self.variable_path,
686
+ size=size_of_path(self.variable_path),
687
+ )
688
+
689
+ return data
690
+
691
+ def __should_save_object(self, data: Any) -> Dict[str, Any]:
692
+ data, full_path = save_custom_object(
693
+ data, self.variable_path, variable_type=self.variable_type
694
+ )
695
+
696
+ self.resource_usage.update_attributes(
697
+ directory=self.variable_path,
698
+ size=size_of_path(self.variable_path),
699
+ )
700
+
701
+ return data
702
+
703
+ def __should_load_object(self) -> Optional[Any]:
704
+ return load_custom_object(self.variable_path, self.variable_type)
235
705
 
236
706
  @contextmanager
237
- def open_to_write(self, filename: str) -> None:
707
+ def open_to_write(self, filename: str):
238
708
  if not self.storage.isdir(self.variable_path):
239
709
  self.storage.makedirs(self.variable_path, exist_ok=True)
240
710
 
241
- with self.storage.open_to_write(self.full_path(filename)) as f:
242
- yield f
711
+ with self.storage.open_to_write(self.full_path(filename)) as fi:
712
+ yield fi
243
713
 
244
- def full_path(self, filename: str = None) -> str:
714
+ def full_path(self, filename: Optional[str] = None) -> str:
245
715
  if filename:
246
716
  return os.path.join(self.variable_path, filename)
247
717
 
248
718
  return self.variable_path
249
719
 
250
720
  def write_data(self, data: Any) -> None:
721
+ if self.__memory_manager_v2_enabled and False:
722
+ with MemoryManager(scope_uuid=self.__scope_uuid(), process_uuid='variable.write_data'):
723
+ self.__write_data(data)
724
+ else:
725
+ self.__write_data(data)
726
+
727
+ def __write_data(self, data: Any) -> None:
251
728
  """
252
729
  Write variable data to the persistent storage.
253
730
 
254
731
  Args:
255
732
  data (Any): Variable data to be written to storage.
256
- """
257
- if isinstance(data, pd.Series):
258
- data = data.to_list()
259
733
 
260
- if self.variable_type is None and type(data) is pd.DataFrame:
261
- self.variable_type = VariableType.DATAFRAME
262
- elif self.variable_type is None and type(data) is pl.DataFrame:
263
- self.variable_type = VariableType.POLARS_DATAFRAME
264
- elif is_spark_dataframe(data):
265
- self.variable_type = VariableType.SPARK_DATAFRAME
266
- elif is_geo_dataframe(data):
267
- self.variable_type = VariableType.GEO_DATAFRAME
268
-
269
- # Dataframe analysis variables share the same uuid as the original dataframe variable
270
- # so we won't write the metadata file for them
271
- if self.variable_type == VariableType.DATAFRAME_ANALYSIS:
272
- self.__write_dataframe_analysis(data)
273
- return
274
-
275
- if self.variable_type == VariableType.DATAFRAME:
276
- self.__write_parquet(data)
277
- elif self.variable_type == VariableType.POLARS_DATAFRAME:
278
- self.__write_polars_dataframe(data)
279
- elif self.variable_type == VariableType.SPARK_DATAFRAME:
280
- self.__write_spark_parquet(data)
281
- elif self.variable_type == VariableType.GEO_DATAFRAME:
282
- self.__write_geo_dataframe(data)
734
+ Used by:
735
+ VariableManager
736
+ """
737
+ if self.data_manager and self.data_manager.writeable(data):
738
+ metadata = self.data_manager.write_sync(data)
739
+ if metadata:
740
+ self.__write_dataframe_analysis(
741
+ dict(
742
+ statistics=dict(
743
+ original_row_count=metadata.get('rows'),
744
+ original_column_count=metadata.get('columns'),
745
+ ),
746
+ )
747
+ )
748
+ self.resource_usage.update_attributes(
749
+ directory=self.data_manager.resource_usage.directory,
750
+ size=self.data_manager.resource_usage.size,
751
+ )
283
752
  else:
284
- self.__write_json(data)
753
+ if isinstance(data, pd.Series) and self.variable_type != VariableType.SERIES_PANDAS:
754
+ data = data.to_list()
755
+
756
+ if self.variable_type is None and isinstance(data, pd.DataFrame):
757
+ self.variable_type = VariableType.DATAFRAME
758
+ elif self.variable_type is None and isinstance(data, pl.DataFrame):
759
+ self.variable_type = VariableType.POLARS_DATAFRAME
760
+ elif is_spark_dataframe(data):
761
+ self.variable_type = VariableType.SPARK_DATAFRAME
762
+ elif is_geo_dataframe(data):
763
+ self.variable_type = VariableType.GEO_DATAFRAME
764
+
765
+ # Dataframe analysis variables share the same uuid as the original dataframe variable
766
+ # so we won't write the metadata file for them
767
+ if self.variable_type == VariableType.DATAFRAME_ANALYSIS:
768
+ self.__write_dataframe_analysis(data)
769
+ return
770
+
771
+ if self.variable_type == VariableType.DATAFRAME:
772
+ self.__write_parquet(data)
773
+ elif self.variable_type == VariableType.POLARS_DATAFRAME:
774
+ self.__write_polars_dataframe(data)
775
+ elif self.variable_type == VariableType.SPARK_DATAFRAME:
776
+ self.__write_spark_parquet(data)
777
+ elif self.variable_type == VariableType.GEO_DATAFRAME:
778
+ self.__write_geo_dataframe(data)
779
+ elif self.variable_type == VariableType.MATRIX_SPARSE:
780
+ self.__write_matrix_sparse(data)
781
+ elif self.variable_type == VariableType.SERIES_PANDAS:
782
+ if not self.__write_series_pandas(data):
783
+ self.__write_json(data)
784
+ else:
785
+ if (
786
+ VariableType.DICTIONARY_COMPLEX == self.variable_type
787
+ or VariableType.LIST_COMPLEX == self.variable_type
788
+ ):
789
+ data = self.__save_complex_object(data)
790
+ else:
791
+ data = self.__should_save_object(data)
285
792
 
793
+ self.__write_json(data)
794
+
795
+ # Shared logic across most variable types
286
796
  if self.variable_type != VariableType.SPARK_DATAFRAME:
287
797
  # Not write json file in spark data directory to avoid read error
288
798
  self.write_metadata()
289
799
 
800
+ self.__write_resource_usage()
801
+
802
+ if self.variable_type in [
803
+ VariableType.ITERABLE,
804
+ VariableType.LIST_COMPLEX,
805
+ ]:
806
+ self.__write_dataframe_analysis(
807
+ dict(
808
+ statistics=dict(
809
+ original_row_count=len(data),
810
+ ),
811
+ )
812
+ )
813
+
290
814
  async def write_data_async(self, data: Any) -> None:
815
+ if self.__memory_manager_v2_enabled and False:
816
+ with MemoryManager(
817
+ scope_uuid=self.__scope_uuid(), process_uuid='variable.write_data_async'
818
+ ):
819
+ await self.__write_data_async(data)
820
+ else:
821
+ await self.__write_data_async(data)
822
+
823
+ async def __write_data_async(self, data: Any) -> None:
291
824
  """
292
825
  Write variable data to the persistent storage.
293
826
 
294
827
  Args:
295
828
  data (Any): Variable data to be written to storage.
296
- """
297
- if self.variable_type is None and type(data) is pd.DataFrame:
298
- self.variable_type = VariableType.DATAFRAME
299
- elif self.variable_type is None and type(data) is pl.DataFrame:
300
- self.variable_type = VariableType.POLARS_DATAFRAME
301
- elif is_spark_dataframe(data):
302
- self.variable_type = VariableType.SPARK_DATAFRAME
303
- elif is_geo_dataframe(data):
304
- self.variable_type = VariableType.GEO_DATAFRAME
305
-
306
- if self.variable_type == VariableType.DATAFRAME_ANALYSIS:
307
- self.__write_dataframe_analysis(data)
308
- return
309
829
 
310
- if self.variable_type == VariableType.DATAFRAME:
311
- self.__write_parquet(data)
312
- elif self.variable_type == VariableType.POLARS_DATAFRAME:
313
- self.__write_polars_dataframe(data)
314
- elif self.variable_type == VariableType.SPARK_DATAFRAME:
315
- self.__write_spark_parquet(data)
316
- elif self.variable_type == VariableType.GEO_DATAFRAME:
317
- self.__write_geo_dataframe(data)
830
+ Used by:
831
+ VariableManager
832
+ """
833
+ if self.data_manager and self.data_manager.writeable(data):
834
+ metadata = await self.data_manager.write_async(data)
835
+ if metadata:
836
+ self.__write_dataframe_analysis(
837
+ dict(
838
+ statistics=dict(
839
+ original_row_count=metadata.get('rows'),
840
+ original_column_count=metadata.get('columns'),
841
+ ),
842
+ )
843
+ )
844
+ self.resource_usage.update_attributes(
845
+ directory=self.data_manager.resource_usage.directory,
846
+ size=self.data_manager.resource_usage.size,
847
+ )
318
848
  else:
319
- await self.__write_json_async(data)
849
+ if self.variable_type is None and isinstance(data, pd.DataFrame):
850
+ self.variable_type = VariableType.DATAFRAME
851
+ elif self.variable_type is None and isinstance(data, pl.DataFrame):
852
+ self.variable_type = VariableType.POLARS_DATAFRAME
853
+ elif is_spark_dataframe(data):
854
+ self.variable_type = VariableType.SPARK_DATAFRAME
855
+ elif is_geo_dataframe(data):
856
+ self.variable_type = VariableType.GEO_DATAFRAME
857
+
858
+ if self.variable_type == VariableType.DATAFRAME_ANALYSIS:
859
+ self.__write_dataframe_analysis(data)
860
+ return
861
+
862
+ if self.variable_type == VariableType.DATAFRAME:
863
+ self.__write_parquet(data)
864
+ elif self.variable_type == VariableType.POLARS_DATAFRAME:
865
+ self.__write_polars_dataframe(data)
866
+ elif self.variable_type == VariableType.SPARK_DATAFRAME:
867
+ self.__write_spark_parquet(data)
868
+ elif self.variable_type == VariableType.GEO_DATAFRAME:
869
+ self.__write_geo_dataframe(data)
870
+ elif self.variable_type == VariableType.MATRIX_SPARSE:
871
+ self.__write_matrix_sparse(data)
872
+ elif self.variable_type == VariableType.SERIES_PANDAS:
873
+ if not self.__write_series_pandas(data):
874
+ await self.__write_json_async(data)
875
+ else:
876
+ if (
877
+ VariableType.DICTIONARY_COMPLEX == self.variable_type
878
+ or VariableType.LIST_COMPLEX == self.variable_type
879
+ ):
880
+ data = await self.__save_complex_object_asycn(data)
881
+ else:
882
+ data = self.__should_save_object(data)
883
+ await self.__write_json_async(data)
320
884
 
321
885
  if self.variable_type != VariableType.SPARK_DATAFRAME:
322
886
  # Not write json file in spark data directory to avoid read error
323
887
  self.write_metadata()
324
888
 
889
+ self.__write_resource_usage()
890
+
891
+ if (
892
+ self.variable_type
893
+ in [
894
+ VariableType.DICTIONARY_COMPLEX,
895
+ VariableType.ITERABLE,
896
+ VariableType.LIST_COMPLEX,
897
+ ]
898
+ or is_basic_iterable(data)
899
+ ) and hasattr(data, '__len__'):
900
+ self.__write_dataframe_analysis(
901
+ dict(
902
+ statistics=dict(
903
+ original_row_count=len(data),
904
+ ),
905
+ )
906
+ )
907
+
325
908
  def write_metadata(self) -> None:
326
909
  """
327
910
  Write metadata to the persistent storage.
328
911
  """
329
912
  metadata = dict(
330
- type=self.variable_type.value
331
- if isinstance(self.variable_type, VariableType)
332
- else self.variable_type,
913
+ type=(
914
+ self.variable_type.value
915
+ if isinstance(self.variable_type, VariableType)
916
+ else self.variable_type
917
+ ),
333
918
  )
919
+
920
+ if self.variable_types:
921
+ metadata['types'] = [
922
+ variable_type.value if isinstance(variable_type, VariableType) else variable_type
923
+ for variable_type in self.variable_types
924
+ ]
925
+
334
926
  self.storage.write_json_file(self.metadata_path, metadata)
335
927
 
928
+ def items_count(self, include_parts: Optional[bool] = None) -> Optional[int]:
929
+ if self.__memory_manager_v2_enabled:
930
+ row_count = None
931
+ if self.part_uuids is not None:
932
+ if include_parts:
933
+ row_count = self.__parquet_num_rows(self.variable_path)
934
+ else:
935
+ row_count = len(self.part_uuids)
936
+ elif self.storage.path_exists(os.path.join(self.variable_path, 'statistics.json')):
937
+ statistics = self.storage.read_json_file(
938
+ os.path.join(self.variable_path, 'statistics.json')
939
+ )
940
+ if statistics and isinstance(statistics, dict):
941
+ row_count = statistics.get('original_row_count')
942
+ else:
943
+ row_count = self.__parquet_num_rows(self.variable_path)
944
+
945
+ if row_count is not None and isinstance(row_count, (float, int, str)):
946
+ return int(row_count)
947
+
948
+ def __write_resource_usage(self) -> None:
949
+ if self.resource_usage:
950
+ os.makedirs(self.variable_dir_path, exist_ok=True)
951
+ self.storage.write_json_file(self.resource_usage_path(), self.resource_usage.to_dict())
952
+
336
953
  def __delete_dataframe_analysis(self) -> None:
337
954
  for k in DATAFRAME_ANALYSIS_KEYS:
338
955
  file_path = os.path.join(self.variable_path, f'{k}.json')
@@ -356,6 +973,24 @@ class Variable:
356
973
  self.storage.remove(file_path)
357
974
  self.storage.remove_dir(self.variable_path)
358
975
 
976
+ def __data_file_path(self) -> str:
977
+ if self.variable_type in [
978
+ VariableType.DATAFRAME,
979
+ VariableType.POLARS_DATAFRAME,
980
+ VariableType.SERIES_PANDAS,
981
+ VariableType.SERIES_POLARS,
982
+ ]:
983
+ return os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE)
984
+ elif VariableType.GEO_DATAFRAME == self.variable_type:
985
+ return os.path.join(self.variable_path, 'data.sh')
986
+ elif VariableType.MODEL_SKLEARN == self.variable_type:
987
+ return os.path.join(self.variable_path, JOBLIB_FILE)
988
+ elif VariableType.MODEL_XGBOOST == self.variable_type:
989
+ return os.path.join(self.variable_path, UBJSON_MODEL_FILENAME)
990
+ elif VariableType.CUSTOM_OBJECT == self.variable_type:
991
+ return os.path.join(self.variable_path, JOBLIB_OBJECT_FILE)
992
+ return os.path.join(self.variable_path, JSON_FILE)
993
+
359
994
  def __read_json(
360
995
  self,
361
996
  default_value: Dict = None,
@@ -380,14 +1015,20 @@ class Variable:
380
1015
  if self.storage.path_exists(file_path):
381
1016
  try:
382
1017
  data = self.storage.read_json_file(
383
- file_path, default_value=default_value, raise_exception=raise_exception)
1018
+ file_path,
1019
+ default_value=default_value,
1020
+ raise_exception=raise_exception,
1021
+ )
384
1022
  except Exception as ex:
385
1023
  if raise_exception:
386
1024
  raise Exception(f'Failed to read json file: {file_path}') from ex
387
1025
  else:
388
1026
  try:
389
1027
  data = self.storage.read_json_file(
390
- old_file_path, default_value=default_value, raise_exception=raise_exception)
1028
+ old_file_path,
1029
+ default_value=default_value,
1030
+ raise_exception=raise_exception,
1031
+ )
391
1032
  except Exception as ex:
392
1033
  if raise_exception:
393
1034
  raise Exception(f'Failed to read json file: {old_file_path}') from ex
@@ -419,26 +1060,41 @@ class Variable:
419
1060
  data = sample_output(data)[0]
420
1061
  return data
421
1062
 
422
- def __write_json(self, data) -> None:
1063
+ def __write_json(self, data) -> Any:
423
1064
  if not self.storage.isdir(self.variable_path):
424
1065
  self.storage.makedirs(self.variable_path, exist_ok=True)
1066
+
425
1067
  file_path = os.path.join(self.variable_path, JSON_FILE)
426
1068
  sample_file_path = os.path.join(self.variable_path, JSON_SAMPLE_FILE)
427
1069
  self.storage.write_json_file(file_path, data)
428
1070
  self.storage.write_json_file(sample_file_path, sample_output(data)[0])
429
1071
 
1072
+ self.resource_usage.update_attributes(
1073
+ size=size_of_path(self.variable_path),
1074
+ path=file_path,
1075
+ )
1076
+
1077
+ return data
1078
+
430
1079
  async def __write_json_async(self, data) -> None:
431
1080
  if not self.storage.isdir(self.variable_path):
432
1081
  self.storage.makedirs(self.variable_path, exist_ok=True)
1082
+
433
1083
  file_path = os.path.join(self.variable_path, JSON_FILE)
434
1084
  sample_file_path = os.path.join(self.variable_path, JSON_SAMPLE_FILE)
1085
+
1086
+ self.resource_usage.update_attributes(
1087
+ size=size_of_path(self.variable_path),
1088
+ path=file_path,
1089
+ )
1090
+
435
1091
  try:
436
1092
  await self.storage.write_json_file_async(file_path, data)
437
1093
  await self.storage.write_json_file_async(sample_file_path, sample_output(data)[0])
438
1094
  except Exception:
439
1095
  traceback.print_exc()
440
1096
 
441
- def __read_geo_dataframe(self, sample: bool = False, sample_count: int = None):
1097
+ def __read_geo_dataframe(self, sample: bool = False, sample_count: Optional[int] = None):
442
1098
  import geopandas as gpd
443
1099
 
444
1100
  file_path = os.path.join(self.variable_path, 'data.sh')
@@ -461,7 +1117,7 @@ class Variable:
461
1117
  def __read_parquet(
462
1118
  self,
463
1119
  sample: bool = False,
464
- sample_count: int = None,
1120
+ sample_count: Optional[int] = None,
465
1121
  raise_exception: bool = False,
466
1122
  ) -> pd.DataFrame:
467
1123
  file_path = os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE)
@@ -491,9 +1147,19 @@ class Variable:
491
1147
  if df.shape[0] > sample_count:
492
1148
  df = df.iloc[:sample_count]
493
1149
 
1150
+ column_types_raw = None
494
1151
  column_types_filename = os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE)
495
1152
  if self.storage.path_exists(column_types_filename):
496
- column_types = self.storage.read_json_file(column_types_filename)
1153
+ column_types_raw = self.storage.read_json_file(column_types_filename)
1154
+ column_types = {}
1155
+
1156
+ if self.variable_type == VariableType.SERIES_PANDAS:
1157
+ if isinstance(column_types_raw, list):
1158
+ for col_data in column_types_raw:
1159
+ column_types.update(col_data['column_types'])
1160
+ else:
1161
+ column_types = column_types_raw
1162
+
497
1163
  # ddf = dask_from_pandas(df)
498
1164
  if should_deserialize_pandas(column_types):
499
1165
  df = apply_transform_pandas(
@@ -501,18 +1167,65 @@ class Variable:
501
1167
  lambda row: deserialize_columns(row, column_types),
502
1168
  )
503
1169
  df = cast_column_types(df, column_types)
1170
+
1171
+ if self.variable_type == VariableType.SERIES_PANDAS:
1172
+ if column_types_raw and isinstance(column_types_raw, list):
1173
+ series_list = []
1174
+
1175
+ for col_data in column_types_raw:
1176
+ column_mapping = col_data.get('column_mapping')
1177
+ index = col_data.get('index')
1178
+
1179
+ columns_idx = []
1180
+ columns = []
1181
+ for col_idx, col in column_mapping.items():
1182
+ columns_idx.append(col_idx)
1183
+ columns.append(col)
1184
+
1185
+ df_series = df.iloc[: len(index)][columns_idx]
1186
+ df_series.columns = columns
1187
+ for col in df_series.columns:
1188
+ series = df_series[col]
1189
+ series.set_axis(index)
1190
+ series_list.append(series)
1191
+
1192
+ return series_list
1193
+ else:
1194
+ df = df.iloc[:, 0]
1195
+
504
1196
  return df
505
1197
 
1198
+ def __read_matrix_sparse(
1199
+ self,
1200
+ json_dict: Union[Dict, List[Dict], Tuple[Dict]],
1201
+ sample: bool = False,
1202
+ sample_count: Optional[int] = None,
1203
+ ) -> scipy.sparse._csr.csr_matrix:
1204
+ if isinstance(json_dict, list) or isinstance(json_dict, Tuple):
1205
+ return [self.__deserialize_matrix_sparse(d, sample, sample_count) for d in json_dict]
1206
+
1207
+ return self.__deserialize_matrix_sparse(json_dict, sample, sample_count)
1208
+
1209
+ def __deserialize_matrix_sparse(
1210
+ self,
1211
+ json_dict: Dict,
1212
+ sample: bool = False,
1213
+ sample_count: Optional[int] = None,
1214
+ ) -> scipy.sparse._csr.csr_matrix:
1215
+ csr_matrix = deserialize_matrix(json_dict)
1216
+ if sample:
1217
+ return csr_matrix[:sample_count, :DATAFRAME_SAMPLE_MAX_COLUMNS]
1218
+
1219
+ return csr_matrix
1220
+
506
1221
  def __read_polars_parquet(
507
1222
  self,
508
1223
  sample: bool = False,
509
- sample_count: int = None,
1224
+ sample_count: Optional[int] = None,
510
1225
  raise_exception: bool = False,
511
1226
  ) -> pl.DataFrame:
512
1227
  file_path = os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE)
513
- sample_file_path = os.path.join(
514
- self.variable_path, DATAFRAME_PARQUET_SAMPLE_FILE
515
- )
1228
+ sample_file_path = os.path.join(self.variable_path, DATAFRAME_PARQUET_SAMPLE_FILE)
516
1229
 
517
1230
  read_sample_success = False
518
1231
  if sample:
@@ -521,9 +1234,7 @@ class Variable:
521
1234
  read_sample_success = True
522
1235
  except Exception as ex:
523
1236
  if raise_exception:
524
- raise Exception(
525
- f'Failed to read parquet file: {sample_file_path}'
526
- ) from ex
1237
+ raise Exception(f'Failed to read parquet file: {sample_file_path}') from ex
527
1238
  else:
528
1239
  traceback.print_exc()
529
1240
  if not read_sample_success:
@@ -548,12 +1259,13 @@ class Variable:
548
1259
  df = cast_column_types_polars(df, column_types)
549
1260
  return df
550
1261
 
551
- def __read_spark_parquet(self, sample: bool = False, sample_count: int = None, spark=None):
1262
+ def __read_spark_parquet(
1263
+ self, sample: bool = False, sample_count: Optional[int] = None, spark=None
1264
+ ):
552
1265
  if spark is None:
553
1266
  return None
554
1267
  df = (
555
- spark.read
556
- .format('parquet')
1268
+ spark.read.format('parquet')
557
1269
  .option('header', 'true')
558
1270
  .option('inferSchema', 'true')
559
1271
  .option('delimiter', ',')
@@ -569,7 +1281,7 @@ class Variable:
569
1281
  df_sample_output = data.iloc[:DATAFRAME_SAMPLE_COUNT]
570
1282
  df_sample_output.to_file(os.path.join(self.variable_path, 'sample_data.sh'))
571
1283
 
572
- def __write_parquet(self, data: pd.DataFrame) -> None:
1284
+ def __get_column_types(self, data: pd.DataFrame) -> Tuple[Dict, pd.DataFrame]:
573
1285
  column_types = {}
574
1286
  df_output = data.copy()
575
1287
  # Clean up data types since parquet doesn't support mixed data types
@@ -612,6 +1324,49 @@ class Variable:
612
1324
  column_types[c] = coltype.__name__
613
1325
  else:
614
1326
  column_types[c] = type(series_non_null.iloc[0].item()).__name__
1327
+ return column_types, df_output
1328
+
1329
+ def __write_parquet(
1330
+ self,
1331
+ data: Union[pd.DataFrame, List[pd.Series]],
1332
+ ) -> None:
1333
+ column_types_to_test = {}
1334
+
1335
+ is_series_list = (
1336
+ (isinstance(data, list) or isinstance(data, tuple))
1337
+ and len(data) >= 1
1338
+ and isinstance(data[0], pd.Series)
1339
+ )
1340
+
1341
+ if is_series_list:
1342
+ df_output = pd.DataFrame()
1343
+
1344
+ column_types = []
1345
+ for idx, series in enumerate(data):
1346
+ df_series = series.to_frame()
1347
+ column_mapping = {}
1348
+
1349
+ columns = []
1350
+ for col in df_series.columns:
1351
+ col_idx = f'{col}_{idx}'
1352
+ column_mapping[col_idx] = col
1353
+ columns.append(col_idx)
1354
+
1355
+ df_series.columns = columns
1356
+ col_types, df_series = self.__get_column_types(df_series)
1357
+
1358
+ df_output = pd.concat([df_output, df_series], axis=1)
1359
+ column_types.append(
1360
+ dict(
1361
+ column_mapping=column_mapping,
1362
+ column_types=col_types,
1363
+ index=series.index.to_list(),
1364
+ )
1365
+ )
1366
+ column_types_to_test.update(col_types)
1367
+ else:
1368
+ column_types, df_output = self.__get_column_types(data)
1369
+ column_types_to_test.update(column_types)
615
1370
 
616
1371
  self.storage.makedirs(self.variable_path, exist_ok=True)
617
1372
  self.storage.write_json_file(
@@ -619,10 +1374,14 @@ class Variable:
619
1374
  column_types,
620
1375
  )
621
1376
 
622
- if should_serialize_pandas(column_types):
1377
+ if should_serialize_pandas(column_types_to_test):
623
1378
  # Try using Polars to write the dataframe to improve performance
624
- if type(df_output.index) is RangeIndex and df_output.index.start == 0 \
625
- and df_output.index.stop == df_output.shape[0] and df_output.index.step == 1:
1379
+ if (
1380
+ type(df_output.index) is RangeIndex
1381
+ and df_output.index.start == 0
1382
+ and df_output.index.stop == df_output.shape[0]
1383
+ and df_output.index.step == 1
1384
+ ):
626
1385
  # Polars ignores any index
627
1386
  try:
628
1387
  pl_df = pl.from_pandas(df_output)
@@ -637,11 +1396,12 @@ class Variable:
637
1396
  # ddf = dask_from_pandas(df_output)
638
1397
  df_output_serialized = apply_transform_pandas(
639
1398
  df_output,
640
- lambda row: serialize_columns(row, column_types),
1399
+ lambda row: serialize_columns(row, column_types_to_test),
641
1400
  )
642
1401
  else:
643
1402
  df_output_serialized = df_output
644
1403
 
1404
+ df_output_serialized.columns = [str(col) for col in df_output_serialized.columns]
645
1405
  self.storage.write_parquet(
646
1406
  df_output_serialized,
647
1407
  os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE),
@@ -661,6 +1421,20 @@ class Variable:
661
1421
  print(f'Sample output error: {err}.')
662
1422
  traceback.print_exc()
663
1423
 
1424
+ try:
1425
+ n_rows, n_cols = df_output_serialized.shape
1426
+ self.__write_dataframe_analysis(
1427
+ dict(
1428
+ statistics=dict(
1429
+ original_row_count=n_rows,
1430
+ original_column_count=n_cols,
1431
+ ),
1432
+ )
1433
+ )
1434
+ except Exception as err:
1435
+ print(f'Writing DataFrame analysis failed during writing parquet: {err}.')
1436
+ traceback.print_exc()
1437
+
664
1438
  def __write_polars_dataframe(self, data: pl.DataFrame) -> None:
665
1439
  self.storage.makedirs(self.variable_path, exist_ok=True)
666
1440
 
@@ -685,16 +1459,12 @@ class Variable:
685
1459
  traceback.print_exc()
686
1460
 
687
1461
  def __write_spark_parquet(self, data) -> None:
688
- (
689
- data.write
690
- .option('header', 'True')
691
- .mode('overwrite')
692
- .parquet(self.variable_path)
693
- )
1462
+ (data.write.option('header', 'True').mode('overwrite').parquet(self.variable_path))
694
1463
 
695
1464
  def __read_dataframe_analysis(
696
1465
  self,
697
- dataframe_analysis_keys: List[str] = None,
1466
+ dataframe_analysis_keys: Optional[List[str]] = None,
1467
+ index: Optional[int] = None,
698
1468
  ) -> Dict[str, Dict]:
699
1469
  """
700
1470
  Read the following files
@@ -703,13 +1473,14 @@ class Variable:
703
1473
  3. insights.json
704
1474
  4. suggestions.json
705
1475
  """
706
- if not self.storage.path_exists(self.variable_path):
1476
+ base_path = os.path.join(self.variable_path, str(index) if index is not None else '')
1477
+ if not self.storage.path_exists(base_path):
707
1478
  return dict()
708
1479
  result = dict()
709
1480
  for k in DATAFRAME_ANALYSIS_KEYS:
710
1481
  if dataframe_analysis_keys is not None and k not in dataframe_analysis_keys:
711
1482
  continue
712
- result[k] = self.storage.read_json_file(os.path.join(self.variable_path, f'{k}.json'))
1483
+ result[k] = self.storage.read_json_file(os.path.join(base_path, f'{k}.json'))
713
1484
  return result
714
1485
 
715
1486
  async def __read_dataframe_analysis_async(
@@ -744,4 +1515,124 @@ class Variable:
744
1515
  """
745
1516
  self.storage.makedirs(self.variable_path, exist_ok=True)
746
1517
  for k in DATAFRAME_ANALYSIS_KEYS:
747
- self.storage.write_json_file(os.path.join(self.variable_path, f'{k}.json'), data.get(k))
1518
+ self.storage.write_json_file(
1519
+ os.path.join(self.variable_path, f'{k}.json'), data.get(k)
1520
+ )
1521
+
1522
def __write_series_pandas(self, data: Union[List[pd.Series], pd.Series]) -> bool:
    """
    Write a pandas Series (or a sequence of Series) as parquet.

    Args:
        data: A single Series or an iterable whose first element is a Series.

    Returns:
        ``True`` once the data has been written; ``False`` (and nothing is
        written) when ``infer_variable_type`` does not report
        ``VariableType.SERIES_PANDAS``.
    """
    var_type, basic_iterable = infer_variable_type(data)
    if VariableType.SERIES_PANDAS != var_type:
        return False

    # When basic_iterable is truthy, data is handed to the parquet writer as-is
    # (presumably a list/tuple of Series — confirm against infer_variable_type);
    # otherwise the single Series is converted to a one-column frame first.
    self.__write_parquet(data if basic_iterable else data.to_frame())

    if isinstance(data, pd.Series):
        row_count = data.shape[0]
    elif is_iterable(data) and len(data) >= 1 and isinstance(data[0], pd.Series):
        # Total number of rows across every Series in the sequence.
        row_count = sum(s.shape[0] for s in data)
    else:
        row_count = None

    if row_count is not None:
        stats = dict(original_row_count=row_count, original_column_count=1)
        self.__write_dataframe_analysis(dict(statistics=stats))

    return True
1550
+
1551
def __write_matrix_sparse(
    self,
    csr_matrix: Union[scipy.sparse._csr.csr_matrix, List[scipy.sparse._csr.csr_matrix]],
) -> None:
    """
    Serialize a sparse matrix (or a list/tuple of them) to JSON files.

    Two files are written under ``self.variable_path``: ``JSON_SAMPLE_FILE``
    with the sampled payload(s) and ``JSON_FILE`` with the full payload(s).
    Row/column statistics are recorded only when a single matrix is given.

    Args:
        csr_matrix: One matrix, or a list/tuple of matrices.
    """
    if not self.storage.isdir(self.variable_path):
        self.storage.makedirs(self.variable_path, exist_ok=True)

    if isinstance(csr_matrix, (list, tuple)):
        pairs = [self.__serialize_matrix_sparse(matrix) for matrix in csr_matrix]
        data = [full for full, _ in pairs]
        data_sample = [sampled for _, sampled in pairs]
    else:
        data, data_sample = self.__serialize_matrix_sparse(csr_matrix)

    # The sample file is written before the full file.
    self.storage.write_json_file(
        os.path.join(self.variable_path, JSON_SAMPLE_FILE),
        data_sample,
    )
    self.storage.write_json_file(
        os.path.join(self.variable_path, JSON_FILE),
        data,
    )

    if not isinstance(csr_matrix, scipy.sparse._csr.csr_matrix):
        return

    shape = csr_matrix.shape
    stats = dict(original_row_count=shape[0], original_column_count=shape[1])
    self.__write_dataframe_analysis(dict(statistics=stats))
1585
+
1586
def __serialize_matrix_sparse(
    self, csr_matrix: scipy.sparse._csr.csr_matrix
) -> Tuple[Dict, Dict]:
    """
    Serialize a matrix and a leading sample of it with ``serialize_matrix``.

    Args:
        csr_matrix: The matrix to serialize.

    Returns:
        ``(data, data_sample)``: the serialized full matrix, then the serialized
        slice of its first ``DATAFRAME_SAMPLE_COUNT`` rows and first
        ``DATAFRAME_SAMPLE_MAX_COLUMNS`` columns.
    """
    head = csr_matrix[:DATAFRAME_SAMPLE_COUNT, :DATAFRAME_SAMPLE_MAX_COLUMNS]
    # The sample is serialized first, then the full matrix.
    data_sample = serialize_matrix(head)
    return serialize_matrix(csr_matrix), data_sample
1594
+
1595
def __parquet_num_rows(self, path: str) -> Optional[int]:
    """
    Return the ``num_rows`` value from ``read_metadata(path)`` as an int.

    Args:
        path: Location passed to ``read_metadata``.

    Returns:
        The row count as an ``int``, or ``None`` when there is no data manager,
        the data manager reports it is not readable, or ``num_rows`` is missing
        or not a float/int/str.
    """
    manager = self.data_manager
    if not manager or not manager.readable():
        return None

    row_count = read_metadata(path).get('num_rows')
    # isinstance is False for None, so a missing value falls through to None.
    if isinstance(row_count, (float, int, str)):
        return int(row_count)
    return None
1601
+
1602
def __scope_uuid(self) -> str:
    """
    Build a path-like identifier from the pipeline path and block directory.

    Returns:
        ``self.pipeline_path`` expressed relative to ``self.variables_dir``,
        joined with ``self.block_dir_name`` (empty string when falsy). When the
        pipeline path is not located under the variables directory, only the
        block directory name is returned.
    """
    leaf = self.block_dir_name or ''
    try:
        prefix = str(Path(self.pipeline_path).relative_to(Path(self.variables_dir)))
    except ValueError:
        # relative_to raises ValueError when pipeline_path is outside variables_dir.
        return os.path.join(leaf)
    return os.path.join(prefix, leaf)
1612
+
1613
def __is_part_readable(self, part_uuid: Optional[Union[int, str]] = None) -> bool:
    """
    Tell whether ``self.part_uuids`` is non-empty and contains ``part_uuid``.

    Args:
        part_uuid: Identifier to look for; non-string values are compared by
            their ``str()`` form. ``None`` means "any part".

    Returns:
        ``False`` when ``self.part_uuids`` is ``None`` or empty. Otherwise
        ``True`` if ``part_uuid`` is ``None`` or is present in ``self.part_uuids``.
    """
    key = part_uuid
    if key is not None and not isinstance(key, str):
        key = str(key)

    known = self.part_uuids
    if known is None or len(known) < 1:
        return False
    if key is None:
        return True
    return key in known
1622
+
1623
def __is_parquet_readable(self, path: Optional[str] = None) -> bool:
    """
    Tell whether parquet data with at least one row is available.

    Args:
        path: Location to inspect; falls back to ``self.variable_path`` when falsy.

    Returns:
        ``False`` when neither ``MEMORY_MANAGER_PANDAS_V2`` nor
        ``MEMORY_MANAGER_POLARS_V2`` is truthy, or when no row count can be
        obtained. Otherwise whether the row count is at least 1.
    """
    # Settings are imported at call time, as in the rest of this class.
    from mage_ai.settings.server import (
        MEMORY_MANAGER_PANDAS_V2,
        MEMORY_MANAGER_POLARS_V2,
    )

    if not (MEMORY_MANAGER_PANDAS_V2 or MEMORY_MANAGER_POLARS_V2):
        return False

    row_count = self.__parquet_num_rows(path or self.variable_path)
    if row_count is None:
        return False
    return row_count >= 1
1633
+
1634
@property
def __memory_manager_v2_enabled(self):
    """Return the ``MEMORY_MANAGER_V2`` setting, imported at access time."""
    from mage_ai.settings.server import MEMORY_MANAGER_V2 as enabled

    return enabled