mage-ai 0.9.70__py3-none-any.whl → 0.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mage-ai might be problematic; consult the package registry's advisory page for details.
- mage_ai/ai/utils/xgboost.py +222 -0
- mage_ai/api/errors.py +37 -25
- mage_ai/api/operations/base.py +13 -1
- mage_ai/api/parsers/PipelineScheduleParser.py +1 -1
- mage_ai/api/policies/BlockOutputPolicy.py +40 -17
- mage_ai/api/policies/GlobalDataProductPolicy.py +91 -41
- mage_ai/api/policies/KernelPolicy.py +55 -32
- mage_ai/api/policies/KernelProcessPolicy.py +56 -0
- mage_ai/api/policies/OutputPolicy.py +73 -41
- mage_ai/api/policies/PipelinePolicy.py +206 -138
- mage_ai/api/presenters/BlockLayoutItemPresenter.py +9 -7
- mage_ai/api/presenters/BlockPresenter.py +1 -1
- mage_ai/api/presenters/GlobalDataProductPresenter.py +6 -1
- mage_ai/api/presenters/KernelPresenter.py +5 -26
- mage_ai/api/presenters/KernelProcessPresenter.py +28 -0
- mage_ai/api/presenters/PipelinePresenter.py +18 -5
- mage_ai/api/presenters/StatusPresenter.py +2 -0
- mage_ai/api/presenters/SyncPresenter.py +25 -0
- mage_ai/api/resources/AutocompleteItemResource.py +1 -1
- mage_ai/api/resources/BlockLayoutItemResource.py +90 -44
- mage_ai/api/resources/BlockOutputResource.py +42 -9
- mage_ai/api/resources/BlockResource.py +4 -3
- mage_ai/api/resources/BlockRunResource.py +27 -22
- mage_ai/api/resources/ClusterResource.py +4 -1
- mage_ai/api/resources/CustomTemplateResource.py +34 -14
- mage_ai/api/resources/DataProviderResource.py +1 -1
- mage_ai/api/resources/ExecutionStateResource.py +3 -1
- mage_ai/api/resources/FileContentResource.py +8 -2
- mage_ai/api/resources/FileResource.py +10 -4
- mage_ai/api/resources/FileVersionResource.py +3 -1
- mage_ai/api/resources/GitBranchResource.py +46 -9
- mage_ai/api/resources/GlobalDataProductResource.py +44 -7
- mage_ai/api/resources/GlobalHookResource.py +4 -1
- mage_ai/api/resources/IntegrationDestinationResource.py +6 -2
- mage_ai/api/resources/IntegrationSourceResource.py +8 -4
- mage_ai/api/resources/IntegrationSourceStreamResource.py +6 -2
- mage_ai/api/resources/KernelProcessResource.py +44 -0
- mage_ai/api/resources/KernelResource.py +25 -3
- mage_ai/api/resources/OutputResource.py +33 -11
- mage_ai/api/resources/PageBlockLayoutResource.py +34 -23
- mage_ai/api/resources/PipelineInteractionResource.py +31 -15
- mage_ai/api/resources/PipelineResource.py +250 -123
- mage_ai/api/resources/PipelineRunResource.py +11 -3
- mage_ai/api/resources/PipelineScheduleResource.py +7 -2
- mage_ai/api/resources/PipelineTriggerResource.py +6 -1
- mage_ai/api/resources/ProjectResource.py +18 -7
- mage_ai/api/resources/SecretResource.py +1 -1
- mage_ai/api/resources/SeedResource.py +8 -1
- mage_ai/api/resources/StatusResource.py +21 -6
- mage_ai/api/resources/SyncResource.py +6 -8
- mage_ai/api/resources/VariableResource.py +46 -26
- mage_ai/api/resources/VersionControlProjectResource.py +9 -2
- mage_ai/api/resources/WidgetResource.py +1 -1
- mage_ai/api/resources/WorkspaceResource.py +1 -1
- mage_ai/api/views.py +47 -40
- mage_ai/authentication/permissions/seed.py +16 -2
- mage_ai/authentication/providers/oidc.py +21 -1
- mage_ai/autocomplete/utils.py +13 -9
- mage_ai/cache/base.py +1 -1
- mage_ai/cache/block.py +18 -12
- mage_ai/cache/block_action_object/__init__.py +32 -4
- mage_ai/cache/file.py +22 -19
- mage_ai/cache/pipeline.py +18 -12
- mage_ai/cli/main.py +1 -0
- mage_ai/cluster_manager/aws/emr_cluster_manager.py +9 -5
- mage_ai/cluster_manager/config.py +2 -2
- mage_ai/cluster_manager/manage.py +1 -1
- mage_ai/cluster_manager/workspace/base.py +1 -1
- mage_ai/command_center/applications/factory.py +10 -7
- mage_ai/command_center/files/factory.py +17 -15
- mage_ai/command_center/utils.py +25 -13
- mage_ai/data/__init__.py +0 -0
- mage_ai/data/constants.py +45 -0
- mage_ai/data/models/__init__.py +0 -0
- mage_ai/data/models/base.py +119 -0
- mage_ai/data/models/constants.py +1 -0
- mage_ai/data/models/generator.py +115 -0
- mage_ai/data/models/manager.py +168 -0
- mage_ai/data/models/pyarrow/__init__.py +0 -0
- mage_ai/data/models/pyarrow/record_batch.py +55 -0
- mage_ai/data/models/pyarrow/shared.py +21 -0
- mage_ai/data/models/pyarrow/table.py +8 -0
- mage_ai/data/models/reader.py +103 -0
- mage_ai/data/models/utils.py +59 -0
- mage_ai/data/models/writer.py +91 -0
- mage_ai/data/tabular/__init__.py +0 -0
- mage_ai/data/tabular/constants.py +23 -0
- mage_ai/data/tabular/mocks.py +19 -0
- mage_ai/data/tabular/models.py +126 -0
- mage_ai/data/tabular/reader.py +602 -0
- mage_ai/data/tabular/utils.py +102 -0
- mage_ai/data/tabular/writer.py +266 -0
- mage_ai/data/variables/__init__.py +0 -0
- mage_ai/data/variables/wrapper.py +54 -0
- mage_ai/data_cleaner/analysis/charts.py +61 -39
- mage_ai/data_cleaner/column_types/column_type_detector.py +53 -31
- mage_ai/data_cleaner/estimators/encoders.py +5 -2
- mage_ai/data_integrations/utils/scheduler.py +16 -11
- mage_ai/data_preparation/decorators.py +1 -0
- mage_ai/data_preparation/executors/block_executor.py +237 -155
- mage_ai/data_preparation/executors/streaming_pipeline_executor.py +1 -1
- mage_ai/data_preparation/git/__init__.py +27 -7
- mage_ai/data_preparation/git/api.py +7 -1
- mage_ai/data_preparation/git/utils.py +22 -16
- mage_ai/data_preparation/logging/logger_manager.py +4 -3
- mage_ai/data_preparation/models/block/__init__.py +1542 -878
- mage_ai/data_preparation/models/block/data_integration/mixins.py +4 -3
- mage_ai/data_preparation/models/block/dynamic/__init__.py +17 -6
- mage_ai/data_preparation/models/block/dynamic/child.py +41 -102
- mage_ai/data_preparation/models/block/dynamic/constants.py +1 -0
- mage_ai/data_preparation/models/block/dynamic/counter.py +296 -0
- mage_ai/data_preparation/models/block/dynamic/data.py +16 -0
- mage_ai/data_preparation/models/block/dynamic/factory.py +163 -0
- mage_ai/data_preparation/models/block/dynamic/models.py +19 -0
- mage_ai/data_preparation/models/block/dynamic/shared.py +92 -0
- mage_ai/data_preparation/models/block/dynamic/utils.py +291 -168
- mage_ai/data_preparation/models/block/dynamic/variables.py +384 -144
- mage_ai/data_preparation/models/block/dynamic/wrappers.py +77 -0
- mage_ai/data_preparation/models/block/extension/utils.py +10 -1
- mage_ai/data_preparation/models/block/global_data_product/__init__.py +10 -1
- mage_ai/data_preparation/models/block/integration/__init__.py +6 -2
- mage_ai/data_preparation/models/block/outputs.py +722 -0
- mage_ai/data_preparation/models/block/platform/mixins.py +7 -8
- mage_ai/data_preparation/models/block/r/__init__.py +56 -38
- mage_ai/data_preparation/models/block/settings/__init__.py +0 -0
- mage_ai/data_preparation/models/block/settings/dynamic/__init__.py +0 -0
- mage_ai/data_preparation/models/block/settings/dynamic/constants.py +7 -0
- mage_ai/data_preparation/models/block/settings/dynamic/mixins.py +118 -0
- mage_ai/data_preparation/models/block/settings/dynamic/models.py +31 -0
- mage_ai/data_preparation/models/block/settings/global_data_products/__init__.py +0 -0
- mage_ai/data_preparation/models/block/settings/global_data_products/mixins.py +20 -0
- mage_ai/data_preparation/models/block/settings/global_data_products/models.py +46 -0
- mage_ai/data_preparation/models/block/settings/variables/__init__.py +0 -0
- mage_ai/data_preparation/models/block/settings/variables/mixins.py +74 -0
- mage_ai/data_preparation/models/block/settings/variables/models.py +49 -0
- mage_ai/data_preparation/models/block/spark/mixins.py +2 -1
- mage_ai/data_preparation/models/block/sql/__init__.py +30 -5
- mage_ai/data_preparation/models/block/sql/utils/shared.py +21 -3
- mage_ai/data_preparation/models/block/utils.py +127 -70
- mage_ai/data_preparation/models/constants.py +19 -14
- mage_ai/data_preparation/models/custom_templates/custom_block_template.py +18 -13
- mage_ai/data_preparation/models/custom_templates/custom_pipeline_template.py +33 -16
- mage_ai/data_preparation/models/custom_templates/utils.py +1 -1
- mage_ai/data_preparation/models/file.py +41 -28
- mage_ai/data_preparation/models/global_data_product/__init__.py +88 -58
- mage_ai/data_preparation/models/global_hooks/models.py +1 -0
- mage_ai/data_preparation/models/interfaces.py +29 -0
- mage_ai/data_preparation/models/pipeline.py +365 -180
- mage_ai/data_preparation/models/pipelines/integration_pipeline.py +1 -2
- mage_ai/data_preparation/models/pipelines/seed.py +1 -1
- mage_ai/data_preparation/models/project/__init__.py +66 -18
- mage_ai/data_preparation/models/project/constants.py +2 -0
- mage_ai/data_preparation/models/triggers/__init__.py +120 -24
- mage_ai/data_preparation/models/utils.py +467 -17
- mage_ai/data_preparation/models/variable.py +1028 -137
- mage_ai/data_preparation/models/variables/__init__.py +0 -0
- mage_ai/data_preparation/models/variables/cache.py +149 -0
- mage_ai/data_preparation/models/variables/constants.py +72 -0
- mage_ai/data_preparation/models/variables/summarizer.py +336 -0
- mage_ai/data_preparation/models/variables/utils.py +77 -0
- mage_ai/data_preparation/models/widget/__init__.py +63 -41
- mage_ai/data_preparation/models/widget/charts.py +40 -27
- mage_ai/data_preparation/models/widget/constants.py +2 -0
- mage_ai/data_preparation/models/widget/utils.py +3 -3
- mage_ai/data_preparation/preferences.py +3 -3
- mage_ai/data_preparation/repo_manager.py +55 -21
- mage_ai/data_preparation/storage/base_storage.py +2 -2
- mage_ai/data_preparation/storage/gcs_storage.py +7 -4
- mage_ai/data_preparation/storage/local_storage.py +6 -3
- mage_ai/data_preparation/storage/s3_storage.py +5 -2
- mage_ai/data_preparation/templates/data_exporters/streaming/oracledb.yaml +8 -0
- mage_ai/data_preparation/variable_manager.py +281 -76
- mage_ai/io/base.py +3 -2
- mage_ai/io/bigquery.py +1 -0
- mage_ai/io/redshift.py +7 -5
- mage_ai/kernels/__init__.py +0 -0
- mage_ai/kernels/models.py +188 -0
- mage_ai/kernels/utils.py +169 -0
- mage_ai/orchestration/concurrency.py +6 -2
- mage_ai/orchestration/db/__init__.py +1 -0
- mage_ai/orchestration/db/migrations/versions/0227396a216c_add_userproject_table.py +38 -0
- mage_ai/orchestration/db/models/dynamic/__init__.py +0 -0
- mage_ai/orchestration/db/models/dynamic/controller.py +67 -0
- mage_ai/orchestration/db/models/oauth.py +2 -9
- mage_ai/orchestration/db/models/projects.py +10 -0
- mage_ai/orchestration/db/models/schedules.py +204 -187
- mage_ai/orchestration/db/models/schedules_project_platform.py +18 -12
- mage_ai/orchestration/db/models/utils.py +46 -5
- mage_ai/orchestration/metrics/pipeline_run.py +8 -9
- mage_ai/orchestration/notification/sender.py +1 -0
- mage_ai/orchestration/pipeline_scheduler_original.py +32 -8
- mage_ai/orchestration/pipeline_scheduler_project_platform.py +1 -1
- mage_ai/orchestration/run_status_checker.py +11 -4
- mage_ai/orchestration/triggers/api.py +12 -1
- mage_ai/presenters/charts/data_sources/base.py +4 -2
- mage_ai/presenters/charts/data_sources/block.py +15 -9
- mage_ai/presenters/charts/data_sources/chart_code.py +8 -5
- mage_ai/presenters/charts/data_sources/constants.py +1 -0
- mage_ai/presenters/charts/data_sources/system_metrics.py +22 -0
- mage_ai/presenters/interactions/models.py +11 -7
- mage_ai/presenters/pages/loaders/pipelines.py +5 -3
- mage_ai/presenters/pages/models/page_components/pipeline_schedules.py +3 -1
- mage_ai/presenters/utils.py +2 -0
- mage_ai/server/api/blocks.py +2 -1
- mage_ai/server/api/downloads.py +5 -1
- mage_ai/server/api/triggers.py +3 -1
- mage_ai/server/constants.py +1 -1
- mage_ai/server/frontend_dist/404.html +5 -5
- mage_ai/server/frontend_dist/_next/static/UZLabyPgcxtZvp0O0EUUS/_buildManifest.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/1376-22de38b4ad008d8a.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{1557-b3502f3f1aa92ac7.js → 1557-25a7d985d5564fd3.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/1668-30b4619b9534519b.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/1799-c42db95a015689ee.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/2996-2108b53b9d371d8d.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/3763-61b542dafdbf5754.js → frontend_dist/_next/static/chunks/3763-40780c6d1e4b261d.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/3782-129dd2a2448a2e36.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/3958-bcdfa414ccfa1eb2.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/4168-97fd1578d1a38315.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/4982-fa5a238b139fbdd2.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/5699-176f445e1313f001.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/7162-7dd03f0f605de721.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/7779-68d2b72a90c5f925.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/7966-5446a8e43711e2f9.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/8023-6c2f172f48dcb99b.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/8095-c351b8a735d73e0c.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{main-77fe248a6fbd12d8.js → main-b99d4e30a88d9dc7.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/_app-9fe2d9d07c94e968.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/{block-layout-14f952f66964022f.js → block-layout-7f4b735c67115df5.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products/[...slug]-e7d48e6b0c3068ac.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products-b943f31f050fc3a4.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/overview-597b74828bf105db.js → frontend_dist/_next/static/chunks/pages/overview-9f1ac4ec003884f3.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-7181b086c93784d2.js → [...slug]-7e737f6fc7e83e9b.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-d94488e3f2eeef36.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-cc641a7fa8473796.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-a5c0362763a21fa8.js → block-runs-284309877f3c5a5a.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-26250e5335194ade.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors-7acc7afc00df17c2.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors-5f4c8128b2413fd8.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-4ebfc8e400315dda.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/settings-e5e0150a256aadb3.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/account/{profile-3f0df3decc856ee9.js → profile-3ae43c932537b254.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/preferences-32985f3f7c7dd3ab.js → frontend_dist/_next/static/chunks/pages/settings/platform/preferences-b603d7fe4b175256.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/settings-c2e9ef989c8bfa73.js → frontend_dist/_next/static/chunks/pages/settings/platform/settings-319ddbabc239e91b.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/permissions/{[...slug]-47b64ced27c24985.js → [...slug]-5c360f72e4498855.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{permissions-e5a4d3d815cec25d.js → permissions-fb29fa6c2bd90bb0.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-3b76fa959ffa09d3.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles/{[...slug]-379e1ee292504842.js → [...slug]-3b787b42f1093b1f.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles-0b83fbdd39e85f5b.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-a1e6950974d643a8.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users/{[...slug]-2af9afbe727d88aa.js → [...slug]-0aa019d87db8b0b8.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{users-a4db8710f703c729.js → users-88c694d19207f2ec.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/version-control-31d0d50f7f30462b.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{webpack-d079359c241db804.js → webpack-ac7fdc472bedf682.js} +1 -1
- mage_ai/server/frontend_dist/block-layout.html +3 -3
- mage_ai/server/frontend_dist/compute.html +6 -6
- mage_ai/server/frontend_dist/files.html +6 -6
- mage_ai/server/frontend_dist/global-data-products/[...slug].html +6 -6
- mage_ai/server/frontend_dist/global-data-products.html +6 -6
- mage_ai/server/frontend_dist/global-hooks/[...slug].html +6 -6
- mage_ai/server/frontend_dist/global-hooks.html +6 -6
- mage_ai/server/frontend_dist/index.html +3 -3
- mage_ai/server/frontend_dist/manage/files.html +6 -6
- mage_ai/server/frontend_dist/manage/settings.html +6 -6
- mage_ai/server/frontend_dist/manage/users/[user].html +6 -6
- mage_ai/server/frontend_dist/manage/users/new.html +6 -6
- mage_ai/server/frontend_dist/manage/users.html +6 -6
- mage_ai/server/frontend_dist/manage.html +6 -6
- mage_ai/server/frontend_dist/oauth.html +5 -5
- mage_ai/server/frontend_dist/overview.html +6 -6
- mage_ai/server/frontend_dist/pipeline-runs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/dashboard.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +3 -3
- mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/settings.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline].html +3 -3
- mage_ai/server/frontend_dist/pipelines.html +6 -6
- mage_ai/server/frontend_dist/platform/global-hooks/[...slug].html +6 -6
- mage_ai/server/frontend_dist/platform/global-hooks.html +6 -6
- mage_ai/server/frontend_dist/settings/account/profile.html +6 -6
- mage_ai/server/frontend_dist/settings/platform/preferences.html +6 -6
- mage_ai/server/frontend_dist/settings/platform/settings.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/permissions/[...slug].html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/permissions.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/preferences.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/roles/[...slug].html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/roles.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/sync-data.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/users/[...slug].html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/users.html +6 -6
- mage_ai/server/frontend_dist/settings.html +3 -3
- mage_ai/server/frontend_dist/sign-in.html +12 -12
- mage_ai/server/frontend_dist/templates/[...slug].html +6 -6
- mage_ai/server/frontend_dist/templates.html +6 -6
- mage_ai/server/frontend_dist/terminal.html +6 -6
- mage_ai/server/frontend_dist/test.html +3 -3
- mage_ai/server/frontend_dist/triggers.html +6 -6
- mage_ai/server/frontend_dist/version-control.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/404.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1376-22de38b4ad008d8a.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{1557-b3502f3f1aa92ac7.js → 1557-25a7d985d5564fd3.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1668-30b4619b9534519b.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1799-c42db95a015689ee.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/2996-2108b53b9d371d8d.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/3763-61b542dafdbf5754.js → frontend_dist_base_path_template/_next/static/chunks/3763-40780c6d1e4b261d.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3782-129dd2a2448a2e36.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3958-bcdfa414ccfa1eb2.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/4168-97fd1578d1a38315.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/4982-fa5a238b139fbdd2.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5699-176f445e1313f001.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7162-7dd03f0f605de721.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7779-68d2b72a90c5f925.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7966-5446a8e43711e2f9.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8023-6c2f172f48dcb99b.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8095-c351b8a735d73e0c.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{main-70b78159c2bb3fe1.js → main-384298e9133cec76.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/_app-13a578bce3b7f30c.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{block-layout-14f952f66964022f.js → block-layout-7f4b735c67115df5.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products/[...slug]-e7d48e6b0c3068ac.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products-b943f31f050fc3a4.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/overview-597b74828bf105db.js → frontend_dist_base_path_template/_next/static/chunks/pages/overview-9f1ac4ec003884f3.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-7181b086c93784d2.js → [...slug]-7e737f6fc7e83e9b.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-d94488e3f2eeef36.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/edit-cc641a7fa8473796.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-a5c0362763a21fa8.js → block-runs-284309877f3c5a5a.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-26250e5335194ade.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors-7acc7afc00df17c2.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors-5f4c8128b2413fd8.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-4ebfc8e400315dda.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/settings-e5e0150a256aadb3.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/account/{profile-3f0df3decc856ee9.js → profile-3ae43c932537b254.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/settings/platform/preferences-32985f3f7c7dd3ab.js → frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/preferences-b603d7fe4b175256.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/settings/platform/settings-c2e9ef989c8bfa73.js → frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/settings-319ddbabc239e91b.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/permissions/{[...slug]-47b64ced27c24985.js → [...slug]-5c360f72e4498855.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{permissions-e5a4d3d815cec25d.js → permissions-fb29fa6c2bd90bb0.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/preferences-3b76fa959ffa09d3.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles/{[...slug]-379e1ee292504842.js → [...slug]-3b787b42f1093b1f.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles-0b83fbdd39e85f5b.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/sync-data-a1e6950974d643a8.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/users/{[...slug]-2af9afbe727d88aa.js → [...slug]-0aa019d87db8b0b8.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{users-a4db8710f703c729.js → users-88c694d19207f2ec.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/version-control-31d0d50f7f30462b.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{webpack-68c003fb6a175cd7.js → webpack-481689d9989710cd.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/kcptwoOU-JJJg6Vwpkfmx/_buildManifest.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/block-layout.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/compute.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/files.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/global-data-products/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/global-data-products.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/global-hooks/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/global-hooks.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/index.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/manage/files.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage/settings.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage/users/[user].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage/users/new.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage/users.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/oauth.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/overview.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipeline-runs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/backfills/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/backfills.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/dashboard.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/edit.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/logs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors/block-runs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors/block-runtime.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/runs/[run].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/runs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/settings.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/syncs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/triggers/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/triggers.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline].html +3 -3
- mage_ai/server/frontend_dist_base_path_template/pipelines.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/platform/global-hooks/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/platform/global-hooks.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/account/profile.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/platform/preferences.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/platform/settings.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/permissions/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/permissions.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/preferences.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/roles/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/roles.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/sync-data.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/users/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/users.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/sign-in.html +12 -12
- mage_ai/server/frontend_dist_base_path_template/templates/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/templates.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/terminal.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/test.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/triggers.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/version-control.html +6 -6
- mage_ai/server/kernel_output_parser.py +4 -1
- mage_ai/server/scheduler_manager.py +9 -0
- mage_ai/server/server.py +35 -31
- mage_ai/server/utils/custom_output.py +284 -0
- mage_ai/server/utils/execute_custom_code.py +245 -0
- mage_ai/server/utils/output_display.py +123 -289
- mage_ai/server/websocket_server.py +116 -69
- mage_ai/services/k8s/config.py +23 -0
- mage_ai/services/k8s/job_manager.py +6 -1
- mage_ai/services/ssh/aws/emr/utils.py +8 -8
- mage_ai/settings/keys/auth.py +1 -0
- mage_ai/settings/platform/__init__.py +159 -38
- mage_ai/settings/platform/constants.py +5 -0
- mage_ai/settings/platform/utils.py +53 -10
- mage_ai/settings/repo.py +26 -12
- mage_ai/settings/server.py +128 -37
- mage_ai/shared/array.py +24 -1
- mage_ai/shared/complex.py +45 -0
- mage_ai/shared/config.py +2 -1
- mage_ai/shared/custom_logger.py +11 -0
- mage_ai/shared/dates.py +10 -6
- mage_ai/shared/files.py +63 -8
- mage_ai/shared/hash.py +33 -9
- mage_ai/shared/io.py +9 -5
- mage_ai/shared/models.py +82 -24
- mage_ai/shared/outputs.py +87 -0
- mage_ai/shared/parsers.py +141 -15
- mage_ai/shared/path_fixer.py +11 -7
- mage_ai/shared/singletons/__init__.py +0 -0
- mage_ai/shared/singletons/base.py +47 -0
- mage_ai/shared/singletons/memory.py +38 -0
- mage_ai/shared/strings.py +34 -1
- mage_ai/shared/yaml.py +24 -0
- mage_ai/streaming/sinks/oracledb.py +57 -0
- mage_ai/streaming/sinks/sink_factory.py +4 -0
- mage_ai/system/__init__.py +0 -0
- mage_ai/system/constants.py +14 -0
- mage_ai/system/memory/__init__.py +0 -0
- mage_ai/system/memory/constants.py +1 -0
- mage_ai/system/memory/manager.py +174 -0
- mage_ai/system/memory/presenters.py +158 -0
- mage_ai/system/memory/process.py +216 -0
- mage_ai/system/memory/samples.py +13 -0
- mage_ai/system/memory/utils.py +656 -0
- mage_ai/system/memory/wrappers.py +177 -0
- mage_ai/system/models.py +58 -0
- mage_ai/system/storage/__init__.py +0 -0
- mage_ai/system/storage/utils.py +29 -0
- mage_ai/tests/api/endpoints/mixins.py +2 -2
- mage_ai/tests/api/endpoints/test_blocks.py +2 -1
- mage_ai/tests/api/endpoints/test_custom_designs.py +4 -4
- mage_ai/tests/api/endpoints/test_pipeline_runs.py +2 -2
- mage_ai/tests/api/endpoints/test_projects.py +2 -1
- mage_ai/tests/api/operations/base/test_base.py +27 -27
- mage_ai/tests/api/operations/base/test_base_with_user_authentication.py +27 -27
- mage_ai/tests/api/operations/base/test_base_with_user_permissions.py +23 -23
- mage_ai/tests/api/operations/test_syncs.py +6 -4
- mage_ai/tests/api/resources/test_pipeline_resource.py +9 -2
- mage_ai/tests/authentication/providers/test_oidc.py +59 -0
- mage_ai/tests/base_test.py +2 -2
- mage_ai/tests/data/__init__.py +0 -0
- mage_ai/tests/data/models/__init__.py +0 -0
- mage_ai/tests/data_preparation/executors/test_block_executor.py +23 -16
- mage_ai/tests/data_preparation/git/test_git.py +4 -1
- mage_ai/tests/data_preparation/models/block/dynamic/test_combos.py +305 -0
- mage_ai/tests/data_preparation/models/block/dynamic/test_counter.py +212 -0
- mage_ai/tests/data_preparation/models/block/dynamic/test_factory.py +360 -0
- mage_ai/tests/data_preparation/models/block/dynamic/test_variables.py +332 -0
- mage_ai/tests/data_preparation/models/block/hook/test_hook_block.py +2 -2
- mage_ai/tests/data_preparation/models/block/platform/test_mixins.py +1 -1
- mage_ai/tests/data_preparation/models/block/sql/utils/test_shared.py +26 -1
- mage_ai/tests/data_preparation/models/block/test_global_data_product.py +3 -2
- mage_ai/tests/data_preparation/models/custom_templates/test_utils.py +5 -4
- mage_ai/tests/data_preparation/models/global_hooks/test_hook.py +3 -0
- mage_ai/tests/data_preparation/models/global_hooks/test_predicates.py +9 -3
- mage_ai/tests/data_preparation/models/test_block.py +115 -120
- mage_ai/tests/data_preparation/models/test_blocks_helper.py +114 -0
- mage_ai/tests/data_preparation/models/test_global_data_product.py +41 -24
- mage_ai/tests/data_preparation/models/test_pipeline.py +9 -6
- mage_ai/tests/data_preparation/models/test_project.py +4 -1
- mage_ai/tests/data_preparation/models/test_utils.py +80 -0
- mage_ai/tests/data_preparation/models/test_variable.py +242 -69
- mage_ai/tests/data_preparation/models/variables/__init__.py +0 -0
- mage_ai/tests/data_preparation/models/variables/test_summarizer.py +481 -0
- mage_ai/tests/data_preparation/storage/shared/__init__.py +0 -0
- mage_ai/tests/data_preparation/test_repo_manager.py +6 -7
- mage_ai/tests/data_preparation/test_variable_manager.py +57 -48
- mage_ai/tests/factory.py +64 -43
- mage_ai/tests/orchestration/db/models/test_schedules.py +3 -3
- mage_ai/tests/orchestration/db/models/test_schedules_dynamic_blocks.py +279 -0
- mage_ai/tests/orchestration/test_pipeline_scheduler.py +1 -0
- mage_ai/tests/orchestration/triggers/test_global_data_product.py +3 -2
- mage_ai/tests/orchestration/triggers/test_utils.py +3 -2
- mage_ai/tests/services/k8s/test_job_manager.py +18 -0
- mage_ai/tests/streaming/sinks/test_oracledb.py +38 -0
- mage_ai/tests/test_shared.py +61 -0
- mage_ai/usage_statistics/logger.py +7 -2
- mage_ai/utils/code.py +33 -19
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/METADATA +5 -2
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/RECORD +513 -417
- mage_ai/data_preparation/models/global_data_product/constants.py +0 -6
- mage_ai/server/frontend_dist/_next/static/RhDiJSkcjCsh4xxX4BFBk/_buildManifest.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/2631-b9f9bea3f1cf906d.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/3782-ef4cd4f0b52072d0.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/4783-422429203610c318.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/5699-6d708c6b2153ea08.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/635-0d6b7c8804bcd2dc.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/7022-0d52dd8868621fb0.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/7361-8a23dd8360593e7a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/7966-b9b85ba10667e654.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/8095-bdce03896ef9639a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/8146-6bed4e7401e067e6.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/9265-d2a1aaec75ec69b8.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/9440-4069842b90d4b801.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/9832-67896490f6e8a014.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/_app-2a69553d8c6eeb53.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products/[...slug]-591abd392dc50ed4.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products-78e8e88f2a757a18.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-95ffcd3e2b27e567.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-b645a6d13ab9fe3a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-1ed9045b2f1dfd65.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-1417ad1c821d720a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/settings-59aca25a5b1d3998.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-349af617d05f001b.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles-36fa165a48af586b.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-60d01d3887e31136.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/version-control-3433c8b22e8342aa.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/TdpLLFome13qvM0gXvpHs/_buildManifest.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/2631-b9f9bea3f1cf906d.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3782-ef4cd4f0b52072d0.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/4783-422429203610c318.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5699-6d708c6b2153ea08.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/635-0d6b7c8804bcd2dc.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7022-0d52dd8868621fb0.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7361-8a23dd8360593e7a.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7966-b9b85ba10667e654.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8095-bdce03896ef9639a.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8146-6bed4e7401e067e6.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9265-d2a1aaec75ec69b8.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9440-4069842b90d4b801.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9832-67896490f6e8a014.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/_app-2a69553d8c6eeb53.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products/[...slug]-591abd392dc50ed4.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products-78e8e88f2a757a18.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-95ffcd3e2b27e567.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/edit-b645a6d13ab9fe3a.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-1ed9045b2f1dfd65.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-1417ad1c821d720a.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/settings-59aca25a5b1d3998.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/preferences-349af617d05f001b.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles-36fa165a48af586b.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/sync-data-60d01d3887e31136.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/version-control-3433c8b22e8342aa.js +0 -1
- mage_ai/shared/memory.py +0 -90
- mage_ai/tests/data_preparation/models/block/dynamic/test_dynamic_helpers.py +0 -48
- /mage_ai/{tests/data_preparation/shared → ai/utils}/__init__.py +0 -0
- /mage_ai/server/frontend_dist/_next/static/{RhDiJSkcjCsh4xxX4BFBk → UZLabyPgcxtZvp0O0EUUS}/_ssgManifest.js +0 -0
- /mage_ai/server/frontend_dist_base_path_template/_next/static/{TdpLLFome13qvM0gXvpHs → kcptwoOU-JJJg6Vwpkfmx}/_ssgManifest.js +0 -0
- /mage_ai/tests/data_preparation/{shared → storage/shared}/test_secrets.py +0 -0
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/LICENSE +0 -0
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/WHEEL +0 -0
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/entry_points.txt +0 -0
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import os
|
|
2
4
|
import traceback
|
|
3
5
|
from contextlib import contextmanager
|
|
4
|
-
from
|
|
5
|
-
from typing import Any, Dict, List
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
6
8
|
|
|
7
9
|
import numpy as np
|
|
8
10
|
import pandas as pd
|
|
9
11
|
import polars as pl
|
|
12
|
+
import scipy
|
|
10
13
|
from pandas.api.types import infer_dtype, is_object_dtype
|
|
11
14
|
from pandas.core.indexes.range import RangeIndex
|
|
12
15
|
|
|
16
|
+
from mage_ai.data.constants import InputDataType, VariableType
|
|
17
|
+
from mage_ai.data.models.manager import DataManager
|
|
18
|
+
from mage_ai.data.tabular.models import BatchSettings
|
|
19
|
+
from mage_ai.data.tabular.reader import read_metadata
|
|
13
20
|
from mage_ai.data_cleaner.shared.utils import is_geo_dataframe, is_spark_dataframe
|
|
14
21
|
from mage_ai.data_preparation.models.constants import (
|
|
15
22
|
DATAFRAME_ANALYSIS_KEYS,
|
|
@@ -24,32 +31,40 @@ from mage_ai.data_preparation.models.utils import ( # dask_from_pandas,
|
|
|
24
31
|
cast_column_types,
|
|
25
32
|
cast_column_types_polars,
|
|
26
33
|
deserialize_columns,
|
|
34
|
+
deserialize_complex,
|
|
35
|
+
infer_variable_type,
|
|
36
|
+
is_basic_iterable,
|
|
27
37
|
serialize_columns,
|
|
38
|
+
serialize_complex,
|
|
28
39
|
should_deserialize_pandas,
|
|
29
40
|
should_serialize_pandas,
|
|
30
41
|
)
|
|
42
|
+
from mage_ai.data_preparation.models.variables.constants import (
|
|
43
|
+
DATAFRAME_COLUMN_TYPES_FILE,
|
|
44
|
+
DATAFRAME_CSV_FILE,
|
|
45
|
+
DATAFRAME_PARQUET_FILE,
|
|
46
|
+
DATAFRAME_PARQUET_SAMPLE_FILE,
|
|
47
|
+
JOBLIB_FILE,
|
|
48
|
+
JOBLIB_OBJECT_FILE,
|
|
49
|
+
JSON_FILE,
|
|
50
|
+
JSON_SAMPLE_FILE,
|
|
51
|
+
METADATA_FILE,
|
|
52
|
+
RESOURCE_USAGE_FILE,
|
|
53
|
+
UBJSON_MODEL_FILENAME,
|
|
54
|
+
)
|
|
55
|
+
from mage_ai.data_preparation.models.variables.summarizer import get_part_uuids
|
|
31
56
|
from mage_ai.data_preparation.storage.base_storage import BaseStorage
|
|
32
57
|
from mage_ai.data_preparation.storage.local_storage import LocalStorage
|
|
33
|
-
from mage_ai.
|
|
58
|
+
from mage_ai.settings.repo import get_variables_dir
|
|
59
|
+
from mage_ai.shared.array import is_iterable
|
|
60
|
+
from mage_ai.shared.environments import is_debug
|
|
61
|
+
from mage_ai.shared.hash import flatten_dict
|
|
62
|
+
from mage_ai.shared.outputs import load_custom_object, save_custom_object
|
|
63
|
+
from mage_ai.shared.parsers import deserialize_matrix, sample_output, serialize_matrix
|
|
34
64
|
from mage_ai.shared.utils import clean_name
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
DATAFRAME_PARQUET_SAMPLE_FILE = 'sample_data.parquet'
|
|
39
|
-
DATAFRAME_CSV_FILE = 'data.csv'
|
|
40
|
-
|
|
41
|
-
METADATA_FILE = 'type.json'
|
|
42
|
-
|
|
43
|
-
JSON_FILE = 'data.json'
|
|
44
|
-
JSON_SAMPLE_FILE = 'sample_data.json'
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
class VariableType(str, Enum):
|
|
48
|
-
DATAFRAME = 'dataframe'
|
|
49
|
-
DATAFRAME_ANALYSIS = 'dataframe_analysis'
|
|
50
|
-
GEO_DATAFRAME = 'geo_dataframe'
|
|
51
|
-
POLARS_DATAFRAME = 'polars_dataframe'
|
|
52
|
-
SPARK_DATAFRAME = 'spark_dataframe'
|
|
65
|
+
from mage_ai.system.memory.manager import MemoryManager
|
|
66
|
+
from mage_ai.system.models import ResourceUsage
|
|
67
|
+
from mage_ai.system.storage.utils import size_of_path
|
|
53
68
|
|
|
54
69
|
|
|
55
70
|
class Variable:
|
|
@@ -58,19 +73,31 @@ class Variable:
|
|
|
58
73
|
uuid: str,
|
|
59
74
|
pipeline_path: str,
|
|
60
75
|
block_uuid: str,
|
|
61
|
-
partition: str = None,
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
76
|
+
partition: Optional[str] = None,
|
|
77
|
+
skip_check_variable_type: Optional[bool] = None,
|
|
78
|
+
spark: Optional[Any] = None,
|
|
79
|
+
storage: Optional[BaseStorage] = None,
|
|
80
|
+
variable_type: Optional[VariableType] = None,
|
|
81
|
+
variable_types: Optional[List[VariableType]] = None,
|
|
65
82
|
clean_block_uuid: bool = True,
|
|
83
|
+
validate_pipeline_path: bool = False,
|
|
84
|
+
input_data_types: Optional[List[InputDataType]] = None,
|
|
85
|
+
resource_usage: Optional[ResourceUsage] = None,
|
|
86
|
+
read_batch_settings: Optional[BatchSettings] = None,
|
|
87
|
+
read_chunks: Optional[List] = None,
|
|
88
|
+
variables_dir: Optional[str] = None,
|
|
89
|
+
write_batch_settings: Optional[BatchSettings] = None,
|
|
90
|
+
write_chunks: Optional[List] = None,
|
|
66
91
|
) -> None:
|
|
67
92
|
self.uuid = uuid
|
|
68
93
|
if storage is None:
|
|
69
94
|
self.storage = LocalStorage()
|
|
70
95
|
else:
|
|
71
96
|
self.storage = storage
|
|
72
|
-
|
|
73
|
-
|
|
97
|
+
|
|
98
|
+
if validate_pipeline_path and not self.storage.path_exists(pipeline_path):
|
|
99
|
+
raise Exception(f'Pipeline path {pipeline_path} does not exist.')
|
|
100
|
+
|
|
74
101
|
self.pipeline_path = pipeline_path
|
|
75
102
|
self.block_uuid = block_uuid
|
|
76
103
|
self.block_dir_name = clean_name(self.block_uuid) if clean_block_uuid else self.block_uuid
|
|
@@ -84,8 +111,27 @@ class Variable:
|
|
|
84
111
|
if not self.storage.path_exists(self.variable_dir_path):
|
|
85
112
|
self.storage.makedirs(self.variable_dir_path)
|
|
86
113
|
|
|
114
|
+
self._data_manager = None
|
|
115
|
+
self._part_uuids = None
|
|
116
|
+
self._parts = None
|
|
117
|
+
self._resource_usage = resource_usage
|
|
118
|
+
|
|
119
|
+
self.input_data_types = input_data_types
|
|
120
|
+
self.read_batch_settings = read_batch_settings
|
|
121
|
+
self.read_chunks = read_chunks
|
|
122
|
+
self.variables_dir = variables_dir or get_variables_dir()
|
|
123
|
+
self.write_batch_settings = write_batch_settings
|
|
124
|
+
self.write_chunks = write_chunks
|
|
125
|
+
|
|
87
126
|
self.variable_type = variable_type
|
|
88
|
-
self.
|
|
127
|
+
self.variable_types = variable_types or []
|
|
128
|
+
|
|
129
|
+
if not skip_check_variable_type:
|
|
130
|
+
self.check_variable_type(spark=spark)
|
|
131
|
+
|
|
132
|
+
@classmethod
|
|
133
|
+
def dir_path(cls, pipeline_path, block_uuid):
|
|
134
|
+
return os.path.join(pipeline_path, VARIABLE_DIR, clean_name(block_uuid))
|
|
89
135
|
|
|
90
136
|
@property
|
|
91
137
|
def variable_path(self):
|
|
@@ -95,11 +141,71 @@ class Variable:
|
|
|
95
141
|
def metadata_path(self):
|
|
96
142
|
return os.path.join(self.variable_path, METADATA_FILE)
|
|
97
143
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
144
|
+
def resource_usage_path(self, index: Optional[int] = None) -> str:
|
|
145
|
+
return os.path.join(
|
|
146
|
+
self.variable_path, str(index) if index is not None else '', RESOURCE_USAGE_FILE
|
|
147
|
+
)
|
|
101
148
|
|
|
102
|
-
|
|
149
|
+
@property
|
|
150
|
+
def data_manager(self) -> Optional[DataManager]:
|
|
151
|
+
if self._data_manager is None:
|
|
152
|
+
self._data_manager = DataManager(
|
|
153
|
+
input_data_types=self.input_data_types,
|
|
154
|
+
read_batch_settings=self.read_batch_settings,
|
|
155
|
+
read_chunks=self.read_chunks,
|
|
156
|
+
storage=self.storage,
|
|
157
|
+
uuid=self.__scope_uuid(),
|
|
158
|
+
variable_dir_path=self.variable_dir_path,
|
|
159
|
+
variable_path=self.variable_path,
|
|
160
|
+
variables_dir=self.variables_dir,
|
|
161
|
+
variable_type=self.variable_type,
|
|
162
|
+
variable_types=self.variable_types,
|
|
163
|
+
write_batch_settings=self.write_batch_settings,
|
|
164
|
+
write_chunks=self.write_chunks,
|
|
165
|
+
)
|
|
166
|
+
return self._data_manager
|
|
167
|
+
|
|
168
|
+
@property
|
|
169
|
+
def resource_usage(self) -> ResourceUsage:
|
|
170
|
+
if self._resource_usage is None:
|
|
171
|
+
self._resource_usage = ResourceUsage()
|
|
172
|
+
return self._resource_usage
|
|
173
|
+
|
|
174
|
+
@property
|
|
175
|
+
def part_uuids(self) -> Optional[List[str]]:
|
|
176
|
+
if self._part_uuids is not None:
|
|
177
|
+
return self._part_uuids
|
|
178
|
+
|
|
179
|
+
self._part_uuids = get_part_uuids(self)
|
|
180
|
+
if self._part_uuids is not None:
|
|
181
|
+
self._part_uuids = sorted(self._part_uuids)
|
|
182
|
+
|
|
183
|
+
return self._part_uuids
|
|
184
|
+
|
|
185
|
+
def get_resource_usage(self, index: Optional[int] = None) -> Optional[ResourceUsage]:
|
|
186
|
+
if self.storage.path_exists(self.resource_usage_path(index)):
|
|
187
|
+
try:
|
|
188
|
+
data = self.storage.read_json_file(
|
|
189
|
+
self.resource_usage_path(index),
|
|
190
|
+
default_value={},
|
|
191
|
+
raise_exception=False,
|
|
192
|
+
)
|
|
193
|
+
if data:
|
|
194
|
+
self._resource_usage = ResourceUsage.load(**{
|
|
195
|
+
**self.resource_usage.to_dict(),
|
|
196
|
+
**data,
|
|
197
|
+
})
|
|
198
|
+
except Exception as err:
|
|
199
|
+
print(f'[ERROR] Variable.resource_usage: {err}')
|
|
200
|
+
return self.resource_usage
|
|
201
|
+
|
|
202
|
+
def get_analysis(self, index: Optional[int] = None) -> Dict[str, Dict]:
|
|
203
|
+
return self.__read_dataframe_analysis(
|
|
204
|
+
dataframe_analysis_keys=['statistics'],
|
|
205
|
+
index=index,
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
def check_variable_type(self, spark: Optional[Any] = None) -> Optional[VariableType]:
|
|
103
209
|
"""
|
|
104
210
|
If the variable has a metadata file, read the variable type from the metadata file.
|
|
105
211
|
Fallback to inferring variable type based on data in the storage.
|
|
@@ -107,8 +213,39 @@ class Variable:
|
|
|
107
213
|
if self.variable_type is None:
|
|
108
214
|
try:
|
|
109
215
|
if self.storage.path_exists(self.metadata_path):
|
|
110
|
-
metadata = self.storage.read_json_file(
|
|
216
|
+
metadata = self.storage.read_json_file(
|
|
217
|
+
self.metadata_path, raise_exception=is_debug()
|
|
218
|
+
)
|
|
111
219
|
self.variable_type = metadata.get('type')
|
|
220
|
+
if self.variable_type:
|
|
221
|
+
self.variable_type = VariableType(self.variable_type)
|
|
222
|
+
self.variable_types = metadata.get('types') or []
|
|
223
|
+
self.variable_types = [
|
|
224
|
+
VariableType(t) for t in (self.variable_types or []) if t is not None
|
|
225
|
+
]
|
|
226
|
+
except Exception:
|
|
227
|
+
traceback.print_exc()
|
|
228
|
+
|
|
229
|
+
if (
|
|
230
|
+
self.variable_type is None
|
|
231
|
+
and not self.variable_types
|
|
232
|
+
and self.__memory_manager_v2_enabled
|
|
233
|
+
and self.part_uuids is not None
|
|
234
|
+
and len(self.part_uuids) >= 1
|
|
235
|
+
):
|
|
236
|
+
try:
|
|
237
|
+
variable_types = []
|
|
238
|
+
for part_uuid in self.part_uuids:
|
|
239
|
+
path = os.path.join(self.variable_path, str(part_uuid), METADATA_FILE)
|
|
240
|
+
if self.storage.path_exists(path):
|
|
241
|
+
metadata = self.storage.read_json_file(path, raise_exception=is_debug())
|
|
242
|
+
var_type = metadata.get('type')
|
|
243
|
+
if var_type:
|
|
244
|
+
variable_types.append(var_type)
|
|
245
|
+
if len(variable_types) >= 1:
|
|
246
|
+
self.variable_type = VariableType.ITERABLE
|
|
247
|
+
self.variable_types = [VariableType(t) for t in (variable_types or [])]
|
|
248
|
+
self.write_metadata()
|
|
112
249
|
except Exception:
|
|
113
250
|
traceback.print_exc()
|
|
114
251
|
|
|
@@ -119,9 +256,7 @@ class Variable:
|
|
|
119
256
|
self.variable_type = VariableType.DATAFRAME
|
|
120
257
|
elif (
|
|
121
258
|
self.variable_type == VariableType.DATAFRAME or self.variable_type is None
|
|
122
|
-
) and os.path.exists(
|
|
123
|
-
os.path.join(self.variable_path, f'{self.uuid}', 'data.sh')
|
|
124
|
-
):
|
|
259
|
+
) and os.path.exists(os.path.join(self.variable_path, f'{self.uuid}', 'data.sh')):
|
|
125
260
|
self.variable_type = VariableType.GEO_DATAFRAME
|
|
126
261
|
elif (
|
|
127
262
|
self.variable_type is None
|
|
@@ -130,9 +265,11 @@ class Variable:
|
|
|
130
265
|
):
|
|
131
266
|
self.variable_type = VariableType.SPARK_DATAFRAME
|
|
132
267
|
|
|
268
|
+
return self.variable_type
|
|
269
|
+
|
|
133
270
|
def convert_parquet_to_csv(self):
|
|
134
271
|
"""
|
|
135
|
-
For DATAFRAME variable, convert parquet files to csv files.
|
|
272
|
+
For DATAFRAME variable, convert parquet files to csv files. Used in R blocks.
|
|
136
273
|
"""
|
|
137
274
|
if self.variable_type != VariableType.DATAFRAME:
|
|
138
275
|
return
|
|
@@ -142,7 +279,7 @@ class Variable:
|
|
|
142
279
|
df = self.__read_parquet()
|
|
143
280
|
self.storage.write_csv(df, csv_file_path)
|
|
144
281
|
|
|
145
|
-
def delete(self):
|
|
282
|
+
def delete(self) -> None:
|
|
146
283
|
"""
|
|
147
284
|
Delete the variable data.
|
|
148
285
|
"""
|
|
@@ -155,19 +292,142 @@ class Variable:
|
|
|
155
292
|
self.__delete_parquet()
|
|
156
293
|
elif self.variable_type == VariableType.DATAFRAME_ANALYSIS:
|
|
157
294
|
return self.__delete_dataframe_analysis()
|
|
295
|
+
|
|
296
|
+
# TODO (dangerous): How do we delete other variable types?
|
|
297
|
+
|
|
158
298
|
return self.__delete_json()
|
|
159
299
|
|
|
300
|
+
def data_exists(self) -> bool:
|
|
301
|
+
path = self.__data_file_path()
|
|
302
|
+
num_rows = self.__parquet_num_rows(path)
|
|
303
|
+
parts = self.part_uuids
|
|
304
|
+
return (
|
|
305
|
+
(parts is not None and len(parts) >= 1)
|
|
306
|
+
or (num_rows is not None and num_rows >= 1)
|
|
307
|
+
or self.storage.path_exists(path)
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
def is_partial_data_readable(
|
|
311
|
+
self, part_uuid: Optional[Union[int, str]] = None, path: Optional[str] = None
|
|
312
|
+
) -> bool:
|
|
313
|
+
"""
|
|
314
|
+
We can only read partial data if 1 of the following criteria is met:
|
|
315
|
+
- The variable has parts: e.g. output_0/0, output_0/1, output_0/2, etc
|
|
316
|
+
- The variable is stored as a parquet file
|
|
317
|
+
"""
|
|
318
|
+
|
|
319
|
+
return self.__memory_manager_v2_enabled and (
|
|
320
|
+
self.__is_part_readable(part_uuid) or self.__is_parquet_readable(path)
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
def read_partial_data(
|
|
324
|
+
self,
|
|
325
|
+
batch_settings: Optional[BatchSettings] = None,
|
|
326
|
+
chunks: Optional[List] = None,
|
|
327
|
+
input_data_types: Optional[List[InputDataType]] = None,
|
|
328
|
+
part_uuid: Optional[Union[int, str]] = None,
|
|
329
|
+
sample: bool = False,
|
|
330
|
+
sample_count: Optional[int] = None,
|
|
331
|
+
spark: Optional[Any] = None,
|
|
332
|
+
) -> Any:
|
|
333
|
+
"""
|
|
334
|
+
We can only read partial data if:
|
|
335
|
+
- The variable has parts: e.g. output_0/0, output_0/1, output_0/2, etc
|
|
336
|
+
- The variable is stored as a parquet file
|
|
337
|
+
"""
|
|
338
|
+
if part_uuid is not None and self.__is_part_readable(part_uuid):
|
|
339
|
+
variable = self.__class__(
|
|
340
|
+
os.path.join(self.uuid, str(part_uuid)),
|
|
341
|
+
self.pipeline_path,
|
|
342
|
+
self.block_uuid,
|
|
343
|
+
clean_block_uuid=False,
|
|
344
|
+
input_data_types=input_data_types or self.input_data_types,
|
|
345
|
+
partition=self.partition,
|
|
346
|
+
read_batch_settings=batch_settings or self.read_batch_settings,
|
|
347
|
+
read_chunks=chunks or self.read_chunks,
|
|
348
|
+
resource_usage=self.resource_usage,
|
|
349
|
+
storage=self.storage,
|
|
350
|
+
validate_pipeline_path=False,
|
|
351
|
+
# DO NOT PASS variable_types
|
|
352
|
+
# this in or else the data_manager will add its own part to the path
|
|
353
|
+
# variable_type=self.variable_type,
|
|
354
|
+
# variable_types=self.variable_types,
|
|
355
|
+
variables_dir=self.variables_dir,
|
|
356
|
+
write_batch_settings=self.write_batch_settings,
|
|
357
|
+
write_chunks=self.write_chunks,
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
return variable.read_data()
|
|
361
|
+
elif self.__is_parquet_readable():
|
|
362
|
+
data_manager = self.__class__(
|
|
363
|
+
self.uuid,
|
|
364
|
+
self.pipeline_path,
|
|
365
|
+
self.block_uuid,
|
|
366
|
+
clean_block_uuid=False,
|
|
367
|
+
input_data_types=input_data_types or self.input_data_types,
|
|
368
|
+
partition=self.partition,
|
|
369
|
+
read_batch_settings=batch_settings or self.read_batch_settings,
|
|
370
|
+
read_chunks=chunks or self.read_chunks,
|
|
371
|
+
resource_usage=self.resource_usage,
|
|
372
|
+
storage=self.storage,
|
|
373
|
+
validate_pipeline_path=False,
|
|
374
|
+
variable_type=self.variable_type,
|
|
375
|
+
variable_types=self.variable_types,
|
|
376
|
+
variables_dir=self.variables_dir,
|
|
377
|
+
write_batch_settings=self.write_batch_settings,
|
|
378
|
+
write_chunks=self.write_chunks,
|
|
379
|
+
).data_manager
|
|
380
|
+
if data_manager:
|
|
381
|
+
return data_manager.read_sync(
|
|
382
|
+
part=int(part_uuid) if part_uuid is not None else None
|
|
383
|
+
)
|
|
384
|
+
|
|
160
385
|
def read_data(
|
|
161
386
|
self,
|
|
162
|
-
dataframe_analysis_keys: List[str] = None,
|
|
387
|
+
dataframe_analysis_keys: Optional[List[str]] = None,
|
|
163
388
|
raise_exception: bool = False,
|
|
164
389
|
sample: bool = False,
|
|
165
|
-
sample_count: int = None,
|
|
166
|
-
spark=None,
|
|
390
|
+
sample_count: Optional[int] = None,
|
|
391
|
+
spark: Optional[Any] = None,
|
|
167
392
|
) -> Any:
|
|
168
393
|
"""
|
|
169
|
-
|
|
394
|
+
Used by
|
|
395
|
+
block.get_outputs
|
|
396
|
+
WebSocker server sending block output to the IDE
|
|
397
|
+
fetch_input_variables
|
|
398
|
+
pipeline.get_block_variable
|
|
399
|
+
"""
|
|
400
|
+
|
|
401
|
+
def __read(
|
|
402
|
+
dataframe_analysis_keys=dataframe_analysis_keys,
|
|
403
|
+
raise_exception=raise_exception,
|
|
404
|
+
sample=sample,
|
|
405
|
+
sample_count=sample_count,
|
|
406
|
+
spark=spark,
|
|
407
|
+
):
|
|
408
|
+
return self.__read_data(
|
|
409
|
+
dataframe_analysis_keys=dataframe_analysis_keys,
|
|
410
|
+
raise_exception=raise_exception,
|
|
411
|
+
sample=sample,
|
|
412
|
+
sample_count=sample_count,
|
|
413
|
+
spark=spark,
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
# if self.__memory_manager_v2_enabled and False:
|
|
417
|
+
# with MemoryManager(scope_uuid=self.__scope_uuid(), process_uuid='variable.read_data'):
|
|
418
|
+
# return __read()
|
|
419
|
+
return __read()
|
|
170
420
|
|
|
421
|
+
def __read_data(
|
|
422
|
+
self,
|
|
423
|
+
dataframe_analysis_keys: Optional[List[str]] = None,
|
|
424
|
+
raise_exception: bool = False,
|
|
425
|
+
sample: bool = False,
|
|
426
|
+
sample_count: Optional[int] = None,
|
|
427
|
+
spark: Optional[Any] = None,
|
|
428
|
+
) -> Any:
|
|
429
|
+
"""
|
|
430
|
+
Read variable data.
|
|
171
431
|
Args:
|
|
172
432
|
dataframe_analysis_keys (List[str], optional): For DATAFRAME_ANALYSIS variable,
|
|
173
433
|
only read the selected keys.
|
|
@@ -179,7 +439,44 @@ class Variable:
|
|
|
179
439
|
DATAFRAME variable.
|
|
180
440
|
spark (None, optional): Spark context, used to read SPARK_DATAFRAME variable.
|
|
181
441
|
"""
|
|
182
|
-
if
|
|
442
|
+
if (
|
|
443
|
+
sample
|
|
444
|
+
and self.part_uuids is not None
|
|
445
|
+
and len(self.part_uuids) >= 1
|
|
446
|
+
and self.is_partial_data_readable(self.part_uuids[0])
|
|
447
|
+
):
|
|
448
|
+
return self.read_partial_data(
|
|
449
|
+
part_uuid=self.part_uuids[0],
|
|
450
|
+
sample=sample,
|
|
451
|
+
sample_count=sample_count,
|
|
452
|
+
spark=spark,
|
|
453
|
+
)
|
|
454
|
+
elif self.data_manager and self.data_manager.readable():
|
|
455
|
+
try:
|
|
456
|
+
data = self.data_manager.read_sync(
|
|
457
|
+
sample=sample,
|
|
458
|
+
sample_count=sample_count,
|
|
459
|
+
)
|
|
460
|
+
except FileNotFoundError as err:
|
|
461
|
+
print(f'[ERROR] Variable.read_data: {err}\n{traceback.format_exc()}')
|
|
462
|
+
print(f'variable_type: {self.variable_type}')
|
|
463
|
+
print(f'variable_types: {self.variable_types}')
|
|
464
|
+
print(f'variable_uuid: {self.uuid}')
|
|
465
|
+
print(f'variable_dir_path: {self.variable_dir_path}')
|
|
466
|
+
print(f'variable_path: {self.variable_path}')
|
|
467
|
+
print('Data sources:')
|
|
468
|
+
for source in self.data_manager.data_source:
|
|
469
|
+
print(f' {source}')
|
|
470
|
+
print('\n')
|
|
471
|
+
|
|
472
|
+
traceback.print_exc()
|
|
473
|
+
return None
|
|
474
|
+
return data
|
|
475
|
+
|
|
476
|
+
if (
|
|
477
|
+
self.variable_type == VariableType.DATAFRAME
|
|
478
|
+
or self.variable_type == VariableType.SERIES_PANDAS
|
|
479
|
+
):
|
|
183
480
|
return self.__read_parquet(
|
|
184
481
|
raise_exception=raise_exception,
|
|
185
482
|
sample=sample,
|
|
@@ -197,15 +494,71 @@ class Variable:
|
|
|
197
494
|
return self.__read_geo_dataframe(sample=sample, sample_count=sample_count)
|
|
198
495
|
elif self.variable_type == VariableType.DATAFRAME_ANALYSIS:
|
|
199
496
|
return self.__read_dataframe_analysis(dataframe_analysis_keys=dataframe_analysis_keys)
|
|
200
|
-
|
|
497
|
+
else:
|
|
498
|
+
data = self.__should_load_object()
|
|
499
|
+
if data is not None:
|
|
500
|
+
return data
|
|
501
|
+
|
|
502
|
+
data = self.__read_json(raise_exception=raise_exception, sample=sample)
|
|
503
|
+
|
|
504
|
+
if self.variable_type == VariableType.MATRIX_SPARSE:
|
|
505
|
+
data = self.__read_matrix_sparse(data, sample=sample, sample_count=sample_count)
|
|
506
|
+
elif (
|
|
507
|
+
VariableType.DICTIONARY_COMPLEX == self.variable_type
|
|
508
|
+
or VariableType.LIST_COMPLEX == self.variable_type
|
|
509
|
+
):
|
|
510
|
+
data = self.__read_complex_object(data)
|
|
511
|
+
|
|
512
|
+
return data
|
|
201
513
|
|
|
202
514
|
async def read_data_async(
|
|
203
515
|
self,
|
|
204
|
-
dataframe_analysis_keys: List[str] = None,
|
|
516
|
+
dataframe_analysis_keys: Optional[List[str]] = None,
|
|
205
517
|
sample: bool = False,
|
|
206
|
-
sample_count: int = None,
|
|
207
|
-
spark=None,
|
|
208
|
-
|
|
518
|
+
sample_count: Optional[int] = None,
|
|
519
|
+
spark: Optional[Any] = None,
|
|
520
|
+
limit_parts: Optional[int] = None,
|
|
521
|
+
input_data_types: Optional[List[InputDataType]] = None,
|
|
522
|
+
) -> Any:
|
|
523
|
+
"""
|
|
524
|
+
Used by
|
|
525
|
+
block.to_dict_async
|
|
526
|
+
GET /pipelines/[:uuid]
|
|
527
|
+
"""
|
|
528
|
+
|
|
529
|
+
async def __read(
|
|
530
|
+
dataframe_analysis_keys=dataframe_analysis_keys,
|
|
531
|
+
limit_parts=limit_parts,
|
|
532
|
+
sample=sample,
|
|
533
|
+
sample_count=sample_count,
|
|
534
|
+
spark=spark,
|
|
535
|
+
):
|
|
536
|
+
return await self.__read_data_async(
|
|
537
|
+
dataframe_analysis_keys=dataframe_analysis_keys,
|
|
538
|
+
limit_parts=limit_parts,
|
|
539
|
+
sample=sample,
|
|
540
|
+
sample_count=sample_count,
|
|
541
|
+
spark=spark,
|
|
542
|
+
)
|
|
543
|
+
|
|
544
|
+
# if self.__memory_manager_v2_enabled and False:
|
|
545
|
+
# with MemoryManager(
|
|
546
|
+
# scope_uuid=self.__scope_uuid(), process_uuid='variable.read_data_async'
|
|
547
|
+
# ):
|
|
548
|
+
# data = await __read()
|
|
549
|
+
# else:
|
|
550
|
+
# data = await __read()
|
|
551
|
+
|
|
552
|
+
return await __read()
|
|
553
|
+
|
|
554
|
+
async def __read_data_async(
|
|
555
|
+
self,
|
|
556
|
+
dataframe_analysis_keys: Optional[List[str]] = None,
|
|
557
|
+
limit_parts: Optional[int] = None,
|
|
558
|
+
sample: bool = False,
|
|
559
|
+
sample_count: Optional[int] = None,
|
|
560
|
+
spark: Optional[Any] = None,
|
|
561
|
+
) -> Any:
|
|
209
562
|
"""
|
|
210
563
|
Read variable data asynchronously.
|
|
211
564
|
|
|
@@ -217,8 +570,50 @@ class Variable:
|
|
|
217
570
|
sample_count (int, optional): The number of rows to sample, used for
|
|
218
571
|
DATAFRAME variable.
|
|
219
572
|
spark (None, optional): Spark context, used to read SPARK_DATAFRAME variable.
|
|
573
|
+
|
|
574
|
+
Used by
|
|
575
|
+
block.to_dict_async
|
|
576
|
+
GET /pipelines/[:uuid]
|
|
220
577
|
"""
|
|
221
|
-
if
|
|
578
|
+
if (
|
|
579
|
+
sample
|
|
580
|
+
and self.part_uuids is not None
|
|
581
|
+
and len(self.part_uuids) >= 1
|
|
582
|
+
and self.is_partial_data_readable(self.part_uuids[0])
|
|
583
|
+
):
|
|
584
|
+
return self.read_partial_data(
|
|
585
|
+
part_uuid=self.part_uuids[0],
|
|
586
|
+
sample=sample,
|
|
587
|
+
sample_count=sample_count,
|
|
588
|
+
spark=spark,
|
|
589
|
+
)
|
|
590
|
+
elif self.data_manager and self.data_manager.readable():
|
|
591
|
+
try:
|
|
592
|
+
data = await self.data_manager.read_async(
|
|
593
|
+
limit_parts=limit_parts,
|
|
594
|
+
sample=sample,
|
|
595
|
+
sample_count=sample_count,
|
|
596
|
+
)
|
|
597
|
+
return data
|
|
598
|
+
except FileNotFoundError as err:
|
|
599
|
+
print(f'[ERROR] Variable.read_data: {err}\n{traceback.format_exc()}')
|
|
600
|
+
print(f'variable_type: {self.variable_type}')
|
|
601
|
+
print(f'variable_types: {self.variable_types}')
|
|
602
|
+
print(f'variable_uuid: {self.uuid}')
|
|
603
|
+
print(f'variable_dir_path: {self.variable_dir_path}')
|
|
604
|
+
print(f'variable_path: {self.variable_path}')
|
|
605
|
+
print('Data sources:')
|
|
606
|
+
for source in self.data_manager.data_source:
|
|
607
|
+
print(f' {source}')
|
|
608
|
+
print('\n')
|
|
609
|
+
|
|
610
|
+
traceback.print_exc()
|
|
611
|
+
return None
|
|
612
|
+
|
|
613
|
+
if (
|
|
614
|
+
self.variable_type == VariableType.DATAFRAME
|
|
615
|
+
or self.variable_type == VariableType.SERIES_PANDAS
|
|
616
|
+
):
|
|
222
617
|
return self.__read_parquet(sample=sample, sample_count=sample_count)
|
|
223
618
|
elif self.variable_type == VariableType.POLARS_DATAFRAME:
|
|
224
619
|
return self.__read_polars_parquet(
|
|
@@ -231,108 +626,330 @@ class Variable:
|
|
|
231
626
|
return await self.__read_dataframe_analysis_async(
|
|
232
627
|
dataframe_analysis_keys=dataframe_analysis_keys,
|
|
233
628
|
)
|
|
234
|
-
|
|
629
|
+
else:
|
|
630
|
+
data = self.__should_load_object()
|
|
631
|
+
if data is not None:
|
|
632
|
+
return data
|
|
633
|
+
|
|
634
|
+
data = await self.__read_json_async(sample=sample)
|
|
635
|
+
|
|
636
|
+
if self.variable_type == VariableType.MATRIX_SPARSE:
|
|
637
|
+
data = self.__read_matrix_sparse(data, sample=sample, sample_count=sample_count)
|
|
638
|
+
elif (
|
|
639
|
+
VariableType.DICTIONARY_COMPLEX == self.variable_type
|
|
640
|
+
or VariableType.LIST_COMPLEX == self.variable_type
|
|
641
|
+
):
|
|
642
|
+
data = self.__read_complex_object(data)
|
|
643
|
+
|
|
644
|
+
return data
|
|
645
|
+
|
|
646
|
+
def __read_complex_object(self, data: Union[Dict, List]) -> Union[Dict, List]:
|
|
647
|
+
column_types_filename = os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE)
|
|
648
|
+
if self.storage.path_exists(column_types_filename):
|
|
649
|
+
column_types = self.storage.read_json_file(column_types_filename)
|
|
650
|
+
data = deserialize_complex(
|
|
651
|
+
data,
|
|
652
|
+
column_types,
|
|
653
|
+
unflatten=isinstance(data, dict),
|
|
654
|
+
)
|
|
655
|
+
|
|
656
|
+
return data
|
|
657
|
+
|
|
658
|
+
def __save_complex_object(self, data: Union[Dict, List]) -> Union[Dict, List]:
|
|
659
|
+
data, column_types = serialize_complex(
|
|
660
|
+
flatten_dict(data) if isinstance(data, dict) else data,
|
|
661
|
+
save_path=self.variable_path,
|
|
662
|
+
)
|
|
663
|
+
|
|
664
|
+
self.storage.write_json_file(
|
|
665
|
+
os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE), column_types
|
|
666
|
+
)
|
|
667
|
+
self.resource_usage.update_attributes(
|
|
668
|
+
directory=self.variable_path,
|
|
669
|
+
size=size_of_path(self.variable_path),
|
|
670
|
+
)
|
|
671
|
+
|
|
672
|
+
return data
|
|
673
|
+
|
|
674
|
+
async def __save_complex_object_async(self, data: Union[Dict, List]) -> Union[Dict, List]:
|
|
675
|
+
data, column_types = serialize_complex(
|
|
676
|
+
flatten_dict(data) if isinstance(data, dict) else data,
|
|
677
|
+
save_path=self.variable_path,
|
|
678
|
+
)
|
|
679
|
+
await self.storage.write_json_file_async(
|
|
680
|
+
os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE),
|
|
681
|
+
column_types,
|
|
682
|
+
)
|
|
683
|
+
|
|
684
|
+
self.resource_usage.update_attributes(
|
|
685
|
+
directory=self.variable_path,
|
|
686
|
+
size=size_of_path(self.variable_path),
|
|
687
|
+
)
|
|
688
|
+
|
|
689
|
+
return data
|
|
690
|
+
|
|
691
|
+
def __should_save_object(self, data: Any) -> Dict[str, Any]:
|
|
692
|
+
data, full_path = save_custom_object(
|
|
693
|
+
data, self.variable_path, variable_type=self.variable_type
|
|
694
|
+
)
|
|
695
|
+
|
|
696
|
+
self.resource_usage.update_attributes(
|
|
697
|
+
directory=self.variable_path,
|
|
698
|
+
size=size_of_path(self.variable_path),
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
return data
|
|
702
|
+
|
|
703
|
+
def __should_load_object(self) -> Optional[Any]:
|
|
704
|
+
return load_custom_object(self.variable_path, self.variable_type)
|
|
235
705
|
|
|
236
706
|
@contextmanager
|
|
237
|
-
def open_to_write(self, filename: str)
|
|
707
|
+
def open_to_write(self, filename: str):
|
|
238
708
|
if not self.storage.isdir(self.variable_path):
|
|
239
709
|
self.storage.makedirs(self.variable_path, exist_ok=True)
|
|
240
710
|
|
|
241
|
-
with self.storage.open_to_write(self.full_path(filename)) as
|
|
242
|
-
yield
|
|
711
|
+
with self.storage.open_to_write(self.full_path(filename)) as fi:
|
|
712
|
+
yield fi
|
|
243
713
|
|
|
244
|
-
def full_path(self, filename: str = None) -> str:
|
|
714
|
+
def full_path(self, filename: Optional[str] = None) -> str:
|
|
245
715
|
if filename:
|
|
246
716
|
return os.path.join(self.variable_path, filename)
|
|
247
717
|
|
|
248
718
|
return self.variable_path
|
|
249
719
|
|
|
250
720
|
def write_data(self, data: Any) -> None:
|
|
721
|
+
if self.__memory_manager_v2_enabled and False:
|
|
722
|
+
with MemoryManager(scope_uuid=self.__scope_uuid(), process_uuid='variable.write_data'):
|
|
723
|
+
self.__write_data(data)
|
|
724
|
+
else:
|
|
725
|
+
self.__write_data(data)
|
|
726
|
+
|
|
727
|
+
def __write_data(self, data: Any) -> None:
|
|
251
728
|
"""
|
|
252
729
|
Write variable data to the persistent storage.
|
|
253
730
|
|
|
254
731
|
Args:
|
|
255
732
|
data (Any): Variable data to be written to storage.
|
|
256
|
-
"""
|
|
257
|
-
if isinstance(data, pd.Series):
|
|
258
|
-
data = data.to_list()
|
|
259
733
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
self.__write_polars_dataframe(data)
|
|
279
|
-
elif self.variable_type == VariableType.SPARK_DATAFRAME:
|
|
280
|
-
self.__write_spark_parquet(data)
|
|
281
|
-
elif self.variable_type == VariableType.GEO_DATAFRAME:
|
|
282
|
-
self.__write_geo_dataframe(data)
|
|
734
|
+
Used by:
|
|
735
|
+
VariableManager
|
|
736
|
+
"""
|
|
737
|
+
if self.data_manager and self.data_manager.writeable(data):
|
|
738
|
+
metadata = self.data_manager.write_sync(data)
|
|
739
|
+
if metadata:
|
|
740
|
+
self.__write_dataframe_analysis(
|
|
741
|
+
dict(
|
|
742
|
+
statistics=dict(
|
|
743
|
+
original_row_count=metadata.get('rows'),
|
|
744
|
+
original_column_count=metadata.get('columns'),
|
|
745
|
+
),
|
|
746
|
+
)
|
|
747
|
+
)
|
|
748
|
+
self.resource_usage.update_attributes(
|
|
749
|
+
directory=self.data_manager.resource_usage.directory,
|
|
750
|
+
size=self.data_manager.resource_usage.size,
|
|
751
|
+
)
|
|
283
752
|
else:
|
|
284
|
-
|
|
753
|
+
if isinstance(data, pd.Series) and self.variable_type != VariableType.SERIES_PANDAS:
|
|
754
|
+
data = data.to_list()
|
|
755
|
+
|
|
756
|
+
if self.variable_type is None and isinstance(data, pd.DataFrame):
|
|
757
|
+
self.variable_type = VariableType.DATAFRAME
|
|
758
|
+
elif self.variable_type is None and isinstance(data, pl.DataFrame):
|
|
759
|
+
self.variable_type = VariableType.POLARS_DATAFRAME
|
|
760
|
+
elif is_spark_dataframe(data):
|
|
761
|
+
self.variable_type = VariableType.SPARK_DATAFRAME
|
|
762
|
+
elif is_geo_dataframe(data):
|
|
763
|
+
self.variable_type = VariableType.GEO_DATAFRAME
|
|
764
|
+
|
|
765
|
+
# Dataframe analysis variables share the same uuid as the original dataframe variable
|
|
766
|
+
# so we won't write the metadata file for them
|
|
767
|
+
if self.variable_type == VariableType.DATAFRAME_ANALYSIS:
|
|
768
|
+
self.__write_dataframe_analysis(data)
|
|
769
|
+
return
|
|
770
|
+
|
|
771
|
+
if self.variable_type == VariableType.DATAFRAME:
|
|
772
|
+
self.__write_parquet(data)
|
|
773
|
+
elif self.variable_type == VariableType.POLARS_DATAFRAME:
|
|
774
|
+
self.__write_polars_dataframe(data)
|
|
775
|
+
elif self.variable_type == VariableType.SPARK_DATAFRAME:
|
|
776
|
+
self.__write_spark_parquet(data)
|
|
777
|
+
elif self.variable_type == VariableType.GEO_DATAFRAME:
|
|
778
|
+
self.__write_geo_dataframe(data)
|
|
779
|
+
elif self.variable_type == VariableType.MATRIX_SPARSE:
|
|
780
|
+
self.__write_matrix_sparse(data)
|
|
781
|
+
elif self.variable_type == VariableType.SERIES_PANDAS:
|
|
782
|
+
if not self.__write_series_pandas(data):
|
|
783
|
+
self.__write_json(data)
|
|
784
|
+
else:
|
|
785
|
+
if (
|
|
786
|
+
VariableType.DICTIONARY_COMPLEX == self.variable_type
|
|
787
|
+
or VariableType.LIST_COMPLEX == self.variable_type
|
|
788
|
+
):
|
|
789
|
+
data = self.__save_complex_object(data)
|
|
790
|
+
else:
|
|
791
|
+
data = self.__should_save_object(data)
|
|
285
792
|
|
|
793
|
+
self.__write_json(data)
|
|
794
|
+
|
|
795
|
+
# Shared logic across most variable types
|
|
286
796
|
if self.variable_type != VariableType.SPARK_DATAFRAME:
|
|
287
797
|
# Not write json file in spark data directory to avoid read error
|
|
288
798
|
self.write_metadata()
|
|
289
799
|
|
|
800
|
+
self.__write_resource_usage()
|
|
801
|
+
|
|
802
|
+
if self.variable_type in [
|
|
803
|
+
VariableType.ITERABLE,
|
|
804
|
+
VariableType.LIST_COMPLEX,
|
|
805
|
+
]:
|
|
806
|
+
self.__write_dataframe_analysis(
|
|
807
|
+
dict(
|
|
808
|
+
statistics=dict(
|
|
809
|
+
original_row_count=len(data),
|
|
810
|
+
),
|
|
811
|
+
)
|
|
812
|
+
)
|
|
813
|
+
|
|
290
814
|
async def write_data_async(self, data: Any) -> None:
|
|
815
|
+
if self.__memory_manager_v2_enabled and False:
|
|
816
|
+
with MemoryManager(
|
|
817
|
+
scope_uuid=self.__scope_uuid(), process_uuid='variable.write_data_async'
|
|
818
|
+
):
|
|
819
|
+
await self.__write_data_async(data)
|
|
820
|
+
else:
|
|
821
|
+
await self.__write_data_async(data)
|
|
822
|
+
|
|
823
|
+
async def __write_data_async(self, data: Any) -> None:
|
|
291
824
|
"""
|
|
292
825
|
Write variable data to the persistent storage.
|
|
293
826
|
|
|
294
827
|
Args:
|
|
295
828
|
data (Any): Variable data to be written to storage.
|
|
296
|
-
"""
|
|
297
|
-
if self.variable_type is None and type(data) is pd.DataFrame:
|
|
298
|
-
self.variable_type = VariableType.DATAFRAME
|
|
299
|
-
elif self.variable_type is None and type(data) is pl.DataFrame:
|
|
300
|
-
self.variable_type = VariableType.POLARS_DATAFRAME
|
|
301
|
-
elif is_spark_dataframe(data):
|
|
302
|
-
self.variable_type = VariableType.SPARK_DATAFRAME
|
|
303
|
-
elif is_geo_dataframe(data):
|
|
304
|
-
self.variable_type = VariableType.GEO_DATAFRAME
|
|
305
|
-
|
|
306
|
-
if self.variable_type == VariableType.DATAFRAME_ANALYSIS:
|
|
307
|
-
self.__write_dataframe_analysis(data)
|
|
308
|
-
return
|
|
309
829
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
830
|
+
Used by:
|
|
831
|
+
VariableManager
|
|
832
|
+
"""
|
|
833
|
+
if self.data_manager and self.data_manager.writeable(data):
|
|
834
|
+
metadata = await self.data_manager.write_async(data)
|
|
835
|
+
if metadata:
|
|
836
|
+
self.__write_dataframe_analysis(
|
|
837
|
+
dict(
|
|
838
|
+
statistics=dict(
|
|
839
|
+
original_row_count=metadata.get('rows'),
|
|
840
|
+
original_column_count=metadata.get('columns'),
|
|
841
|
+
),
|
|
842
|
+
)
|
|
843
|
+
)
|
|
844
|
+
self.resource_usage.update_attributes(
|
|
845
|
+
directory=self.data_manager.resource_usage.directory,
|
|
846
|
+
size=self.data_manager.resource_usage.size,
|
|
847
|
+
)
|
|
318
848
|
else:
|
|
319
|
-
|
|
849
|
+
if self.variable_type is None and isinstance(data, pd.DataFrame):
|
|
850
|
+
self.variable_type = VariableType.DATAFRAME
|
|
851
|
+
elif self.variable_type is None and isinstance(data, pl.DataFrame):
|
|
852
|
+
self.variable_type = VariableType.POLARS_DATAFRAME
|
|
853
|
+
elif is_spark_dataframe(data):
|
|
854
|
+
self.variable_type = VariableType.SPARK_DATAFRAME
|
|
855
|
+
elif is_geo_dataframe(data):
|
|
856
|
+
self.variable_type = VariableType.GEO_DATAFRAME
|
|
857
|
+
|
|
858
|
+
if self.variable_type == VariableType.DATAFRAME_ANALYSIS:
|
|
859
|
+
self.__write_dataframe_analysis(data)
|
|
860
|
+
return
|
|
861
|
+
|
|
862
|
+
if self.variable_type == VariableType.DATAFRAME:
|
|
863
|
+
self.__write_parquet(data)
|
|
864
|
+
elif self.variable_type == VariableType.POLARS_DATAFRAME:
|
|
865
|
+
self.__write_polars_dataframe(data)
|
|
866
|
+
elif self.variable_type == VariableType.SPARK_DATAFRAME:
|
|
867
|
+
self.__write_spark_parquet(data)
|
|
868
|
+
elif self.variable_type == VariableType.GEO_DATAFRAME:
|
|
869
|
+
self.__write_geo_dataframe(data)
|
|
870
|
+
elif self.variable_type == VariableType.MATRIX_SPARSE:
|
|
871
|
+
self.__write_matrix_sparse(data)
|
|
872
|
+
elif self.variable_type == VariableType.SERIES_PANDAS:
|
|
873
|
+
if not self.__write_series_pandas(data):
|
|
874
|
+
await self.__write_json_async(data)
|
|
875
|
+
else:
|
|
876
|
+
if (
|
|
877
|
+
VariableType.DICTIONARY_COMPLEX == self.variable_type
|
|
878
|
+
or VariableType.LIST_COMPLEX == self.variable_type
|
|
879
|
+
):
|
|
880
|
+
data = await self.__save_complex_object_asycn(data)
|
|
881
|
+
else:
|
|
882
|
+
data = self.__should_save_object(data)
|
|
883
|
+
await self.__write_json_async(data)
|
|
320
884
|
|
|
321
885
|
if self.variable_type != VariableType.SPARK_DATAFRAME:
|
|
322
886
|
# Not write json file in spark data directory to avoid read error
|
|
323
887
|
self.write_metadata()
|
|
324
888
|
|
|
889
|
+
self.__write_resource_usage()
|
|
890
|
+
|
|
891
|
+
if (
|
|
892
|
+
self.variable_type
|
|
893
|
+
in [
|
|
894
|
+
VariableType.DICTIONARY_COMPLEX,
|
|
895
|
+
VariableType.ITERABLE,
|
|
896
|
+
VariableType.LIST_COMPLEX,
|
|
897
|
+
]
|
|
898
|
+
or is_basic_iterable(data)
|
|
899
|
+
) and hasattr(data, '__len__'):
|
|
900
|
+
self.__write_dataframe_analysis(
|
|
901
|
+
dict(
|
|
902
|
+
statistics=dict(
|
|
903
|
+
original_row_count=len(data),
|
|
904
|
+
),
|
|
905
|
+
)
|
|
906
|
+
)
|
|
907
|
+
|
|
325
908
|
def write_metadata(self) -> None:
|
|
326
909
|
"""
|
|
327
910
|
Write metadata to the persistent storage.
|
|
328
911
|
"""
|
|
329
912
|
metadata = dict(
|
|
330
|
-
type=
|
|
331
|
-
|
|
332
|
-
|
|
913
|
+
type=(
|
|
914
|
+
self.variable_type.value
|
|
915
|
+
if isinstance(self.variable_type, VariableType)
|
|
916
|
+
else self.variable_type
|
|
917
|
+
),
|
|
333
918
|
)
|
|
919
|
+
|
|
920
|
+
if self.variable_types:
|
|
921
|
+
metadata['types'] = [
|
|
922
|
+
variable_type.value if isinstance(variable_type, VariableType) else variable_type
|
|
923
|
+
for variable_type in self.variable_types
|
|
924
|
+
]
|
|
925
|
+
|
|
334
926
|
self.storage.write_json_file(self.metadata_path, metadata)
|
|
335
927
|
|
|
928
|
+
def items_count(self, include_parts: Optional[bool] = None) -> Optional[int]:
|
|
929
|
+
if self.__memory_manager_v2_enabled:
|
|
930
|
+
row_count = None
|
|
931
|
+
if self.part_uuids is not None:
|
|
932
|
+
if include_parts:
|
|
933
|
+
row_count = self.__parquet_num_rows(self.variable_path)
|
|
934
|
+
else:
|
|
935
|
+
row_count = len(self.part_uuids)
|
|
936
|
+
elif self.storage.path_exists(os.path.join(self.variable_path, 'statistics.json')):
|
|
937
|
+
statistics = self.storage.read_json_file(
|
|
938
|
+
os.path.join(self.variable_path, 'statistics.json')
|
|
939
|
+
)
|
|
940
|
+
if statistics and isinstance(statistics, dict):
|
|
941
|
+
row_count = statistics.get('original_row_count')
|
|
942
|
+
else:
|
|
943
|
+
row_count = self.__parquet_num_rows(self.variable_path)
|
|
944
|
+
|
|
945
|
+
if row_count is not None and isinstance(row_count, (float, int, str)):
|
|
946
|
+
return int(row_count)
|
|
947
|
+
|
|
948
|
+
def __write_resource_usage(self) -> None:
|
|
949
|
+
if self.resource_usage:
|
|
950
|
+
os.makedirs(self.variable_dir_path, exist_ok=True)
|
|
951
|
+
self.storage.write_json_file(self.resource_usage_path(), self.resource_usage.to_dict())
|
|
952
|
+
|
|
336
953
|
def __delete_dataframe_analysis(self) -> None:
|
|
337
954
|
for k in DATAFRAME_ANALYSIS_KEYS:
|
|
338
955
|
file_path = os.path.join(self.variable_path, f'{k}.json')
|
|
@@ -356,6 +973,24 @@ class Variable:
|
|
|
356
973
|
self.storage.remove(file_path)
|
|
357
974
|
self.storage.remove_dir(self.variable_path)
|
|
358
975
|
|
|
976
|
+
def __data_file_path(self) -> str:
|
|
977
|
+
if self.variable_type in [
|
|
978
|
+
VariableType.DATAFRAME,
|
|
979
|
+
VariableType.POLARS_DATAFRAME,
|
|
980
|
+
VariableType.SERIES_PANDAS,
|
|
981
|
+
VariableType.SERIES_POLARS,
|
|
982
|
+
]:
|
|
983
|
+
return os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE)
|
|
984
|
+
elif VariableType.GEO_DATAFRAME == self.variable_type:
|
|
985
|
+
return os.path.join(self.variable_path, 'data.sh')
|
|
986
|
+
elif VariableType.MODEL_SKLEARN == self.variable_type:
|
|
987
|
+
return os.path.join(self.variable_path, JOBLIB_FILE)
|
|
988
|
+
elif VariableType.MODEL_XGBOOST == self.variable_type:
|
|
989
|
+
return os.path.join(self.variable_path, UBJSON_MODEL_FILENAME)
|
|
990
|
+
elif VariableType.CUSTOM_OBJECT == self.variable_type:
|
|
991
|
+
return os.path.join(self.variable_path, JOBLIB_OBJECT_FILE)
|
|
992
|
+
return os.path.join(self.variable_path, JSON_FILE)
|
|
993
|
+
|
|
359
994
|
def __read_json(
|
|
360
995
|
self,
|
|
361
996
|
default_value: Dict = None,
|
|
@@ -380,14 +1015,20 @@ class Variable:
|
|
|
380
1015
|
if self.storage.path_exists(file_path):
|
|
381
1016
|
try:
|
|
382
1017
|
data = self.storage.read_json_file(
|
|
383
|
-
file_path,
|
|
1018
|
+
file_path,
|
|
1019
|
+
default_value=default_value,
|
|
1020
|
+
raise_exception=raise_exception,
|
|
1021
|
+
)
|
|
384
1022
|
except Exception as ex:
|
|
385
1023
|
if raise_exception:
|
|
386
1024
|
raise Exception(f'Failed to read json file: {file_path}') from ex
|
|
387
1025
|
else:
|
|
388
1026
|
try:
|
|
389
1027
|
data = self.storage.read_json_file(
|
|
390
|
-
old_file_path,
|
|
1028
|
+
old_file_path,
|
|
1029
|
+
default_value=default_value,
|
|
1030
|
+
raise_exception=raise_exception,
|
|
1031
|
+
)
|
|
391
1032
|
except Exception as ex:
|
|
392
1033
|
if raise_exception:
|
|
393
1034
|
raise Exception(f'Failed to read json file: {old_file_path}') from ex
|
|
@@ -419,26 +1060,41 @@ class Variable:
|
|
|
419
1060
|
data = sample_output(data)[0]
|
|
420
1061
|
return data
|
|
421
1062
|
|
|
422
|
-
def __write_json(self, data) ->
|
|
1063
|
+
def __write_json(self, data) -> Any:
|
|
423
1064
|
if not self.storage.isdir(self.variable_path):
|
|
424
1065
|
self.storage.makedirs(self.variable_path, exist_ok=True)
|
|
1066
|
+
|
|
425
1067
|
file_path = os.path.join(self.variable_path, JSON_FILE)
|
|
426
1068
|
sample_file_path = os.path.join(self.variable_path, JSON_SAMPLE_FILE)
|
|
427
1069
|
self.storage.write_json_file(file_path, data)
|
|
428
1070
|
self.storage.write_json_file(sample_file_path, sample_output(data)[0])
|
|
429
1071
|
|
|
1072
|
+
self.resource_usage.update_attributes(
|
|
1073
|
+
size=size_of_path(self.variable_path),
|
|
1074
|
+
path=file_path,
|
|
1075
|
+
)
|
|
1076
|
+
|
|
1077
|
+
return data
|
|
1078
|
+
|
|
430
1079
|
async def __write_json_async(self, data) -> None:
|
|
431
1080
|
if not self.storage.isdir(self.variable_path):
|
|
432
1081
|
self.storage.makedirs(self.variable_path, exist_ok=True)
|
|
1082
|
+
|
|
433
1083
|
file_path = os.path.join(self.variable_path, JSON_FILE)
|
|
434
1084
|
sample_file_path = os.path.join(self.variable_path, JSON_SAMPLE_FILE)
|
|
1085
|
+
|
|
1086
|
+
self.resource_usage.update_attributes(
|
|
1087
|
+
size=size_of_path(self.variable_path),
|
|
1088
|
+
path=file_path,
|
|
1089
|
+
)
|
|
1090
|
+
|
|
435
1091
|
try:
|
|
436
1092
|
await self.storage.write_json_file_async(file_path, data)
|
|
437
1093
|
await self.storage.write_json_file_async(sample_file_path, sample_output(data)[0])
|
|
438
1094
|
except Exception:
|
|
439
1095
|
traceback.print_exc()
|
|
440
1096
|
|
|
441
|
-
def __read_geo_dataframe(self, sample: bool = False, sample_count: int = None):
|
|
1097
|
+
def __read_geo_dataframe(self, sample: bool = False, sample_count: Optional[int] = None):
|
|
442
1098
|
import geopandas as gpd
|
|
443
1099
|
|
|
444
1100
|
file_path = os.path.join(self.variable_path, 'data.sh')
|
|
@@ -461,7 +1117,7 @@ class Variable:
|
|
|
461
1117
|
def __read_parquet(
|
|
462
1118
|
self,
|
|
463
1119
|
sample: bool = False,
|
|
464
|
-
sample_count: int = None,
|
|
1120
|
+
sample_count: Optional[int] = None,
|
|
465
1121
|
raise_exception: bool = False,
|
|
466
1122
|
) -> pd.DataFrame:
|
|
467
1123
|
file_path = os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE)
|
|
@@ -491,9 +1147,19 @@ class Variable:
|
|
|
491
1147
|
if df.shape[0] > sample_count:
|
|
492
1148
|
df = df.iloc[:sample_count]
|
|
493
1149
|
|
|
1150
|
+
column_types_raw = None
|
|
494
1151
|
column_types_filename = os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE)
|
|
495
1152
|
if self.storage.path_exists(column_types_filename):
|
|
496
|
-
|
|
1153
|
+
column_types_raw = self.storage.read_json_file(column_types_filename)
|
|
1154
|
+
column_types = {}
|
|
1155
|
+
|
|
1156
|
+
if self.variable_type == VariableType.SERIES_PANDAS:
|
|
1157
|
+
if isinstance(column_types_raw, list):
|
|
1158
|
+
for col_data in column_types_raw:
|
|
1159
|
+
column_types.update(col_data['column_types'])
|
|
1160
|
+
else:
|
|
1161
|
+
column_types = column_types_raw
|
|
1162
|
+
|
|
497
1163
|
# ddf = dask_from_pandas(df)
|
|
498
1164
|
if should_deserialize_pandas(column_types):
|
|
499
1165
|
df = apply_transform_pandas(
|
|
@@ -501,18 +1167,65 @@ class Variable:
|
|
|
501
1167
|
lambda row: deserialize_columns(row, column_types),
|
|
502
1168
|
)
|
|
503
1169
|
df = cast_column_types(df, column_types)
|
|
1170
|
+
|
|
1171
|
+
if self.variable_type == VariableType.SERIES_PANDAS:
|
|
1172
|
+
if column_types_raw and isinstance(column_types_raw, list):
|
|
1173
|
+
series_list = []
|
|
1174
|
+
|
|
1175
|
+
for col_data in column_types_raw:
|
|
1176
|
+
column_mapping = col_data.get('column_mapping')
|
|
1177
|
+
index = col_data.get('index')
|
|
1178
|
+
|
|
1179
|
+
columns_idx = []
|
|
1180
|
+
columns = []
|
|
1181
|
+
for col_idx, col in column_mapping.items():
|
|
1182
|
+
columns_idx.append(col_idx)
|
|
1183
|
+
columns.append(col)
|
|
1184
|
+
|
|
1185
|
+
df_series = df.iloc[: len(index)][columns_idx]
|
|
1186
|
+
df_series.columns = columns
|
|
1187
|
+
for col in df_series.columns:
|
|
1188
|
+
series = df_series[col]
|
|
1189
|
+
series.set_axis(index)
|
|
1190
|
+
series_list.append(series)
|
|
1191
|
+
|
|
1192
|
+
return series_list
|
|
1193
|
+
else:
|
|
1194
|
+
df = df.iloc[:, 0]
|
|
1195
|
+
|
|
504
1196
|
return df
|
|
505
1197
|
|
|
1198
|
+
def __read_matrix_sparse(
|
|
1199
|
+
self,
|
|
1200
|
+
json_dict: Union[Dict, List[Dict], Tuple[Dict]],
|
|
1201
|
+
sample: bool = False,
|
|
1202
|
+
sample_count: Optional[int] = None,
|
|
1203
|
+
) -> scipy.sparse._csr.csr_matrix:
|
|
1204
|
+
if isinstance(json_dict, list) or isinstance(json_dict, Tuple):
|
|
1205
|
+
return [self.__deserialize_matrix_sparse(d, sample, sample_count) for d in json_dict]
|
|
1206
|
+
|
|
1207
|
+
return self.__deserialize_matrix_sparse(json_dict, sample, sample_count)
|
|
1208
|
+
|
|
1209
|
+
def __deserialize_matrix_sparse(
|
|
1210
|
+
self,
|
|
1211
|
+
json_dict: Dict,
|
|
1212
|
+
sample: bool = False,
|
|
1213
|
+
sample_count: Optional[int] = None,
|
|
1214
|
+
) -> scipy.sparse._csr.csr_matrix:
|
|
1215
|
+
csr_matrix = deserialize_matrix(json_dict)
|
|
1216
|
+
if sample:
|
|
1217
|
+
return csr_matrix[:sample_count, :DATAFRAME_SAMPLE_MAX_COLUMNS]
|
|
1218
|
+
|
|
1219
|
+
return csr_matrix
|
|
1220
|
+
|
|
506
1221
|
def __read_polars_parquet(
|
|
507
1222
|
self,
|
|
508
1223
|
sample: bool = False,
|
|
509
|
-
sample_count: int = None,
|
|
1224
|
+
sample_count: Optional[int] = None,
|
|
510
1225
|
raise_exception: bool = False,
|
|
511
1226
|
) -> pl.DataFrame:
|
|
512
1227
|
file_path = os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE)
|
|
513
|
-
sample_file_path = os.path.join(
|
|
514
|
-
self.variable_path, DATAFRAME_PARQUET_SAMPLE_FILE
|
|
515
|
-
)
|
|
1228
|
+
sample_file_path = os.path.join(self.variable_path, DATAFRAME_PARQUET_SAMPLE_FILE)
|
|
516
1229
|
|
|
517
1230
|
read_sample_success = False
|
|
518
1231
|
if sample:
|
|
@@ -521,9 +1234,7 @@ class Variable:
|
|
|
521
1234
|
read_sample_success = True
|
|
522
1235
|
except Exception as ex:
|
|
523
1236
|
if raise_exception:
|
|
524
|
-
raise Exception(
|
|
525
|
-
f'Failed to read parquet file: {sample_file_path}'
|
|
526
|
-
) from ex
|
|
1237
|
+
raise Exception(f'Failed to read parquet file: {sample_file_path}') from ex
|
|
527
1238
|
else:
|
|
528
1239
|
traceback.print_exc()
|
|
529
1240
|
if not read_sample_success:
|
|
@@ -548,12 +1259,13 @@ class Variable:
|
|
|
548
1259
|
df = cast_column_types_polars(df, column_types)
|
|
549
1260
|
return df
|
|
550
1261
|
|
|
551
|
-
def __read_spark_parquet(
|
|
1262
|
+
def __read_spark_parquet(
|
|
1263
|
+
self, sample: bool = False, sample_count: Optional[int] = None, spark=None
|
|
1264
|
+
):
|
|
552
1265
|
if spark is None:
|
|
553
1266
|
return None
|
|
554
1267
|
df = (
|
|
555
|
-
spark.read
|
|
556
|
-
.format('parquet')
|
|
1268
|
+
spark.read.format('parquet')
|
|
557
1269
|
.option('header', 'true')
|
|
558
1270
|
.option('inferSchema', 'true')
|
|
559
1271
|
.option('delimiter', ',')
|
|
@@ -569,7 +1281,7 @@ class Variable:
|
|
|
569
1281
|
df_sample_output = data.iloc[:DATAFRAME_SAMPLE_COUNT]
|
|
570
1282
|
df_sample_output.to_file(os.path.join(self.variable_path, 'sample_data.sh'))
|
|
571
1283
|
|
|
572
|
-
def
|
|
1284
|
+
def __get_column_types(self, data: pd.DataFrame) -> Tuple[Dict, pd.DataFrame]:
|
|
573
1285
|
column_types = {}
|
|
574
1286
|
df_output = data.copy()
|
|
575
1287
|
# Clean up data types since parquet doesn't support mixed data types
|
|
@@ -612,6 +1324,49 @@ class Variable:
|
|
|
612
1324
|
column_types[c] = coltype.__name__
|
|
613
1325
|
else:
|
|
614
1326
|
column_types[c] = type(series_non_null.iloc[0].item()).__name__
|
|
1327
|
+
return column_types, df_output
|
|
1328
|
+
|
|
1329
|
+
def __write_parquet(
|
|
1330
|
+
self,
|
|
1331
|
+
data: Union[pd.DataFrame, List[pd.Series]],
|
|
1332
|
+
) -> None:
|
|
1333
|
+
column_types_to_test = {}
|
|
1334
|
+
|
|
1335
|
+
is_series_list = (
|
|
1336
|
+
(isinstance(data, list) or isinstance(data, tuple))
|
|
1337
|
+
and len(data) >= 1
|
|
1338
|
+
and isinstance(data[0], pd.Series)
|
|
1339
|
+
)
|
|
1340
|
+
|
|
1341
|
+
if is_series_list:
|
|
1342
|
+
df_output = pd.DataFrame()
|
|
1343
|
+
|
|
1344
|
+
column_types = []
|
|
1345
|
+
for idx, series in enumerate(data):
|
|
1346
|
+
df_series = series.to_frame()
|
|
1347
|
+
column_mapping = {}
|
|
1348
|
+
|
|
1349
|
+
columns = []
|
|
1350
|
+
for col in df_series.columns:
|
|
1351
|
+
col_idx = f'{col}_{idx}'
|
|
1352
|
+
column_mapping[col_idx] = col
|
|
1353
|
+
columns.append(col_idx)
|
|
1354
|
+
|
|
1355
|
+
df_series.columns = columns
|
|
1356
|
+
col_types, df_series = self.__get_column_types(df_series)
|
|
1357
|
+
|
|
1358
|
+
df_output = pd.concat([df_output, df_series], axis=1)
|
|
1359
|
+
column_types.append(
|
|
1360
|
+
dict(
|
|
1361
|
+
column_mapping=column_mapping,
|
|
1362
|
+
column_types=col_types,
|
|
1363
|
+
index=series.index.to_list(),
|
|
1364
|
+
)
|
|
1365
|
+
)
|
|
1366
|
+
column_types_to_test.update(col_types)
|
|
1367
|
+
else:
|
|
1368
|
+
column_types, df_output = self.__get_column_types(data)
|
|
1369
|
+
column_types_to_test.update(column_types)
|
|
615
1370
|
|
|
616
1371
|
self.storage.makedirs(self.variable_path, exist_ok=True)
|
|
617
1372
|
self.storage.write_json_file(
|
|
@@ -619,10 +1374,14 @@ class Variable:
|
|
|
619
1374
|
column_types,
|
|
620
1375
|
)
|
|
621
1376
|
|
|
622
|
-
if should_serialize_pandas(
|
|
1377
|
+
if should_serialize_pandas(column_types_to_test):
|
|
623
1378
|
# Try using Polars to write the dataframe to improve performance
|
|
624
|
-
if
|
|
625
|
-
|
|
1379
|
+
if (
|
|
1380
|
+
type(df_output.index) is RangeIndex
|
|
1381
|
+
and df_output.index.start == 0
|
|
1382
|
+
and df_output.index.stop == df_output.shape[0]
|
|
1383
|
+
and df_output.index.step == 1
|
|
1384
|
+
):
|
|
626
1385
|
# Polars ignores any index
|
|
627
1386
|
try:
|
|
628
1387
|
pl_df = pl.from_pandas(df_output)
|
|
@@ -637,11 +1396,12 @@ class Variable:
|
|
|
637
1396
|
# ddf = dask_from_pandas(df_output)
|
|
638
1397
|
df_output_serialized = apply_transform_pandas(
|
|
639
1398
|
df_output,
|
|
640
|
-
lambda row: serialize_columns(row,
|
|
1399
|
+
lambda row: serialize_columns(row, column_types_to_test),
|
|
641
1400
|
)
|
|
642
1401
|
else:
|
|
643
1402
|
df_output_serialized = df_output
|
|
644
1403
|
|
|
1404
|
+
df_output_serialized.columns = [str(col) for col in df_output_serialized.columns]
|
|
645
1405
|
self.storage.write_parquet(
|
|
646
1406
|
df_output_serialized,
|
|
647
1407
|
os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE),
|
|
@@ -661,6 +1421,20 @@ class Variable:
|
|
|
661
1421
|
print(f'Sample output error: {err}.')
|
|
662
1422
|
traceback.print_exc()
|
|
663
1423
|
|
|
1424
|
+
try:
|
|
1425
|
+
n_rows, n_cols = df_output_serialized.shape
|
|
1426
|
+
self.__write_dataframe_analysis(
|
|
1427
|
+
dict(
|
|
1428
|
+
statistics=dict(
|
|
1429
|
+
original_row_count=n_rows,
|
|
1430
|
+
original_column_count=n_cols,
|
|
1431
|
+
),
|
|
1432
|
+
)
|
|
1433
|
+
)
|
|
1434
|
+
except Exception as err:
|
|
1435
|
+
print(f'Writing DataFrame analysis failed during writing parquet: {err}.')
|
|
1436
|
+
traceback.print_exc()
|
|
1437
|
+
|
|
664
1438
|
def __write_polars_dataframe(self, data: pl.DataFrame) -> None:
|
|
665
1439
|
self.storage.makedirs(self.variable_path, exist_ok=True)
|
|
666
1440
|
|
|
@@ -685,16 +1459,12 @@ class Variable:
|
|
|
685
1459
|
traceback.print_exc()
|
|
686
1460
|
|
|
687
1461
|
def __write_spark_parquet(self, data) -> None:
|
|
688
|
-
(
|
|
689
|
-
data.write
|
|
690
|
-
.option('header', 'True')
|
|
691
|
-
.mode('overwrite')
|
|
692
|
-
.parquet(self.variable_path)
|
|
693
|
-
)
|
|
1462
|
+
(data.write.option('header', 'True').mode('overwrite').parquet(self.variable_path))
|
|
694
1463
|
|
|
695
1464
|
def __read_dataframe_analysis(
|
|
696
1465
|
self,
|
|
697
|
-
dataframe_analysis_keys: List[str] = None,
|
|
1466
|
+
dataframe_analysis_keys: Optional[List[str]] = None,
|
|
1467
|
+
index: Optional[int] = None,
|
|
698
1468
|
) -> Dict[str, Dict]:
|
|
699
1469
|
"""
|
|
700
1470
|
Read the following files
|
|
@@ -703,13 +1473,14 @@ class Variable:
|
|
|
703
1473
|
3. insights.json
|
|
704
1474
|
4. suggestions.json
|
|
705
1475
|
"""
|
|
706
|
-
|
|
1476
|
+
base_path = os.path.join(self.variable_path, str(index) if index is not None else '')
|
|
1477
|
+
if not self.storage.path_exists(base_path):
|
|
707
1478
|
return dict()
|
|
708
1479
|
result = dict()
|
|
709
1480
|
for k in DATAFRAME_ANALYSIS_KEYS:
|
|
710
1481
|
if dataframe_analysis_keys is not None and k not in dataframe_analysis_keys:
|
|
711
1482
|
continue
|
|
712
|
-
result[k] = self.storage.read_json_file(os.path.join(
|
|
1483
|
+
result[k] = self.storage.read_json_file(os.path.join(base_path, f'{k}.json'))
|
|
713
1484
|
return result
|
|
714
1485
|
|
|
715
1486
|
async def __read_dataframe_analysis_async(
|
|
@@ -744,4 +1515,124 @@ class Variable:
|
|
|
744
1515
|
"""
|
|
745
1516
|
self.storage.makedirs(self.variable_path, exist_ok=True)
|
|
746
1517
|
for k in DATAFRAME_ANALYSIS_KEYS:
|
|
747
|
-
self.storage.write_json_file(
|
|
1518
|
+
self.storage.write_json_file(
|
|
1519
|
+
os.path.join(self.variable_path, f'{k}.json'), data.get(k)
|
|
1520
|
+
)
|
|
1521
|
+
|
|
1522
|
+
def __write_series_pandas(self, data: Union[List[pd.Series], pd.Series]) -> bool:
|
|
1523
|
+
var_type, basic_iterable = infer_variable_type(data)
|
|
1524
|
+
if VariableType.SERIES_PANDAS == var_type:
|
|
1525
|
+
if basic_iterable:
|
|
1526
|
+
self.__write_parquet(data)
|
|
1527
|
+
else:
|
|
1528
|
+
self.__write_parquet(data.to_frame())
|
|
1529
|
+
|
|
1530
|
+
row_count = None
|
|
1531
|
+
|
|
1532
|
+
if isinstance(data, pd.Series):
|
|
1533
|
+
row_count = data.shape[0]
|
|
1534
|
+
elif is_iterable(data) and len(data) >= 1 and isinstance(data[0], pd.Series):
|
|
1535
|
+
row_count = sum([s.shape[0] for s in data])
|
|
1536
|
+
|
|
1537
|
+
if row_count is not None:
|
|
1538
|
+
self.__write_dataframe_analysis(
|
|
1539
|
+
dict(
|
|
1540
|
+
statistics=dict(
|
|
1541
|
+
original_row_count=row_count,
|
|
1542
|
+
original_column_count=1,
|
|
1543
|
+
),
|
|
1544
|
+
)
|
|
1545
|
+
)
|
|
1546
|
+
|
|
1547
|
+
return True
|
|
1548
|
+
|
|
1549
|
+
return False
|
|
1550
|
+
|
|
1551
|
+
def __write_matrix_sparse(
|
|
1552
|
+
self,
|
|
1553
|
+
csr_matrix: Union[scipy.sparse._csr.csr_matrix, List[scipy.sparse._csr.csr_matrix]],
|
|
1554
|
+
) -> None:
|
|
1555
|
+
if not self.storage.isdir(self.variable_path):
|
|
1556
|
+
self.storage.makedirs(self.variable_path, exist_ok=True)
|
|
1557
|
+
|
|
1558
|
+
if isinstance(csr_matrix, list) or isinstance(csr_matrix, tuple):
|
|
1559
|
+
arr1 = []
|
|
1560
|
+
arr2 = []
|
|
1561
|
+
for matrix in csr_matrix:
|
|
1562
|
+
m_1, m_2 = self.__serialize_matrix_sparse(matrix)
|
|
1563
|
+
arr1.append(m_1)
|
|
1564
|
+
arr2.append(m_2)
|
|
1565
|
+
data = arr1
|
|
1566
|
+
data_sample = arr2
|
|
1567
|
+
else:
|
|
1568
|
+
data, data_sample = self.__serialize_matrix_sparse(csr_matrix)
|
|
1569
|
+
|
|
1570
|
+
sample_file_path = os.path.join(self.variable_path, JSON_SAMPLE_FILE)
|
|
1571
|
+
self.storage.write_json_file(sample_file_path, data_sample)
|
|
1572
|
+
|
|
1573
|
+
file_path = os.path.join(self.variable_path, JSON_FILE)
|
|
1574
|
+
self.storage.write_json_file(file_path, data)
|
|
1575
|
+
|
|
1576
|
+
if isinstance(csr_matrix, scipy.sparse._csr.csr_matrix):
|
|
1577
|
+
self.__write_dataframe_analysis(
|
|
1578
|
+
dict(
|
|
1579
|
+
statistics=dict(
|
|
1580
|
+
original_row_count=csr_matrix.shape[0],
|
|
1581
|
+
original_column_count=csr_matrix.shape[1],
|
|
1582
|
+
),
|
|
1583
|
+
)
|
|
1584
|
+
)
|
|
1585
|
+
|
|
1586
|
+
def __serialize_matrix_sparse(
|
|
1587
|
+
self, csr_matrix: scipy.sparse._csr.csr_matrix
|
|
1588
|
+
) -> Tuple[Dict, Dict]:
|
|
1589
|
+
sample = csr_matrix[:DATAFRAME_SAMPLE_COUNT, :DATAFRAME_SAMPLE_MAX_COLUMNS]
|
|
1590
|
+
data_sample = serialize_matrix(sample)
|
|
1591
|
+
data = serialize_matrix(csr_matrix)
|
|
1592
|
+
|
|
1593
|
+
return data, data_sample
|
|
1594
|
+
|
|
1595
|
+
def __parquet_num_rows(self, path: str) -> Optional[int]:
|
|
1596
|
+
if self.data_manager and self.data_manager.readable():
|
|
1597
|
+
metadata = read_metadata(path)
|
|
1598
|
+
row_count = metadata.get('num_rows')
|
|
1599
|
+
if row_count is not None and isinstance(row_count, (float, int, str)):
|
|
1600
|
+
return int(row_count)
|
|
1601
|
+
|
|
1602
|
+
def __scope_uuid(self) -> str:
|
|
1603
|
+
path_parts = [self.block_dir_name or '']
|
|
1604
|
+
try:
|
|
1605
|
+
path_parts.insert(
|
|
1606
|
+
0, str(Path(self.pipeline_path).relative_to(Path(self.variables_dir)))
|
|
1607
|
+
)
|
|
1608
|
+
except ValueError:
|
|
1609
|
+
pass
|
|
1610
|
+
|
|
1611
|
+
return os.path.join(*path_parts)
|
|
1612
|
+
|
|
1613
|
+
def __is_part_readable(self, part_uuid: Optional[Union[int, str]] = None) -> bool:
|
|
1614
|
+
if part_uuid is not None:
|
|
1615
|
+
part_uuid = str(part_uuid) if not isinstance(part_uuid, str) else part_uuid
|
|
1616
|
+
|
|
1617
|
+
return (
|
|
1618
|
+
self.part_uuids is not None
|
|
1619
|
+
and len(self.part_uuids) >= 1
|
|
1620
|
+
and (part_uuid is None or part_uuid in self.part_uuids)
|
|
1621
|
+
)
|
|
1622
|
+
|
|
1623
|
+
def __is_parquet_readable(self, path: Optional[str] = None) -> bool:
|
|
1624
|
+
from mage_ai.settings.server import (
|
|
1625
|
+
MEMORY_MANAGER_PANDAS_V2,
|
|
1626
|
+
MEMORY_MANAGER_POLARS_V2,
|
|
1627
|
+
)
|
|
1628
|
+
|
|
1629
|
+
if MEMORY_MANAGER_PANDAS_V2 or MEMORY_MANAGER_POLARS_V2:
|
|
1630
|
+
row_count = self.__parquet_num_rows(path or self.variable_path)
|
|
1631
|
+
return row_count is not None and row_count >= 1
|
|
1632
|
+
return False
|
|
1633
|
+
|
|
1634
|
+
@property
|
|
1635
|
+
def __memory_manager_v2_enabled(self):
|
|
1636
|
+
from mage_ai.settings.server import MEMORY_MANAGER_V2
|
|
1637
|
+
|
|
1638
|
+
return MEMORY_MANAGER_V2
|