mage-ai 0.9.70__py3-none-any.whl → 0.9.71__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mage-ai might be problematic. Click here for more details.
- mage_ai/ai/utils/xgboost.py +222 -0
- mage_ai/api/errors.py +37 -25
- mage_ai/api/operations/base.py +13 -1
- mage_ai/api/parsers/PipelineScheduleParser.py +1 -1
- mage_ai/api/policies/BlockOutputPolicy.py +40 -17
- mage_ai/api/policies/GlobalDataProductPolicy.py +91 -41
- mage_ai/api/policies/KernelPolicy.py +55 -32
- mage_ai/api/policies/KernelProcessPolicy.py +56 -0
- mage_ai/api/policies/OutputPolicy.py +73 -41
- mage_ai/api/policies/PipelinePolicy.py +206 -138
- mage_ai/api/presenters/BlockLayoutItemPresenter.py +9 -7
- mage_ai/api/presenters/BlockPresenter.py +1 -1
- mage_ai/api/presenters/GlobalDataProductPresenter.py +6 -1
- mage_ai/api/presenters/KernelPresenter.py +5 -26
- mage_ai/api/presenters/KernelProcessPresenter.py +28 -0
- mage_ai/api/presenters/PipelinePresenter.py +18 -5
- mage_ai/api/presenters/StatusPresenter.py +2 -0
- mage_ai/api/presenters/SyncPresenter.py +25 -0
- mage_ai/api/resources/AutocompleteItemResource.py +1 -1
- mage_ai/api/resources/BlockLayoutItemResource.py +90 -44
- mage_ai/api/resources/BlockOutputResource.py +42 -9
- mage_ai/api/resources/BlockResource.py +4 -3
- mage_ai/api/resources/BlockRunResource.py +27 -22
- mage_ai/api/resources/ClusterResource.py +4 -1
- mage_ai/api/resources/CustomTemplateResource.py +34 -14
- mage_ai/api/resources/DataProviderResource.py +1 -1
- mage_ai/api/resources/ExecutionStateResource.py +3 -1
- mage_ai/api/resources/FileContentResource.py +8 -2
- mage_ai/api/resources/FileResource.py +10 -4
- mage_ai/api/resources/FileVersionResource.py +3 -1
- mage_ai/api/resources/GitBranchResource.py +46 -9
- mage_ai/api/resources/GlobalDataProductResource.py +44 -7
- mage_ai/api/resources/GlobalHookResource.py +4 -1
- mage_ai/api/resources/IntegrationDestinationResource.py +6 -2
- mage_ai/api/resources/IntegrationSourceResource.py +8 -4
- mage_ai/api/resources/IntegrationSourceStreamResource.py +6 -2
- mage_ai/api/resources/KernelProcessResource.py +44 -0
- mage_ai/api/resources/KernelResource.py +25 -3
- mage_ai/api/resources/OutputResource.py +33 -11
- mage_ai/api/resources/PageBlockLayoutResource.py +34 -23
- mage_ai/api/resources/PipelineInteractionResource.py +31 -15
- mage_ai/api/resources/PipelineResource.py +250 -123
- mage_ai/api/resources/PipelineRunResource.py +11 -3
- mage_ai/api/resources/PipelineScheduleResource.py +7 -2
- mage_ai/api/resources/PipelineTriggerResource.py +6 -1
- mage_ai/api/resources/ProjectResource.py +18 -7
- mage_ai/api/resources/SecretResource.py +1 -1
- mage_ai/api/resources/SeedResource.py +8 -1
- mage_ai/api/resources/StatusResource.py +21 -6
- mage_ai/api/resources/SyncResource.py +6 -8
- mage_ai/api/resources/VariableResource.py +46 -26
- mage_ai/api/resources/VersionControlProjectResource.py +9 -2
- mage_ai/api/resources/WidgetResource.py +1 -1
- mage_ai/api/resources/WorkspaceResource.py +1 -1
- mage_ai/api/views.py +47 -40
- mage_ai/authentication/permissions/seed.py +16 -2
- mage_ai/authentication/providers/oidc.py +21 -1
- mage_ai/autocomplete/utils.py +13 -9
- mage_ai/cache/base.py +1 -1
- mage_ai/cache/block.py +18 -12
- mage_ai/cache/block_action_object/__init__.py +32 -4
- mage_ai/cache/file.py +22 -19
- mage_ai/cache/pipeline.py +18 -12
- mage_ai/cli/main.py +1 -0
- mage_ai/cluster_manager/aws/emr_cluster_manager.py +9 -5
- mage_ai/cluster_manager/config.py +2 -2
- mage_ai/cluster_manager/manage.py +1 -1
- mage_ai/cluster_manager/workspace/base.py +1 -1
- mage_ai/command_center/applications/factory.py +10 -7
- mage_ai/command_center/files/factory.py +17 -15
- mage_ai/command_center/utils.py +25 -13
- mage_ai/data/__init__.py +0 -0
- mage_ai/data/constants.py +45 -0
- mage_ai/data/models/__init__.py +0 -0
- mage_ai/data/models/base.py +119 -0
- mage_ai/data/models/constants.py +1 -0
- mage_ai/data/models/generator.py +115 -0
- mage_ai/data/models/manager.py +168 -0
- mage_ai/data/models/pyarrow/__init__.py +0 -0
- mage_ai/data/models/pyarrow/record_batch.py +55 -0
- mage_ai/data/models/pyarrow/shared.py +21 -0
- mage_ai/data/models/pyarrow/table.py +8 -0
- mage_ai/data/models/reader.py +103 -0
- mage_ai/data/models/utils.py +59 -0
- mage_ai/data/models/writer.py +91 -0
- mage_ai/data/tabular/__init__.py +0 -0
- mage_ai/data/tabular/constants.py +23 -0
- mage_ai/data/tabular/mocks.py +19 -0
- mage_ai/data/tabular/models.py +126 -0
- mage_ai/data/tabular/reader.py +602 -0
- mage_ai/data/tabular/utils.py +102 -0
- mage_ai/data/tabular/writer.py +266 -0
- mage_ai/data/variables/__init__.py +0 -0
- mage_ai/data/variables/wrapper.py +54 -0
- mage_ai/data_cleaner/analysis/charts.py +61 -39
- mage_ai/data_cleaner/column_types/column_type_detector.py +53 -31
- mage_ai/data_cleaner/estimators/encoders.py +5 -2
- mage_ai/data_integrations/utils/scheduler.py +16 -11
- mage_ai/data_preparation/decorators.py +1 -0
- mage_ai/data_preparation/executors/block_executor.py +237 -155
- mage_ai/data_preparation/executors/streaming_pipeline_executor.py +1 -1
- mage_ai/data_preparation/git/__init__.py +27 -7
- mage_ai/data_preparation/git/api.py +7 -1
- mage_ai/data_preparation/git/utils.py +22 -16
- mage_ai/data_preparation/logging/logger_manager.py +4 -3
- mage_ai/data_preparation/models/block/__init__.py +1542 -878
- mage_ai/data_preparation/models/block/data_integration/mixins.py +4 -3
- mage_ai/data_preparation/models/block/dynamic/__init__.py +17 -6
- mage_ai/data_preparation/models/block/dynamic/child.py +41 -102
- mage_ai/data_preparation/models/block/dynamic/constants.py +1 -0
- mage_ai/data_preparation/models/block/dynamic/counter.py +296 -0
- mage_ai/data_preparation/models/block/dynamic/data.py +16 -0
- mage_ai/data_preparation/models/block/dynamic/factory.py +163 -0
- mage_ai/data_preparation/models/block/dynamic/models.py +19 -0
- mage_ai/data_preparation/models/block/dynamic/shared.py +92 -0
- mage_ai/data_preparation/models/block/dynamic/utils.py +291 -168
- mage_ai/data_preparation/models/block/dynamic/variables.py +384 -144
- mage_ai/data_preparation/models/block/dynamic/wrappers.py +77 -0
- mage_ai/data_preparation/models/block/extension/utils.py +10 -1
- mage_ai/data_preparation/models/block/global_data_product/__init__.py +10 -1
- mage_ai/data_preparation/models/block/integration/__init__.py +6 -2
- mage_ai/data_preparation/models/block/outputs.py +722 -0
- mage_ai/data_preparation/models/block/platform/mixins.py +7 -8
- mage_ai/data_preparation/models/block/r/__init__.py +56 -38
- mage_ai/data_preparation/models/block/settings/__init__.py +0 -0
- mage_ai/data_preparation/models/block/settings/dynamic/__init__.py +0 -0
- mage_ai/data_preparation/models/block/settings/dynamic/constants.py +7 -0
- mage_ai/data_preparation/models/block/settings/dynamic/mixins.py +118 -0
- mage_ai/data_preparation/models/block/settings/dynamic/models.py +31 -0
- mage_ai/data_preparation/models/block/settings/global_data_products/__init__.py +0 -0
- mage_ai/data_preparation/models/block/settings/global_data_products/mixins.py +20 -0
- mage_ai/data_preparation/models/block/settings/global_data_products/models.py +46 -0
- mage_ai/data_preparation/models/block/settings/variables/__init__.py +0 -0
- mage_ai/data_preparation/models/block/settings/variables/mixins.py +74 -0
- mage_ai/data_preparation/models/block/settings/variables/models.py +49 -0
- mage_ai/data_preparation/models/block/spark/mixins.py +2 -1
- mage_ai/data_preparation/models/block/sql/__init__.py +30 -5
- mage_ai/data_preparation/models/block/sql/utils/shared.py +21 -3
- mage_ai/data_preparation/models/block/utils.py +127 -70
- mage_ai/data_preparation/models/constants.py +19 -14
- mage_ai/data_preparation/models/custom_templates/custom_block_template.py +18 -13
- mage_ai/data_preparation/models/custom_templates/custom_pipeline_template.py +33 -16
- mage_ai/data_preparation/models/custom_templates/utils.py +1 -1
- mage_ai/data_preparation/models/file.py +41 -28
- mage_ai/data_preparation/models/global_data_product/__init__.py +88 -58
- mage_ai/data_preparation/models/global_hooks/models.py +1 -0
- mage_ai/data_preparation/models/interfaces.py +29 -0
- mage_ai/data_preparation/models/pipeline.py +365 -180
- mage_ai/data_preparation/models/pipelines/integration_pipeline.py +1 -2
- mage_ai/data_preparation/models/pipelines/seed.py +1 -1
- mage_ai/data_preparation/models/project/__init__.py +66 -18
- mage_ai/data_preparation/models/project/constants.py +2 -0
- mage_ai/data_preparation/models/triggers/__init__.py +120 -24
- mage_ai/data_preparation/models/utils.py +467 -17
- mage_ai/data_preparation/models/variable.py +1028 -137
- mage_ai/data_preparation/models/variables/__init__.py +0 -0
- mage_ai/data_preparation/models/variables/cache.py +149 -0
- mage_ai/data_preparation/models/variables/constants.py +72 -0
- mage_ai/data_preparation/models/variables/summarizer.py +336 -0
- mage_ai/data_preparation/models/variables/utils.py +77 -0
- mage_ai/data_preparation/models/widget/__init__.py +63 -41
- mage_ai/data_preparation/models/widget/charts.py +40 -27
- mage_ai/data_preparation/models/widget/constants.py +2 -0
- mage_ai/data_preparation/models/widget/utils.py +3 -3
- mage_ai/data_preparation/preferences.py +3 -3
- mage_ai/data_preparation/repo_manager.py +55 -21
- mage_ai/data_preparation/storage/base_storage.py +2 -2
- mage_ai/data_preparation/storage/gcs_storage.py +7 -4
- mage_ai/data_preparation/storage/local_storage.py +6 -3
- mage_ai/data_preparation/storage/s3_storage.py +5 -2
- mage_ai/data_preparation/templates/data_exporters/streaming/oracledb.yaml +8 -0
- mage_ai/data_preparation/variable_manager.py +281 -76
- mage_ai/io/base.py +3 -2
- mage_ai/io/bigquery.py +1 -0
- mage_ai/io/redshift.py +7 -5
- mage_ai/kernels/__init__.py +0 -0
- mage_ai/kernels/models.py +188 -0
- mage_ai/kernels/utils.py +169 -0
- mage_ai/orchestration/concurrency.py +6 -2
- mage_ai/orchestration/db/__init__.py +1 -0
- mage_ai/orchestration/db/migrations/versions/0227396a216c_add_userproject_table.py +38 -0
- mage_ai/orchestration/db/models/dynamic/__init__.py +0 -0
- mage_ai/orchestration/db/models/dynamic/controller.py +67 -0
- mage_ai/orchestration/db/models/oauth.py +2 -9
- mage_ai/orchestration/db/models/projects.py +10 -0
- mage_ai/orchestration/db/models/schedules.py +204 -187
- mage_ai/orchestration/db/models/schedules_project_platform.py +18 -12
- mage_ai/orchestration/db/models/utils.py +46 -5
- mage_ai/orchestration/metrics/pipeline_run.py +8 -9
- mage_ai/orchestration/notification/sender.py +1 -0
- mage_ai/orchestration/pipeline_scheduler_original.py +32 -8
- mage_ai/orchestration/pipeline_scheduler_project_platform.py +1 -1
- mage_ai/orchestration/run_status_checker.py +11 -4
- mage_ai/orchestration/triggers/api.py +12 -1
- mage_ai/presenters/charts/data_sources/base.py +4 -2
- mage_ai/presenters/charts/data_sources/block.py +15 -9
- mage_ai/presenters/charts/data_sources/chart_code.py +8 -5
- mage_ai/presenters/charts/data_sources/constants.py +1 -0
- mage_ai/presenters/charts/data_sources/system_metrics.py +22 -0
- mage_ai/presenters/interactions/models.py +11 -7
- mage_ai/presenters/pages/loaders/pipelines.py +5 -3
- mage_ai/presenters/pages/models/page_components/pipeline_schedules.py +3 -1
- mage_ai/presenters/utils.py +2 -0
- mage_ai/server/api/blocks.py +2 -1
- mage_ai/server/api/downloads.py +5 -1
- mage_ai/server/api/triggers.py +3 -1
- mage_ai/server/constants.py +1 -1
- mage_ai/server/frontend_dist/404.html +5 -5
- mage_ai/server/frontend_dist/_next/static/UZLabyPgcxtZvp0O0EUUS/_buildManifest.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/1376-22de38b4ad008d8a.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{1557-b3502f3f1aa92ac7.js → 1557-25a7d985d5564fd3.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/1668-30b4619b9534519b.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/1799-c42db95a015689ee.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/2996-2108b53b9d371d8d.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/3763-61b542dafdbf5754.js → frontend_dist/_next/static/chunks/3763-40780c6d1e4b261d.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/3782-129dd2a2448a2e36.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/3958-bcdfa414ccfa1eb2.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/4168-97fd1578d1a38315.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/4982-fa5a238b139fbdd2.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/5699-176f445e1313f001.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/7162-7dd03f0f605de721.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/7779-68d2b72a90c5f925.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/7966-5446a8e43711e2f9.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/8023-6c2f172f48dcb99b.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/8095-c351b8a735d73e0c.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{main-77fe248a6fbd12d8.js → main-b99d4e30a88d9dc7.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/_app-9fe2d9d07c94e968.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/{block-layout-14f952f66964022f.js → block-layout-7f4b735c67115df5.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products/[...slug]-e7d48e6b0c3068ac.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products-b943f31f050fc3a4.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/overview-597b74828bf105db.js → frontend_dist/_next/static/chunks/pages/overview-9f1ac4ec003884f3.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-7181b086c93784d2.js → [...slug]-7e737f6fc7e83e9b.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-d94488e3f2eeef36.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-cc641a7fa8473796.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-a5c0362763a21fa8.js → block-runs-284309877f3c5a5a.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-26250e5335194ade.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors-7acc7afc00df17c2.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors-5f4c8128b2413fd8.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-4ebfc8e400315dda.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/settings-e5e0150a256aadb3.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/account/{profile-3f0df3decc856ee9.js → profile-3ae43c932537b254.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/preferences-32985f3f7c7dd3ab.js → frontend_dist/_next/static/chunks/pages/settings/platform/preferences-b603d7fe4b175256.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/settings-c2e9ef989c8bfa73.js → frontend_dist/_next/static/chunks/pages/settings/platform/settings-319ddbabc239e91b.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/permissions/{[...slug]-47b64ced27c24985.js → [...slug]-5c360f72e4498855.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{permissions-e5a4d3d815cec25d.js → permissions-fb29fa6c2bd90bb0.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-3b76fa959ffa09d3.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles/{[...slug]-379e1ee292504842.js → [...slug]-3b787b42f1093b1f.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles-0b83fbdd39e85f5b.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-a1e6950974d643a8.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users/{[...slug]-2af9afbe727d88aa.js → [...slug]-0aa019d87db8b0b8.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{users-a4db8710f703c729.js → users-88c694d19207f2ec.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/version-control-31d0d50f7f30462b.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{webpack-d079359c241db804.js → webpack-ac7fdc472bedf682.js} +1 -1
- mage_ai/server/frontend_dist/block-layout.html +3 -3
- mage_ai/server/frontend_dist/compute.html +6 -6
- mage_ai/server/frontend_dist/files.html +6 -6
- mage_ai/server/frontend_dist/global-data-products/[...slug].html +6 -6
- mage_ai/server/frontend_dist/global-data-products.html +6 -6
- mage_ai/server/frontend_dist/global-hooks/[...slug].html +6 -6
- mage_ai/server/frontend_dist/global-hooks.html +6 -6
- mage_ai/server/frontend_dist/index.html +3 -3
- mage_ai/server/frontend_dist/manage/files.html +6 -6
- mage_ai/server/frontend_dist/manage/settings.html +6 -6
- mage_ai/server/frontend_dist/manage/users/[user].html +6 -6
- mage_ai/server/frontend_dist/manage/users/new.html +6 -6
- mage_ai/server/frontend_dist/manage/users.html +6 -6
- mage_ai/server/frontend_dist/manage.html +6 -6
- mage_ai/server/frontend_dist/oauth.html +5 -5
- mage_ai/server/frontend_dist/overview.html +6 -6
- mage_ai/server/frontend_dist/pipeline-runs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/dashboard.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +3 -3
- mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/settings.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +6 -6
- mage_ai/server/frontend_dist/pipelines/[pipeline].html +3 -3
- mage_ai/server/frontend_dist/pipelines.html +6 -6
- mage_ai/server/frontend_dist/platform/global-hooks/[...slug].html +6 -6
- mage_ai/server/frontend_dist/platform/global-hooks.html +6 -6
- mage_ai/server/frontend_dist/settings/account/profile.html +6 -6
- mage_ai/server/frontend_dist/settings/platform/preferences.html +6 -6
- mage_ai/server/frontend_dist/settings/platform/settings.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/permissions/[...slug].html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/permissions.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/preferences.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/roles/[...slug].html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/roles.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/sync-data.html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/users/[...slug].html +6 -6
- mage_ai/server/frontend_dist/settings/workspace/users.html +6 -6
- mage_ai/server/frontend_dist/settings.html +3 -3
- mage_ai/server/frontend_dist/sign-in.html +12 -12
- mage_ai/server/frontend_dist/templates/[...slug].html +6 -6
- mage_ai/server/frontend_dist/templates.html +6 -6
- mage_ai/server/frontend_dist/terminal.html +6 -6
- mage_ai/server/frontend_dist/test.html +3 -3
- mage_ai/server/frontend_dist/triggers.html +6 -6
- mage_ai/server/frontend_dist/version-control.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/404.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1376-22de38b4ad008d8a.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{1557-b3502f3f1aa92ac7.js → 1557-25a7d985d5564fd3.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1668-30b4619b9534519b.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1799-c42db95a015689ee.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/2996-2108b53b9d371d8d.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/3763-61b542dafdbf5754.js → frontend_dist_base_path_template/_next/static/chunks/3763-40780c6d1e4b261d.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3782-129dd2a2448a2e36.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3958-bcdfa414ccfa1eb2.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/4168-97fd1578d1a38315.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/4982-fa5a238b139fbdd2.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5699-176f445e1313f001.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7162-7dd03f0f605de721.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7779-68d2b72a90c5f925.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7966-5446a8e43711e2f9.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8023-6c2f172f48dcb99b.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8095-c351b8a735d73e0c.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{main-70b78159c2bb3fe1.js → main-384298e9133cec76.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/_app-13a578bce3b7f30c.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{block-layout-14f952f66964022f.js → block-layout-7f4b735c67115df5.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products/[...slug]-e7d48e6b0c3068ac.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products-b943f31f050fc3a4.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/overview-597b74828bf105db.js → frontend_dist_base_path_template/_next/static/chunks/pages/overview-9f1ac4ec003884f3.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-7181b086c93784d2.js → [...slug]-7e737f6fc7e83e9b.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-d94488e3f2eeef36.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/edit-cc641a7fa8473796.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-a5c0362763a21fa8.js → block-runs-284309877f3c5a5a.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-26250e5335194ade.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors-7acc7afc00df17c2.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors-5f4c8128b2413fd8.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-4ebfc8e400315dda.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/settings-e5e0150a256aadb3.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/account/{profile-3f0df3decc856ee9.js → profile-3ae43c932537b254.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/settings/platform/preferences-32985f3f7c7dd3ab.js → frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/preferences-b603d7fe4b175256.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/settings/platform/settings-c2e9ef989c8bfa73.js → frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/settings-319ddbabc239e91b.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/permissions/{[...slug]-47b64ced27c24985.js → [...slug]-5c360f72e4498855.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{permissions-e5a4d3d815cec25d.js → permissions-fb29fa6c2bd90bb0.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/preferences-3b76fa959ffa09d3.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles/{[...slug]-379e1ee292504842.js → [...slug]-3b787b42f1093b1f.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles-0b83fbdd39e85f5b.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/sync-data-a1e6950974d643a8.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/users/{[...slug]-2af9afbe727d88aa.js → [...slug]-0aa019d87db8b0b8.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{users-a4db8710f703c729.js → users-88c694d19207f2ec.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/version-control-31d0d50f7f30462b.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{webpack-68c003fb6a175cd7.js → webpack-481689d9989710cd.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/kcptwoOU-JJJg6Vwpkfmx/_buildManifest.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/block-layout.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/compute.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/files.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/global-data-products/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/global-data-products.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/global-hooks/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/global-hooks.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/index.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/manage/files.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage/settings.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage/users/[user].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage/users/new.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage/users.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/manage.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/oauth.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/overview.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipeline-runs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/backfills/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/backfills.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/dashboard.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/edit.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/logs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors/block-runs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors/block-runtime.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/runs/[run].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/runs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/settings.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/syncs.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/triggers/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/triggers.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline].html +3 -3
- mage_ai/server/frontend_dist_base_path_template/pipelines.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/platform/global-hooks/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/platform/global-hooks.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/account/profile.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/platform/preferences.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/platform/settings.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/permissions/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/permissions.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/preferences.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/roles/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/roles.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/sync-data.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/users/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/users.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/settings.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/sign-in.html +12 -12
- mage_ai/server/frontend_dist_base_path_template/templates/[...slug].html +6 -6
- mage_ai/server/frontend_dist_base_path_template/templates.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/terminal.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/test.html +3 -3
- mage_ai/server/frontend_dist_base_path_template/triggers.html +6 -6
- mage_ai/server/frontend_dist_base_path_template/version-control.html +6 -6
- mage_ai/server/kernel_output_parser.py +4 -1
- mage_ai/server/scheduler_manager.py +9 -0
- mage_ai/server/server.py +35 -31
- mage_ai/server/utils/custom_output.py +284 -0
- mage_ai/server/utils/execute_custom_code.py +245 -0
- mage_ai/server/utils/output_display.py +123 -289
- mage_ai/server/websocket_server.py +116 -69
- mage_ai/services/k8s/config.py +23 -0
- mage_ai/services/k8s/job_manager.py +6 -1
- mage_ai/services/ssh/aws/emr/utils.py +8 -8
- mage_ai/settings/keys/auth.py +1 -0
- mage_ai/settings/platform/__init__.py +159 -38
- mage_ai/settings/platform/constants.py +5 -0
- mage_ai/settings/platform/utils.py +53 -10
- mage_ai/settings/repo.py +26 -12
- mage_ai/settings/server.py +128 -37
- mage_ai/shared/array.py +24 -1
- mage_ai/shared/complex.py +45 -0
- mage_ai/shared/config.py +2 -1
- mage_ai/shared/custom_logger.py +11 -0
- mage_ai/shared/dates.py +10 -6
- mage_ai/shared/files.py +63 -8
- mage_ai/shared/hash.py +33 -9
- mage_ai/shared/io.py +9 -5
- mage_ai/shared/models.py +82 -24
- mage_ai/shared/outputs.py +87 -0
- mage_ai/shared/parsers.py +141 -15
- mage_ai/shared/path_fixer.py +11 -7
- mage_ai/shared/singletons/__init__.py +0 -0
- mage_ai/shared/singletons/base.py +47 -0
- mage_ai/shared/singletons/memory.py +38 -0
- mage_ai/shared/strings.py +34 -1
- mage_ai/shared/yaml.py +24 -0
- mage_ai/streaming/sinks/oracledb.py +57 -0
- mage_ai/streaming/sinks/sink_factory.py +4 -0
- mage_ai/system/__init__.py +0 -0
- mage_ai/system/constants.py +14 -0
- mage_ai/system/memory/__init__.py +0 -0
- mage_ai/system/memory/constants.py +1 -0
- mage_ai/system/memory/manager.py +174 -0
- mage_ai/system/memory/presenters.py +158 -0
- mage_ai/system/memory/process.py +216 -0
- mage_ai/system/memory/samples.py +13 -0
- mage_ai/system/memory/utils.py +656 -0
- mage_ai/system/memory/wrappers.py +177 -0
- mage_ai/system/models.py +58 -0
- mage_ai/system/storage/__init__.py +0 -0
- mage_ai/system/storage/utils.py +29 -0
- mage_ai/tests/api/endpoints/mixins.py +2 -2
- mage_ai/tests/api/endpoints/test_blocks.py +2 -1
- mage_ai/tests/api/endpoints/test_custom_designs.py +4 -4
- mage_ai/tests/api/endpoints/test_pipeline_runs.py +2 -2
- mage_ai/tests/api/endpoints/test_projects.py +2 -1
- mage_ai/tests/api/operations/base/test_base.py +27 -27
- mage_ai/tests/api/operations/base/test_base_with_user_authentication.py +27 -27
- mage_ai/tests/api/operations/base/test_base_with_user_permissions.py +23 -23
- mage_ai/tests/api/operations/test_syncs.py +6 -4
- mage_ai/tests/api/resources/test_pipeline_resource.py +9 -2
- mage_ai/tests/authentication/providers/test_oidc.py +59 -0
- mage_ai/tests/base_test.py +2 -2
- mage_ai/tests/data/__init__.py +0 -0
- mage_ai/tests/data/models/__init__.py +0 -0
- mage_ai/tests/data_preparation/executors/test_block_executor.py +23 -16
- mage_ai/tests/data_preparation/git/test_git.py +4 -1
- mage_ai/tests/data_preparation/models/block/dynamic/test_combos.py +305 -0
- mage_ai/tests/data_preparation/models/block/dynamic/test_counter.py +212 -0
- mage_ai/tests/data_preparation/models/block/dynamic/test_factory.py +360 -0
- mage_ai/tests/data_preparation/models/block/dynamic/test_variables.py +332 -0
- mage_ai/tests/data_preparation/models/block/hook/test_hook_block.py +2 -2
- mage_ai/tests/data_preparation/models/block/platform/test_mixins.py +1 -1
- mage_ai/tests/data_preparation/models/block/sql/utils/test_shared.py +26 -1
- mage_ai/tests/data_preparation/models/block/test_global_data_product.py +3 -2
- mage_ai/tests/data_preparation/models/custom_templates/test_utils.py +5 -4
- mage_ai/tests/data_preparation/models/global_hooks/test_hook.py +3 -0
- mage_ai/tests/data_preparation/models/global_hooks/test_predicates.py +9 -3
- mage_ai/tests/data_preparation/models/test_block.py +115 -120
- mage_ai/tests/data_preparation/models/test_blocks_helper.py +114 -0
- mage_ai/tests/data_preparation/models/test_global_data_product.py +41 -24
- mage_ai/tests/data_preparation/models/test_pipeline.py +9 -6
- mage_ai/tests/data_preparation/models/test_project.py +4 -1
- mage_ai/tests/data_preparation/models/test_utils.py +80 -0
- mage_ai/tests/data_preparation/models/test_variable.py +242 -69
- mage_ai/tests/data_preparation/models/variables/__init__.py +0 -0
- mage_ai/tests/data_preparation/models/variables/test_summarizer.py +481 -0
- mage_ai/tests/data_preparation/storage/shared/__init__.py +0 -0
- mage_ai/tests/data_preparation/test_repo_manager.py +6 -7
- mage_ai/tests/data_preparation/test_variable_manager.py +57 -48
- mage_ai/tests/factory.py +64 -43
- mage_ai/tests/orchestration/db/models/test_schedules.py +3 -3
- mage_ai/tests/orchestration/db/models/test_schedules_dynamic_blocks.py +279 -0
- mage_ai/tests/orchestration/test_pipeline_scheduler.py +1 -0
- mage_ai/tests/orchestration/triggers/test_global_data_product.py +3 -2
- mage_ai/tests/orchestration/triggers/test_utils.py +3 -2
- mage_ai/tests/services/k8s/test_job_manager.py +18 -0
- mage_ai/tests/streaming/sinks/test_oracledb.py +38 -0
- mage_ai/tests/test_shared.py +61 -0
- mage_ai/usage_statistics/logger.py +7 -2
- mage_ai/utils/code.py +33 -19
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/METADATA +5 -2
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/RECORD +513 -417
- mage_ai/data_preparation/models/global_data_product/constants.py +0 -6
- mage_ai/server/frontend_dist/_next/static/RhDiJSkcjCsh4xxX4BFBk/_buildManifest.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/2631-b9f9bea3f1cf906d.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/3782-ef4cd4f0b52072d0.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/4783-422429203610c318.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/5699-6d708c6b2153ea08.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/635-0d6b7c8804bcd2dc.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/7022-0d52dd8868621fb0.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/7361-8a23dd8360593e7a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/7966-b9b85ba10667e654.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/8095-bdce03896ef9639a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/8146-6bed4e7401e067e6.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/9265-d2a1aaec75ec69b8.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/9440-4069842b90d4b801.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/9832-67896490f6e8a014.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/_app-2a69553d8c6eeb53.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products/[...slug]-591abd392dc50ed4.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products-78e8e88f2a757a18.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-95ffcd3e2b27e567.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-b645a6d13ab9fe3a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-1ed9045b2f1dfd65.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-1417ad1c821d720a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/settings-59aca25a5b1d3998.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-349af617d05f001b.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles-36fa165a48af586b.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-60d01d3887e31136.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/version-control-3433c8b22e8342aa.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/TdpLLFome13qvM0gXvpHs/_buildManifest.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/2631-b9f9bea3f1cf906d.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3782-ef4cd4f0b52072d0.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/4783-422429203610c318.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5699-6d708c6b2153ea08.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/635-0d6b7c8804bcd2dc.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7022-0d52dd8868621fb0.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7361-8a23dd8360593e7a.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7966-b9b85ba10667e654.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8095-bdce03896ef9639a.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8146-6bed4e7401e067e6.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9265-d2a1aaec75ec69b8.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9440-4069842b90d4b801.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9832-67896490f6e8a014.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/_app-2a69553d8c6eeb53.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products/[...slug]-591abd392dc50ed4.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products-78e8e88f2a757a18.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-95ffcd3e2b27e567.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/edit-b645a6d13ab9fe3a.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-1ed9045b2f1dfd65.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-1417ad1c821d720a.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/settings-59aca25a5b1d3998.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/preferences-349af617d05f001b.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles-36fa165a48af586b.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/sync-data-60d01d3887e31136.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/version-control-3433c8b22e8342aa.js +0 -1
- mage_ai/shared/memory.py +0 -90
- mage_ai/tests/data_preparation/models/block/dynamic/test_dynamic_helpers.py +0 -48
- /mage_ai/{tests/data_preparation/shared → ai/utils}/__init__.py +0 -0
- /mage_ai/server/frontend_dist/_next/static/{RhDiJSkcjCsh4xxX4BFBk → UZLabyPgcxtZvp0O0EUUS}/_ssgManifest.js +0 -0
- /mage_ai/server/frontend_dist_base_path_template/_next/static/{TdpLLFome13qvM0gXvpHs → kcptwoOU-JJJg6Vwpkfmx}/_ssgManifest.js +0 -0
- /mage_ai/tests/data_preparation/{shared → storage/shared}/test_secrets.py +0 -0
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/LICENSE +0 -0
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/WHEEL +0 -0
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/entry_points.txt +0 -0
- {mage_ai-0.9.70.dist-info → mage_ai-0.9.71.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,602 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
5
|
+
from functools import reduce
|
|
6
|
+
from typing import Any, AsyncGenerator, Dict, Generator, List, Optional, Tuple, Union
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import polars as pl
|
|
10
|
+
import pyarrow as pa
|
|
11
|
+
import pyarrow.dataset as ds
|
|
12
|
+
import pyarrow.parquet as pq
|
|
13
|
+
|
|
14
|
+
from mage_ai.data.constants import (
|
|
15
|
+
AsyncRecordBatchGenerator,
|
|
16
|
+
RecordBatch,
|
|
17
|
+
RecordBatchGenerator,
|
|
18
|
+
ScanBatchDatasetResult,
|
|
19
|
+
TaggedRecordBatch,
|
|
20
|
+
)
|
|
21
|
+
from mage_ai.data.models.generator import GeneratorWithMetadata
|
|
22
|
+
from mage_ai.data.tabular.constants import FilterComparison
|
|
23
|
+
from mage_ai.data.tabular.models import BatchSettings
|
|
24
|
+
from mage_ai.data.tabular.utils import compare_object
|
|
25
|
+
from mage_ai.shared.array import find, flatten
|
|
26
|
+
|
|
27
|
+
# One entry of the 'files' list: string keys mapped to a string, an integer,
# or a nested string-to-string dictionary.
_FileDetails = Dict[str, Union[str, int, Dict[str, str]]]

# Combined schema: column name -> column type -> list of entries.
_CombinedSchema = Dict[str, Dict[str, List[str]]]

# Type of the dictionary returned by `read_metadata`: integer counters,
# the combined schema, and the list of per-file details.
DatasetMetadata = Dict[str, Union[int, _CombinedSchema, List[_FileDetails]]]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
async def run_in_executor(func, *args):
    """
    Run a blocking callable in a worker thread and await its result.

    Args:
    - func: The callable to execute off the event loop thread.
    - *args: Positional arguments passed to ``func``.
    Returns:
    - Whatever ``func(*args)`` returns.
    Raises:
    - Any exception raised by ``func`` is re-raised to the awaiting caller.
    """
    loop = asyncio.get_running_loop()
    # Passing None selects the event loop's default executor, which is shared
    # and cleaned up with the loop. Creating a fresh ThreadPoolExecutor on each
    # call spawned a new pool per invocation that was never shut down.
    return await loop.run_in_executor(None, func, *args)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def create_filter(*args) -> ds.Expression:
    """
    Dynamically creates a filter expression for a given column, comparison operation, and value.

    The condition can be supplied in three ways:
    - a single string such as 'age >= 18' (column, comparison and value separated by whitespace),
    - a single 3-item sequence (column_name, comparison, value),
    - three positional arguments in the order column_name, comparison, value.

    Args:
    - column_name (str): The name of the column to filter on.
    - comparison (str): Type of comparison ('==', '!=', '<', '<=', '>', '>=')
    - value (Any): The value to compare against. A value parsed from a string
      condition stays a string; it is not converted to the column's type.
    Returns:
    - ds.Expression: A PyArrow dataset filter expression.
    Raises:
    - ValueError: If an unsupported comparison type is provided, or if a string
      condition does not contain three whitespace-separated parts.
    """
    expression = args[0] if len(args) == 1 else args
    if isinstance(expression, str):
        # Split on the first two runs of whitespace only, so repeated spaces are
        # tolerated and the value itself may contain spaces.
        column_name, comparison, value = expression.split(None, 2)
        value = value.strip()
    else:
        column_name, comparison, value = expression

    # Normalize the comparison operator. The previous code converted `value`
    # instead, which raised for every string value that was not an operator.
    if isinstance(comparison, str):
        try:
            comparison = FilterComparison(comparison.strip())
        except ValueError:
            raise ValueError(f'Unsupported comparison type: {comparison}') from None

    schema_field = ds.field(column_name)
    if FilterComparison.EQUAL == comparison:
        return schema_field == value
    elif FilterComparison.NOT_EQUAL == comparison:
        return schema_field != value
    elif FilterComparison.LESS_THAN == comparison:
        return schema_field < value
    elif FilterComparison.LESS_THAN_OR_EQUAL == comparison:
        return schema_field <= value
    elif FilterComparison.GREATER_THAN == comparison:
        return schema_field > value
    elif FilterComparison.GREATER_THAN_OR_EQUAL == comparison:
        return schema_field >= value
    else:
        raise ValueError(f'Unsupported comparison type: {comparison}')
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def partition_from_path(file_path: str) -> Optional[Dict[str, str]]:
    """
    Collects the `<key>=<value>` segments found in a '/'-separated file path.

    :param file_path: The full path to a Parquet file.
    :return: A dictionary mapping each partition key to its value, or None when
        no segment of the path contains '='. If a key appears more than once,
        the last occurrence wins.
    """
    # Only the first '=' in a segment separates key from value.
    key_value_pairs = (
        segment.split('=', 1) for segment in file_path.split('/') if '=' in segment
    )
    partitions = {name: content for name, content in key_value_pairs}
    return partitions or None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def read_metadata(
    directory: str, include_row_groups: bool = False, include_schema: bool = False
) -> DatasetMetadata:
    """
    Summarizes the Parquet files found under a directory using only their
    footers and file-system statistics; no column data is read.

    :param directory: Path to the directory to walk recursively. Files must end
        with '.parquet' and must not start with 'sample'.
    :param include_row_groups: When True, adds per-row-group column details
        (compression, byte size, schema path) to each file entry.
    :param include_schema: When True, adds each file's column-name-to-type
        mapping and fills the combined schema in the result.
    :return: A dictionary with the keys 'files', 'num_files', 'num_partitions',
        'num_rows', 'schema' and 'total_byte_size'.
    """
    # Collect the matching files first so the totals below cover all of them.
    paths = [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith('.parquet') and not filename.startswith('sample')
    ]

    files_info = []
    combined_schema = {}
    rows_total = 0
    bytes_total = 0

    for path in paths:
        # Size comes from the file system, not from the Parquet footer.
        size_on_disk = os.stat(path).st_size
        bytes_total += size_on_disk
        details = {'file_path': path, 'byte_size': size_on_disk}

        # A ParquetFile handle is only needed when the schema is requested.
        reader = pq.ParquetFile(path) if include_schema else None
        footer = reader.metadata if reader is not None else pq.read_metadata(path)

        partition = partition_from_path(path)
        if partition:
            details['partition'] = partition

        row_count = footer.num_rows
        rows_total += row_count
        details['num_rows'] = row_count
        details['num_columns'] = footer.num_columns
        details['num_row_groups'] = footer.num_row_groups

        if include_row_groups:
            groups = []
            for group_index in range(footer.num_row_groups):
                group = footer.row_group(group_index)
                columns = []
                for column_index in range(group.num_columns):
                    chunk = group.column(column_index)
                    entry = {'compression': chunk.compression}
                    # First available attribute wins; file_offset is a last
                    # resort and is not really the size of the column data.
                    for size_attribute in (
                        'total_compressed_size',
                        'compressed_size',
                        'file_offset',
                    ):
                        if hasattr(chunk, size_attribute):
                            entry['byte_size'] = getattr(chunk, size_attribute)
                            break
                    entry['schema_path'] = chunk.path_in_schema
                    columns.append(entry)
                groups.append({'num_rows': group.num_rows, 'columns': columns})
            details['row_groups'] = groups

        # Footer key-value metadata may use bytes; decode those to text.
        decoded = {}
        if footer.metadata:
            for raw_key, raw_value in footer.metadata.items():
                key = raw_key.decode('utf-8') if isinstance(raw_key, bytes) else raw_key
                value = raw_value.decode('utf-8') if isinstance(raw_value, bytes) else raw_value
                decoded[key] = value
        details['metadata'] = decoded

        if include_schema and reader is not None:
            file_schema = {}
            for arrow_field in reader.schema.to_arrow_schema():
                name = arrow_field.name
                type_name = str(arrow_field.type)
                file_schema[name] = type_name

                # Record which partitions carry this column with this type.
                entries = combined_schema.setdefault(name, {}).setdefault(type_name, [])
                if partition:
                    entries.append(partition)
            details['schema'] = file_schema

        files_info.append(details)

    return {
        'files': files_info,
        'num_files': len(paths),
        'num_partitions': len(files_info),
        'num_rows': rows_total,
        'schema': combined_schema,
        'total_byte_size': bytes_total,
    }
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def get_file_metadata(file_details: Any) -> Dict[str, str]:
    """
    Returns the value stored under the 'metadata' key of a file-details record.

    :param file_details: A record supporting item access by the key 'metadata'.
    :return: The stored metadata object, unchanged.
    """
    metadata = file_details['metadata']
    return metadata
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def get_object_metadata(file_metadata: Dict[str, str]) -> Dict[str, str]:
    """
    Decodes the JSON document stored under the 'object' key of a file's metadata.

    :param file_metadata: Key-value metadata of a file; None is tolerated.
    :return: The parsed JSON value, or an empty dictionary when the metadata is
        None or has no 'object' entry.
    """
    if file_metadata is None:
        return {}

    encoded: Optional[str] = file_metadata.get('object', None)
    return {} if encoded is None else json.loads(encoded)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def get_file_details(
    dataset_metadata_dict: Any,
) -> List[
    Dict[
        str,
        Union[str, int, Dict[str, str]],
    ]
]:
    """
    Returns the per-file entries stored under the 'files' key of a dataset's metadata.

    :param dataset_metadata_dict: Dataset metadata supporting item access by 'files'.
    :return: The stored list when it is a non-empty list; otherwise a new empty list.
    """
    entries = dataset_metadata_dict['files']

    # Anything that is not a populated list is reported as "no files".
    if not isinstance(entries, list) or len(entries) < 1:
        return []

    return entries
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def get_all_objects_metadata(
    metadatas: Optional[List[Any]] = None,
    source: Optional[Union[List[str], str]] = None,
) -> List[Dict[str, str]]:
    """
    Gathers the decoded 'object' metadata of every file across one or more datasets.

    :param metadatas: Dataset metadata dictionaries. When None, they are read
        from `source` instead.
    :param source: A directory or list of directories to read metadata from;
        only used when `metadatas` is None.
    :return: A flattened list with one object-metadata dictionary per file, or
        an empty list when neither argument is provided.
    """
    if metadatas is None:
        if source is None:
            return []
        directories = source if isinstance(source, list) else [source]
        metadatas = [read_metadata(directory) for directory in directories]

    # One inner list per dataset, flattened into a single list at the end.
    per_dataset = []
    for dataset_metadata in metadatas:
        per_dataset.append([
            get_object_metadata(get_file_metadata(details))
            for details in get_file_details(dataset_metadata)
        ])

    return flatten(per_dataset)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def get_series_object_metadata(
    metadatas: Optional[List[Any]] = None,
    source: Optional[Union[List[str], str]] = None,
) -> Optional[Dict[str, str]]:
    """
    Finds the first object metadata that `compare_object` matches against
    pandas Series, polars Series, or pandas DataFrame.

    :param metadatas: Dataset metadata dictionaries; forwarded to
        `get_all_objects_metadata`.
    :param source: Directory or directories to read metadata from; forwarded to
        `get_all_objects_metadata`.
    :return: Whatever `find` returns for the first matching object metadata.
    """
    # Checked in this order; the first class that matches settles the result.
    candidate_classes = (pd.Series, pl.Series, pd.DataFrame)

    def __matches(object_metadata: Dict[str, str]) -> bool:
        return any(
            compare_object(candidate, object_metadata) for candidate in candidate_classes
        )

    all_objects = get_all_objects_metadata(metadatas=metadatas, source=source)
    return find(__matches, all_objects)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def __builder_scanner_generator_configurations(
    source: Union[List[str], str],
    chunks: Optional[List[int]] = None,
    columns: Optional[List[str]] = None,
    filter: Optional[ds.Expression] = None,
    filters: Optional[List[List[str]]] = None,
    settings: Optional[BatchSettings] = None,
    **kwargs,
) -> Tuple[
    ds.Dataset,
    Dict[str, Optional[Union[bool, int, List[str]]]],
    List[DatasetMetadata],
]:
    """
    Builds everything needed to scan one or more Parquet datasets: the dataset
    object, the keyword arguments for its scanner, and per-source metadata.

    Parameters:
    - source: Union[List[str], str] - The path(s) to the dataset(s) in Parquet format.
    - chunks: Optional[List[int]] = None - Partition selectors. Each item is converted
        with str() and must read '<column>=<value>' (e.g. 'chunk=1'); rows matching
        any of them are kept.
    - columns: Optional[List[str]] = None - Specific columns to include in the output.
    - filter: Optional[ds.Expression] = None - A single filter expression to apply to the dataset.
    - filters: Optional[List[List[str]]] = None - Groups of conditions accepted by
        `create_filter`. Conditions inside a group are combined with AND; the groups
        are combined with OR.
    - settings: Optional[BatchSettings] = None - Batch settings, or a dictionary to load
        them from. The batch size is `settings.items.minimum or settings.items.maximum`.
    - **kwargs: Accepted and ignored.

    The chunk, group and single-expression filters are combined with AND.

    Returns:
        Tuple of (dataset, scanner settings, list of metadata dictionaries with one
        entry per source, each read with the schema included).

    Raises:
        ValueError - If a chunk value cannot be converted to the column's integer or
        float type, or if a filter condition is rejected by `create_filter`.
    """
    sources = source if isinstance(source, list) else [source]

    if isinstance(source, list):
        dataset = ds.dataset(
            [ds.dataset(path, format='parquet', partitioning='hive') for path in source],
        )
    else:
        dataset = ds.dataset(source, format='parquet', partitioning='hive')

    metadatas = [read_metadata(directory, include_schema=True) for directory in sources]

    if settings:
        if isinstance(settings, dict):
            settings = BatchSettings.load(**settings)
    else:
        settings = BatchSettings()
    batch_size = settings.items.minimum or settings.items.maximum

    def __create_filter(chunk_query: str, dataset=dataset):
        # Only the first '=' separates the column from the value.
        column, value = chunk_query.split('=', 1)
        # Find the actual data type of the column in the dataset
        actual_type = dataset.schema.field(column).type

        # Attempt to convert the value to the actual data type of the column
        if pa.types.is_string(actual_type):
            value = str(value)
        elif pa.types.is_integer(actual_type):
            try:
                value = int(value)
            except ValueError:
                raise ValueError(
                    f"Could not convert value '{value}' to integer for column '{column}'."
                )
        elif pa.types.is_float(actual_type):
            try:
                value = float(value)
            except ValueError:
                raise ValueError(
                    f"Could not convert value '{value}' to float for column '{column}'."
                )
        # Other column types are compared against the raw string value.

        return ds.field(column) == value

    filters_list = []
    if chunks:
        # ["chunk=1", "chunk=100", "chunk=1000"]
        filters_list.append(
            reduce(
                lambda a, b: a | b,
                [__create_filter(str(chunk)) for chunk in chunks],
            )
        )

    if filters:

        def __create_filters(filters_strings: List[str]) -> ds.Expression:
            # Convert every condition first, then fold. Folding the raw conditions
            # returned a single condition unconverted and re-parsed the accumulated
            # expression once there were three or more.
            return reduce(
                lambda a, b: a & b,
                [create_filter(condition) for condition in filters_strings],
            )

        filters_list.append(
            reduce(lambda a, b: a | b, [__create_filters(group) for group in filters]),
        )

    # Compare against None: a PyArrow expression has no truth value and raises
    # when used in a boolean context.
    if filter is not None:
        filters_list.append(filter)

    expression = None
    if len(filters_list) >= 1:
        expression = reduce(lambda a, b: a & b, filters_list)

    # - `batch_size` controls how many rows are included in each batch.
    # - `columns`, if specified, controls which columns are loaded.
    # - `filter` allows filtering rows based on specific criteria before they are loaded.
    # - `use_threads=True` enables multithreading to potentially speed up the data loading process.
    scanner_settings: Dict[str, Optional[Union[bool, int, List[str]]]] = dict(
        columns=columns,
        filter=expression,
        use_threads=True,
    )
    if batch_size and batch_size >= 1:
        scanner_settings['batch_size'] = batch_size

    return dataset, scanner_settings, metadatas
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def __wrap_generator(
    generator: Generator,
    metadatas: List[DatasetMetadata],
    deserialize: Optional[bool] = None,
    scan: Optional[bool] = False,
) -> RecordBatchGenerator:
    """
    Wraps every batch produced by `generator` in a record-batch object.

    :param generator: The source of raw batches.
    :param metadatas: Dataset metadata; passed to `GeneratorWithMetadata` and
        used to look up the object metadata attached to every batch.
    :param deserialize: When truthy, yields `batch.deserialize()` instead of
        the wrapped batch.
    :param scan: When truthy, batches are wrapped in `TaggedRecordBatch`;
        otherwise in `RecordBatch`.
    :return: A generator of wrapped (or deserialized) batches.
    """
    series_metadata = get_series_object_metadata(metadatas=metadatas)
    # The wrapper class depends only on `scan`, so pick it once.
    wrapper_class = TaggedRecordBatch if scan else RecordBatch

    for raw_batch in GeneratorWithMetadata(generator, metadata=metadatas):
        wrapped: Union[TaggedRecordBatch, RecordBatch] = wrapper_class(
            raw_batch,
            object_metadata=series_metadata,
        )
        yield wrapped.deserialize() if deserialize else wrapped
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def scan_dataset_parts(
    *args,
    deserialize: Optional[bool] = None,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    **kwargs,
) -> Any:
    """
    Yields the rows in the window [offset, offset + limit) of a scanned dataset,
    one sliced batch at a time.

    :param args: Positional arguments forwarded to
        `__builder_scanner_generator_configurations` (the dataset source first).
    :param deserialize: When truthy, yields `record_batch.deserialize()` instead
        of the `RecordBatch` wrapper.
    :param limit: Maximum number of rows to yield. When None or 0, defaults to
        the total row count reported by the metadata of all sources.
    :param offset: Number of leading rows to skip. Defaults to 0.
    :param kwargs: Keyword arguments forwarded to
        `__builder_scanner_generator_configurations`.
    :return: A generator of `RecordBatch` objects or their deserialized form.
    """
    dataset, scanner_settings, metadatas = __builder_scanner_generator_configurations(
        *args, **kwargs
    )

    # Add up the rows of every source. Using only the first source's count
    # truncated the default window when several sources were scanned together.
    num_rows = 0
    for metadata in metadatas:
        source_rows = metadata['num_rows']
        if isinstance(source_rows, int):
            num_rows += source_rows

    start_row = offset or 0
    rows_to_process = limit or num_rows
    total_rows_scanned = 0

    # Loop-invariant: depends only on the metadata, not on the batch.
    object_metadata = get_series_object_metadata(metadatas=metadatas)

    generator = dataset.scanner(**scanner_settings).scan_batches()

    for batch in generator:
        if not batch:
            break

        # scan_batches yields tagged batches; unwrap to the record batch.
        if hasattr(batch, 'record_batch'):
            batch = batch.record_batch

        num_rows_in_batch = batch.num_rows
        if total_rows_scanned + num_rows_in_batch <= start_row:
            # Entire batch is before the start_row; skip it.
            total_rows_scanned += num_rows_in_batch
            continue

        # Calculate the slice of the current batch that falls inside the window.
        offset_within_batch = max(start_row - total_rows_scanned, 0)
        length_within_batch = min(rows_to_process, num_rows_in_batch - offset_within_batch)

        if length_within_batch > 0:
            sliced_batch = batch.slice(offset=offset_within_batch, length=length_within_batch)
            record_batch = RecordBatch(sliced_batch, object_metadata=object_metadata)
            yield record_batch.deserialize() if deserialize else record_batch
            rows_to_process -= length_within_batch

        total_rows_scanned += num_rows_in_batch
        if rows_to_process <= 0:
            # Processed all rows in the range; exit loop.
            break
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def scan_batch_datasets_generator(
    *args, deserialize: Optional[bool] = None, scan: Optional[bool] = False, **kwargs
) -> RecordBatchGenerator:
    """
    Build the dataset described by the arguments and return a wrapped batch generator.

    All positional and remaining keyword arguments are forwarded to the
    configuration builder, which returns the dataset, the scanner settings and the
    metadatas. When `scan` is truthy the batches come from
    `dataset.scanner(...).scan_batches()`; otherwise from `dataset.to_batches(...)`.
    `deserialize` and `scan` are passed through to the wrapper unchanged.

    This is a plain function (it returns, it does not yield), so the dataset
    configuration is built when the function is called rather than on first iteration.
    """
    dataset, scanner_settings, metadatas = __builder_scanner_generator_configurations(
        *args, **kwargs
    )

    batches = (
        dataset.scanner(**scanner_settings).scan_batches()
        if scan
        else dataset.to_batches(**scanner_settings)
    )

    return __wrap_generator(batches, metadatas, deserialize=deserialize, scan=scan)
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
async def scan_batch_datasets_generator_async(
    source: Union[List[str], str], **kwargs
) -> AsyncRecordBatchGenerator:
    """
    Create the synchronous batch generator and expose it as an async generator.

    The coroutine must be awaited to obtain the async generator; the underlying
    synchronous generator is created at that point.
    """
    batches = scan_batch_datasets_generator(source, **kwargs)

    async def _stream():
        # NOTE(review): the synchronous generator is advanced by this `for` loop, i.e.
        # in the calling coroutine; the executor call only hands back the item that
        # was already produced. Confirm whether the scan itself was meant to run in
        # the executor.
        for entry in batches:
            # Bind the current item as a default so the callable returns this item.
            yield await run_in_executor(lambda entry=entry: entry)

    return _stream()
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def scan_batch_datasets(source: Union[List[str], str], **kwargs) -> Generator:
    """
    Lazily yield every item produced by `scan_batch_datasets_generator`.

    Because this function contains a `yield`, it is a generator function:
    calling it returns a generator object and none of its code runs immediately.
    The underlying generator is only created once iteration starts.
    """
    yield from scan_batch_datasets_generator(source, **kwargs)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
async def scan_batch_datasets_async(source: Union[List[str], str], **kwargs) -> AsyncGenerator:
    """
    Asynchronously yield every item produced by `scan_batch_datasets_generator_async`.

    This is an async generator: the underlying async generator is awaited and
    created only once iteration starts.
    """
    stream = await scan_batch_datasets_generator_async(source, **kwargs)

    async for entry in stream:
        yield entry
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
def sample_batch_datasets(
    source: Union[List[str], str],
    sample_count: Optional[int] = None,
    settings: Optional[BatchSettings] = None,
    **kwargs,
) -> Optional[ScanBatchDatasetResult]:
    """
    Return the first item produced when scanning `source`, or None if there is none.

    When `sample_count` is truthy it is written to `settings.items.maximum` before
    the scan starts. A new `BatchSettings()` is used when `settings` is falsy.
    """
    settings = settings or BatchSettings()
    if sample_count:
        # NOTE(review): this writes to the `settings` object passed in by the caller.
        settings.items.maximum = sample_count

    generator = scan_batch_datasets_generator(source, **kwargs, settings=settings)

    # An exhausted generator and a first item of None both result in None.
    return next(generator, None)
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
async def sample_batch_datasets_async(
    source: Union[List[str], str],
    sample_count: Optional[int] = None,
    settings: Optional[BatchSettings] = None,
    **kwargs,
) -> Optional[ScanBatchDatasetResult]:
    """
    Async counterpart of `sample_batch_datasets`.

    Returns the first item produced by the async scan of `source`, or None when the
    scan produces nothing. When `sample_count` is truthy it is written to
    `settings.items.maximum` before the scan starts. A new `BatchSettings()` is used
    when `settings` is falsy.
    """
    settings = settings or BatchSettings()
    if sample_count:
        # NOTE(review): this writes to the `settings` object passed in by the caller.
        settings.items.maximum = sample_count

    stream = await scan_batch_datasets_generator_async(source, **kwargs, settings=settings)

    # Only the first item is ever looked at; a first item of None is returned as None.
    async for first in stream:
        return first
    return None
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional, Union
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import polars as pl
|
|
5
|
+
import pyarrow as pa
|
|
6
|
+
import pyarrow.dataset as ds
|
|
7
|
+
|
|
8
|
+
from mage_ai.data.tabular.constants import COLUMN_CHUNK
|
|
9
|
+
from mage_ai.shared.parsers import object_to_dict
|
|
10
|
+
|
|
11
|
+
# Every object type that `deserialize_batch` is annotated to return.
DeserializedBatch = Union[pd.DataFrame, pd.Series, pl.DataFrame, pl.Series]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def convert_series_list_to_dataframe(series_list: List[pl.Series]) -> pl.DataFrame:
    """
    Build a Polars DataFrame whose columns are the given Polars Series.

    A Series with a falsy name is stored under `series_<index>`, where `<index>` is
    its position in `series_list`. Columns are collected in a dict keyed by name, so
    a later Series replaces an earlier one that has the same name.
    """
    columns = {}
    for position, series in enumerate(series_list):
        columns[series.name or f'series_{position}'] = series
    return pl.DataFrame(columns)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def series_to_dataframe(series: Union[pd.Series, pl.Series]) -> pl.DataFrame:
    """
    Wrap a single Series in a Polars DataFrame.

    A pandas Series is first rebuilt as a Polars Series from its name and its NumPy
    values; any other input is passed to `pl.DataFrame` as is.
    """
    polars_series = (
        pl.Series(series.name, series.to_numpy()) if isinstance(series, pd.Series) else series
    )
    return pl.DataFrame(polars_series)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def deserialize_batch(
    batch: Union[pa.RecordBatch, ds.TaggedRecordBatch, pa.Table],
    object_metadata: Optional[Dict[str, str]] = None,
) -> DeserializedBatch:
    """
    Convert an Arrow batch or table into a pandas or Polars object.

    The input is normalized to a `pa.Table` (a tagged batch is unwrapped through its
    `record_batch` attribute) and the `COLUMN_CHUNK` column is dropped when present.

    `object_metadata` selects the output type by matching its `module` and `name`
    entries against a class (see `compare_object`):
    - `pd.DataFrame`: the whole table converted with `to_pandas()`.
    - `pd.Series`: the first column as a pandas Series.
    - `pl.Series`: the first column as a Polars Series.
    Without metadata, with no matching class, or when the table has no columns, the
    table is converted with `pl.from_arrow`.
    """
    if isinstance(batch, pa.Table):
        table = batch
    else:
        record_batch = batch if isinstance(batch, pa.RecordBatch) else batch.record_batch
        table = pa.Table.from_batches([record_batch])

    if COLUMN_CHUNK in table.column_names:
        table = table.drop([COLUMN_CHUNK])

    if object_metadata is not None and table.num_columns >= 1:
        if compare_object(pd.DataFrame, object_metadata):
            return table.to_pandas()
        elif compare_object(pd.Series, object_metadata):
            column_name = table.column_names[0]
            return pd.Series(table.column(column_name).to_pandas())
        elif compare_object(pl.Series, object_metadata):
            # Convert the whole column. A table column can hold several chunks
            # (e.g. when a multi-chunk pa.Table is passed in); reading only the first
            # chunk would silently drop every row stored in the remaining chunks.
            return pl.Series(table.column(0).to_pylist())

    return pl.from_arrow(table)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def compare_object(object: Any, object_metadata: Dict[str, str]) -> bool:
    """
    Tell whether `object_metadata` describes `object`.

    The metadata matches when its `module` entry equals `object.__module__` and its
    `name` entry equals `object.__name__`. The name is only looked up after the
    module has matched.
    """
    if object_metadata.get('module') != object.__module__:
        return False
    return object_metadata.get('name') == object.__name__
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def multi_series_to_frame(
    df: Optional[Union[pd.DataFrame, pl.DataFrame, pl.Series, pd.Series]] = None,
    dfs: Optional[Union[List[pd.DataFrame], List[pl.DataFrame], pl.Series, pd.Series]] = None,
):
    """
    Normalize a single object or a collection of objects to Polars DataFrames.

    Returns a tuple `(df, dfs, series_sample, object_metadata)`:
    - `object_metadata` is `object_to_dict(df, ...)` when `df` is given, otherwise
      `object_to_dict(dfs[0], ...)` when `dfs` is given and non-empty, otherwise None.
    - When `dfs` is given, `df` is returned untouched. If every item of `dfs` is a
      Series, each is converted with `series_to_dataframe` and `series_sample` is
      the original first item. If every item is a pandas DataFrame, each is
      converted with `pl.from_pandas`. Any other mix is returned unchanged.
    - When `dfs` is None, a Series `df` is converted with `series_to_dataframe`
      (and kept as `series_sample`), and a pandas DataFrame `df` is converted with
      `pl.from_pandas`.

    An empty `dfs` is returned as is instead of raising `IndexError`.
    """
    object_metadata = (
        object_to_dict(df, include_hash=False, include_uuid=False) if df is not None else None
    )

    series_sample = None
    if dfs is not None:
        # An empty collection has no first element to sample or describe, so only
        # look at `dfs[0]` when there is at least one item.
        if len(dfs) >= 1:
            if object_metadata is None:
                object_metadata = object_to_dict(dfs[0], include_hash=False, include_uuid=False)
            if all(isinstance(item, (pd.Series, pl.Series)) for item in dfs):
                series_sample = dfs[0]
                dfs = [series_to_dataframe(item) for item in dfs]
            elif all(isinstance(item, pd.DataFrame) for item in dfs):
                dfs = [pl.from_pandas(item) for item in dfs]
    elif isinstance(df, (pd.Series, pl.Series)):
        series_sample = df
        df = series_to_dataframe(df)
    elif isinstance(df, pd.DataFrame):
        df = pl.from_pandas(df)

    return df, dfs, series_sample, object_metadata
|