mage-ai 0.9.56__py3-none-any.whl → 0.9.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mage-ai might be problematic. Click here for more details.
- mage_ai/api/policies/GlobalHookPolicy.py +3 -0
- mage_ai/api/policies/ProjectPolicy.py +2 -0
- mage_ai/api/presenters/ProjectPresenter.py +3 -0
- mage_ai/api/resources/GlobalHookResource.py +55 -10
- mage_ai/api/resources/ProjectResource.py +15 -2
- mage_ai/cache/dbt/cache.py +1 -1
- mage_ai/cache/dbt/utils.py +22 -2
- mage_ai/data_preparation/models/block/__init__.py +15 -5
- mage_ai/data_preparation/models/block/dbt/block_sql.py +12 -3
- mage_ai/data_preparation/models/file.py +1 -1
- mage_ai/data_preparation/models/global_hooks/models.py +2 -2
- mage_ai/data_preparation/models/project/__init__.py +28 -1
- mage_ai/orchestration/db/models/schedules.py +64 -102
- mage_ai/orchestration/db/models/schedules_project_platform.py +365 -0
- mage_ai/orchestration/pipeline_scheduler.py +5 -1700
- mage_ai/orchestration/pipeline_scheduler_original.py +1635 -0
- mage_ai/orchestration/pipeline_scheduler_project_platform.py +1701 -0
- mage_ai/server/constants.py +1 -1
- mage_ai/server/frontend_dist/404.html +2 -2
- mage_ai/server/frontend_dist/_next/static/A7VXVGKgLQCukXcjdysDz/_buildManifest.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/1557-724bfc3eabd095f7.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/161-33f26e485ddef393.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/3437-ce26fc28e114b44e.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/3540-9bb48b08f439d0a0.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/3745-51be3a2b7cd04895.js → frontend_dist/_next/static/chunks/3745-61b1c63bb56bb670.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/{5729.0f2748e9e6dab951.js → 5189.dca121eccea793be.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/5457-b373ebdf4d05d8e2.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/5533-3455832bc3f8429b.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/5729-9d8204ab91da631d.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/5810-e26a0768db1cfdba.js → frontend_dist/_next/static/chunks/5810-addfa3491ae6b01c.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/8264-68db2a8334ad48f1.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/8432-0ace6fb7bdbc6864.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/8731-82571147875a2d58.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{9264-a5e8a0274efa2b14.js → 9264-5df6e4c7b1e85c02.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/{_app-cc7f5a0c2456bc03.js → _app-4c0239ca6203e827.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/compute-822e66aa2e90cf4c.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/files-373217c5de51aeef.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-data-products/{[...slug]-1e4838e534c8f31e.js → [...slug]-0325e76a2f3e08c1.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/{global-data-products-bb38f73540efeac4.js → global-data-products-927ebbdc29529765.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-hooks/[...slug]-a172f5a447bd8925.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-hooks-97bec2ac883e0c26.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/files-02d001d99eeaae3f.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/settings-2e577bfd4f0db2b7.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/users/[user]-1827574a4ba95a72.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/users/new-a913c361bcc0d1a9.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/users-4e6fdcbbfc931d67.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-1c327edcf05df9c9.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/{oauth-eba7027969034415.js → oauth-bd8494f8875c5c97.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/overview-5a98e6a531410afb.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-1442183d13edec2e.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills/[...slug]-b526282c8ac49986.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/[...slug]-38b2241cdd10320c.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills-515eb343210aa1fb.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-1c646dbef65d6a69.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-fe4d1321da8291cb.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-35cb916a18ac4e1f.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-cd1918632dfef29d.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{logs-c011d465f61138ee.js → logs-67b0572c84db0940.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-46c0dabd155581a0.js → block-runs-40201b626ea3a664.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runtime-f2a052cb8a20fe47.js → block-runtime-d1f23308effebe03.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors-579899cbaedcdefc.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors-9a116d88f00916ff.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-7a2265b7e44bed0b.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-2d5abcd019d4bea1.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs-9b53da919db959bd.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs-5363a7ae9afe8983.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{settings-1839276725bfd757.js → settings-931a1b3112866a72.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/syncs-68d779bc62185470.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-5ec5367cb877db38.js} +1 -1
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-01f4528602f4ba2a.js → frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-8366661f8e2b2959.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-358523847fdbf547.js → triggers-378f8dada7d0c1dd.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-627be24ef4963cfb.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/platform/global-hooks/[...slug]-814bbd11e10c26dc.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/platform/global-hooks-021ec25e05862f8f.js +1 -0
- mage_ai/server/{frontend_dist_base_path_template/_next/static/chunks/pages/settings/account/profile-00393e67ab1e6788.js → frontend_dist/_next/static/chunks/pages/settings/account/profile-2b0aa123043519b8.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/platform/preferences-05186e17c94347c1.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/platform/settings-efe8bf1bf3177a7e.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/permissions/{[...slug]-eaeba99f9547a20a.js → [...slug]-aa5d871de1f3f7b0.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{permissions-c3140516cc28e467.js → permissions-ce45aad47049d993.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-1bc694b056ff0bcb.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/roles/{[...slug]-f6ff0e219a4b9ffd.js → [...slug]-88d29d1774db67e4.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{roles-22ff9d862736b2ec.js → roles-d8ca763e405cd9d1.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{sync-data-614925e838d1974c.js → sync-data-42bd909eb8951040.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users/{[...slug]-5b88dfb1c6d0d16c.js → [...slug]-250dfdf22bfe67e9.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{users-eb904db7ac7ea57c.js → users-97c4ce119f7238b5.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/{sign-in-c734db1d5834dda2.js → sign-in-1af3ba18ff646dd4.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/templates/[...slug]-ff9d49355393daea.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/{templates-5ebfe79c24d8c4b4.js → templates-299a2c8f2dd89cf3.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-fb3f398009a02879.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/test-59a08e06f4ef6c3a.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-551b85802515e313.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/version-control-1362aeda4a31dd41.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{webpack-fea697dd168c6d0c.js → webpack-17c3a8f588f14cfd.js} +1 -1
- mage_ai/server/frontend_dist/block-layout.html +2 -2
- mage_ai/server/frontend_dist/compute.html +2 -2
- mage_ai/server/frontend_dist/files.html +2 -2
- mage_ai/server/frontend_dist/global-data-products/[...slug].html +2 -2
- mage_ai/server/frontend_dist/global-data-products.html +2 -2
- mage_ai/server/frontend_dist/global-hooks/[...slug].html +2 -2
- mage_ai/server/frontend_dist/global-hooks.html +2 -2
- mage_ai/server/frontend_dist/index.html +2 -2
- mage_ai/server/frontend_dist/manage/files.html +2 -2
- mage_ai/server/frontend_dist/manage/settings.html +2 -2
- mage_ai/server/frontend_dist/manage/users/[user].html +2 -2
- mage_ai/server/frontend_dist/manage/users/new.html +2 -2
- mage_ai/server/frontend_dist/manage/users.html +2 -2
- mage_ai/server/frontend_dist/manage.html +2 -2
- mage_ai/server/frontend_dist/oauth.html +3 -3
- mage_ai/server/frontend_dist/overview.html +2 -2
- mage_ai/server/frontend_dist/pipeline-runs.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/dashboard.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/settings.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline].html +2 -2
- mage_ai/server/frontend_dist/pipelines.html +2 -2
- mage_ai/server/frontend_dist/platform/global-hooks/[...slug].html +24 -0
- mage_ai/server/frontend_dist/platform/global-hooks.html +24 -0
- mage_ai/server/frontend_dist/settings/account/profile.html +2 -2
- mage_ai/server/frontend_dist/settings/platform/preferences.html +24 -0
- mage_ai/server/frontend_dist/settings/platform/settings.html +24 -0
- mage_ai/server/frontend_dist/settings/workspace/permissions/[...slug].html +2 -2
- mage_ai/server/frontend_dist/settings/workspace/permissions.html +2 -2
- mage_ai/server/frontend_dist/settings/workspace/preferences.html +2 -2
- mage_ai/server/frontend_dist/settings/workspace/roles/[...slug].html +2 -2
- mage_ai/server/frontend_dist/settings/workspace/roles.html +2 -2
- mage_ai/server/frontend_dist/settings/workspace/sync-data.html +2 -2
- mage_ai/server/frontend_dist/settings/workspace/users/[...slug].html +2 -2
- mage_ai/server/frontend_dist/settings/workspace/users.html +2 -2
- mage_ai/server/frontend_dist/settings.html +2 -2
- mage_ai/server/frontend_dist/sign-in.html +5 -5
- mage_ai/server/frontend_dist/templates/[...slug].html +2 -2
- mage_ai/server/frontend_dist/templates.html +2 -2
- mage_ai/server/frontend_dist/terminal.html +2 -2
- mage_ai/server/frontend_dist/test.html +2 -2
- mage_ai/server/frontend_dist/triggers.html +2 -2
- mage_ai/server/frontend_dist/version-control.html +2 -2
- mage_ai/server/frontend_dist_base_path_template/404.html +4 -4
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1557-724bfc3eabd095f7.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/161-33f26e485ddef393.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3437-ce26fc28e114b44e.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3540-9bb48b08f439d0a0.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/3745-51be3a2b7cd04895.js → frontend_dist_base_path_template/_next/static/chunks/3745-61b1c63bb56bb670.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{5729.0f2748e9e6dab951.js → 5189.dca121eccea793be.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5457-b373ebdf4d05d8e2.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5533-3455832bc3f8429b.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5729-9d8204ab91da631d.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/5810-e26a0768db1cfdba.js → frontend_dist_base_path_template/_next/static/chunks/5810-addfa3491ae6b01c.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8264-68db2a8334ad48f1.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8432-0ace6fb7bdbc6864.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8731-82571147875a2d58.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{9264-a5e8a0274efa2b14.js → 9264-5df6e4c7b1e85c02.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{_app-cc7f5a0c2456bc03.js → _app-4c0239ca6203e827.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/compute-822e66aa2e90cf4c.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/files-373217c5de51aeef.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-data-products/{[...slug]-1e4838e534c8f31e.js → [...slug]-0325e76a2f3e08c1.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{global-data-products-bb38f73540efeac4.js → global-data-products-927ebbdc29529765.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-hooks/[...slug]-a172f5a447bd8925.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-hooks-97bec2ac883e0c26.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/files-02d001d99eeaae3f.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/settings-2e577bfd4f0db2b7.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/users/[user]-1827574a4ba95a72.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/users/new-a913c361bcc0d1a9.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/users-4e6fdcbbfc931d67.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage-1c327edcf05df9c9.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{oauth-eba7027969034415.js → oauth-bd8494f8875c5c97.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/overview-5a98e6a531410afb.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipeline-runs-1442183d13edec2e.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/[...slug]-b526282c8ac49986.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills/[...slug]-38b2241cdd10320c.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-515eb343210aa1fb.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/backfills-1c646dbef65d6a69.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-fe4d1321da8291cb.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/dashboard-35cb916a18ac4e1f.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/edit-cd1918632dfef29d.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/{logs-c011d465f61138ee.js → logs-67b0572c84db0940.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-46c0dabd155581a0.js → block-runs-40201b626ea3a664.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runtime-f2a052cb8a20fe47.js → block-runtime-d1f23308effebe03.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors-579899cbaedcdefc.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/monitors-9a116d88f00916ff.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-7a2265b7e44bed0b.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs/[run]-2d5abcd019d4bea1.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs-9b53da919db959bd.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/runs-5363a7ae9afe8983.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/{settings-1839276725bfd757.js → settings-931a1b3112866a72.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-68d779bc62185470.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/syncs-5ec5367cb877db38.js} +1 -1
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-01f4528602f4ba2a.js → frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-8366661f8e2b2959.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-358523847fdbf547.js → triggers-378f8dada7d0c1dd.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines-627be24ef4963cfb.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/platform/global-hooks/[...slug]-814bbd11e10c26dc.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/platform/global-hooks-021ec25e05862f8f.js +1 -0
- mage_ai/server/{frontend_dist/_next/static/chunks/pages/settings/account/profile-00393e67ab1e6788.js → frontend_dist_base_path_template/_next/static/chunks/pages/settings/account/profile-2b0aa123043519b8.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/preferences-05186e17c94347c1.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/platform/settings-efe8bf1bf3177a7e.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/permissions/{[...slug]-eaeba99f9547a20a.js → [...slug]-aa5d871de1f3f7b0.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{permissions-c3140516cc28e467.js → permissions-ce45aad47049d993.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/preferences-1bc694b056ff0bcb.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/roles/{[...slug]-f6ff0e219a4b9ffd.js → [...slug]-88d29d1774db67e4.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{roles-22ff9d862736b2ec.js → roles-d8ca763e405cd9d1.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{sync-data-614925e838d1974c.js → sync-data-42bd909eb8951040.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/users/{[...slug]-5b88dfb1c6d0d16c.js → [...slug]-250dfdf22bfe67e9.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/{users-eb904db7ac7ea57c.js → users-97c4ce119f7238b5.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{sign-in-c734db1d5834dda2.js → sign-in-1af3ba18ff646dd4.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/templates/[...slug]-ff9d49355393daea.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/{templates-5ebfe79c24d8c4b4.js → templates-299a2c8f2dd89cf3.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/terminal-fb3f398009a02879.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/test-59a08e06f4ef6c3a.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/triggers-551b85802515e313.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/version-control-1362aeda4a31dd41.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/{webpack-d30cb09c85b4c4f0.js → webpack-b55fe1e575d8ac9d.js} +1 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/wSRrM15xnvA8lOWcaque7/_buildManifest.js +1 -0
- mage_ai/server/frontend_dist_base_path_template/block-layout.html +2 -2
- mage_ai/server/frontend_dist_base_path_template/compute.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/files.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/global-data-products/[...slug].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/global-data-products.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/global-hooks/[...slug].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/global-hooks.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/index.html +2 -2
- mage_ai/server/frontend_dist_base_path_template/manage/files.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/manage/settings.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/manage/users/[user].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/manage/users/new.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/manage/users.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/manage.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/oauth.html +4 -4
- mage_ai/server/frontend_dist_base_path_template/overview.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipeline-runs.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/backfills/[...slug].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/backfills.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/dashboard.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/edit.html +2 -2
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/logs.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors/block-runs.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors/block-runtime.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/monitors.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/runs/[run].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/runs.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/settings.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/syncs.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/triggers/[...slug].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline]/triggers.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/pipelines/[pipeline].html +2 -2
- mage_ai/server/frontend_dist_base_path_template/pipelines.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/platform/global-hooks/[...slug].html +24 -0
- mage_ai/server/frontend_dist_base_path_template/platform/global-hooks.html +24 -0
- mage_ai/server/frontend_dist_base_path_template/settings/account/profile.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/settings/platform/preferences.html +24 -0
- mage_ai/server/frontend_dist_base_path_template/settings/platform/settings.html +24 -0
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/permissions/[...slug].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/permissions.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/preferences.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/roles/[...slug].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/roles.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/sync-data.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/users/[...slug].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/settings/workspace/users.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/settings.html +2 -2
- mage_ai/server/frontend_dist_base_path_template/sign-in.html +14 -14
- mage_ai/server/frontend_dist_base_path_template/templates/[...slug].html +5 -5
- mage_ai/server/frontend_dist_base_path_template/templates.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/terminal.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/test.html +2 -2
- mage_ai/server/frontend_dist_base_path_template/triggers.html +5 -5
- mage_ai/server/frontend_dist_base_path_template/version-control.html +5 -5
- mage_ai/settings/models/configuration_option.py +15 -1
- mage_ai/settings/platform/__init__.py +29 -7
- mage_ai/streaming/sinks/rabbitmq.py +3 -1
- mage_ai/streaming/sources/rabbitmq.py +5 -2
- mage_ai/tests/api/endpoints/test_pipeline_runs.py +4 -0
- mage_ai/tests/data_preparation/models/test_project.py +27 -7
- mage_ai/tests/orchestration/db/models/test_schedules.py +25 -7
- mage_ai/tests/orchestration/test_pipeline_scheduler.py +6 -261
- mage_ai/tests/orchestration/test_pipeline_scheduler_project_platform.py +286 -0
- mage_ai/tests/shared/mixins.py +1 -0
- {mage_ai-0.9.56.dist-info → mage_ai-0.9.57.dist-info}/METADATA +1 -1
- {mage_ai-0.9.56.dist-info → mage_ai-0.9.57.dist-info}/RECORD +280 -258
- mage_ai/server/frontend_dist/_next/static/4hG0vHBR7gnry-ZWEsEF3/_buildManifest.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/1125-65883c05178efca1.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/1952-c4ba37bc172d7051.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/3437-91f6098316edaf17.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/5457-6e700aadac17ceec.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/7936-e25b6058d3f9b215.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/8264-dad1f090c4278137.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/8432-ec2b97cbf32ec5a7.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/8731-9e0ad513b0dfce83.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/9626-090ff01fd210431c.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/compute-5ead3afa88d14721.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/files-13a4f6d00e8a1c63.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-hooks/[...slug]-a7d74042d52c3c38.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/global-hooks-cf7f1ba0b44ec0fb.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/files-259dea0c7cb30d2a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/settings-1fe2dfaa456529a6.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/users/[user]-1e8731ba2559fef4.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/users/new-a6307396dfa82c88.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage/users-fc5aafe5085739a9.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-8fad54817f356e9f.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/overview-321f47b2dba4c780.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-bf4d162b5460acc6.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-ed8c521227dd326e.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-33d9fb90871be84c.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-f4ff09cfff8bad46.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/templates/[...slug]-04b0a55e8ad6f814.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-88025dd0ed3051a6.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/test-5b316b190ff4b226.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-e5e49ac3b9282aaa.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/version-control-b52d3a07a13452ff.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/MKDICBWwxQowqsGfMukac/_buildManifest.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1125-65883c05178efca1.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/1952-c4ba37bc172d7051.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/3437-91f6098316edaf17.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/5457-6e700aadac17ceec.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/7936-e25b6058d3f9b215.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8264-dad1f090c4278137.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8432-ec2b97cbf32ec5a7.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/8731-9e0ad513b0dfce83.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/9626-090ff01fd210431c.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/compute-5ead3afa88d14721.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/files-13a4f6d00e8a1c63.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-hooks/[...slug]-a7d74042d52c3c38.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/global-hooks-cf7f1ba0b44ec0fb.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/files-259dea0c7cb30d2a.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/settings-1fe2dfaa456529a6.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/users/[user]-1e8731ba2559fef4.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/users/new-a6307396dfa82c88.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage/users-fc5aafe5085739a9.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/manage-8fad54817f356e9f.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/overview-321f47b2dba4c780.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipeline-runs-bf4d162b5460acc6.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines/[pipeline]/edit-ed8c521227dd326e.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/pipelines-33d9fb90871be84c.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/settings/workspace/preferences-f4ff09cfff8bad46.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/templates/[...slug]-04b0a55e8ad6f814.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/terminal-88025dd0ed3051a6.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/test-5b316b190ff4b226.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/triggers-e5e49ac3b9282aaa.js +0 -1
- mage_ai/server/frontend_dist_base_path_template/_next/static/chunks/pages/version-control-b52d3a07a13452ff.js +0 -1
- /mage_ai/server/frontend_dist/_next/static/{4hG0vHBR7gnry-ZWEsEF3 → A7VXVGKgLQCukXcjdysDz}/_ssgManifest.js +0 -0
- /mage_ai/server/frontend_dist_base_path_template/_next/static/{MKDICBWwxQowqsGfMukac → wSRrM15xnvA8lOWcaque7}/_ssgManifest.js +0 -0
- {mage_ai-0.9.56.dist-info → mage_ai-0.9.57.dist-info}/LICENSE +0 -0
- {mage_ai-0.9.56.dist-info → mage_ai-0.9.57.dist-info}/WHEEL +0 -0
- {mage_ai-0.9.56.dist-info → mage_ai-0.9.57.dist-info}/entry_points.txt +0 -0
- {mage_ai-0.9.56.dist-info → mage_ai-0.9.57.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1635 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import collections
|
|
3
|
+
import os
|
|
4
|
+
import traceback
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from typing import Any, Dict, List, Set, Tuple
|
|
7
|
+
|
|
8
|
+
import pytz
|
|
9
|
+
from dateutil.relativedelta import relativedelta
|
|
10
|
+
from sqlalchemy import desc, func
|
|
11
|
+
|
|
12
|
+
from mage_ai.data_integrations.utils.scheduler import (
|
|
13
|
+
clear_source_output_files,
|
|
14
|
+
get_extra_variables,
|
|
15
|
+
initialize_state_and_runs,
|
|
16
|
+
)
|
|
17
|
+
from mage_ai.data_preparation.executors.executor_factory import ExecutorFactory
|
|
18
|
+
from mage_ai.data_preparation.logging.logger import DictLogger
|
|
19
|
+
from mage_ai.data_preparation.logging.logger_manager_factory import LoggerManagerFactory
|
|
20
|
+
from mage_ai.data_preparation.models.constants import ExecutorType, PipelineType
|
|
21
|
+
from mage_ai.data_preparation.models.pipeline import Pipeline
|
|
22
|
+
from mage_ai.data_preparation.models.triggers import (
|
|
23
|
+
ScheduleInterval,
|
|
24
|
+
ScheduleStatus,
|
|
25
|
+
ScheduleType,
|
|
26
|
+
get_triggers_by_pipeline,
|
|
27
|
+
)
|
|
28
|
+
from mage_ai.data_preparation.repo_manager import get_repo_config
|
|
29
|
+
from mage_ai.data_preparation.sync.git_sync import get_sync_config
|
|
30
|
+
from mage_ai.orchestration.concurrency import ConcurrencyConfig, OnLimitReached
|
|
31
|
+
from mage_ai.orchestration.db import db_connection, safe_db_query
|
|
32
|
+
from mage_ai.orchestration.db.models.schedules import (
|
|
33
|
+
Backfill,
|
|
34
|
+
BlockRun,
|
|
35
|
+
EventMatcher,
|
|
36
|
+
PipelineRun,
|
|
37
|
+
PipelineSchedule,
|
|
38
|
+
)
|
|
39
|
+
from mage_ai.orchestration.job_manager import JobType, job_manager
|
|
40
|
+
from mage_ai.orchestration.metrics.pipeline_run import (
|
|
41
|
+
calculate_destination_metrics,
|
|
42
|
+
calculate_pipeline_run_metrics,
|
|
43
|
+
calculate_source_metrics,
|
|
44
|
+
)
|
|
45
|
+
from mage_ai.orchestration.notification.config import NotificationConfig
|
|
46
|
+
from mage_ai.orchestration.notification.sender import NotificationSender
|
|
47
|
+
from mage_ai.orchestration.utils.distributed_lock import DistributedLock
|
|
48
|
+
from mage_ai.orchestration.utils.git import log_git_sync, run_git_sync
|
|
49
|
+
from mage_ai.orchestration.utils.resources import get_compute, get_memory
|
|
50
|
+
from mage_ai.server.logger import Logger
|
|
51
|
+
from mage_ai.settings import HOSTNAME
|
|
52
|
+
from mage_ai.settings.repo import get_repo_path
|
|
53
|
+
from mage_ai.shared.array import find
|
|
54
|
+
from mage_ai.shared.dates import compare
|
|
55
|
+
from mage_ai.shared.environments import get_env
|
|
56
|
+
from mage_ai.shared.hash import index_by, merge_dict
|
|
57
|
+
from mage_ai.shared.retry import retry
|
|
58
|
+
from mage_ai.usage_statistics.logger import UsageStatisticLogger
|
|
59
|
+
|
|
60
|
+
MEMORY_USAGE_MAXIMUM = 0.95
|
|
61
|
+
|
|
62
|
+
lock = DistributedLock()
|
|
63
|
+
logger = Logger().new_server_logger(__name__)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class PipelineScheduler:
|
|
67
|
+
    def __init__(
        self,
        pipeline_run: PipelineRun,
    ) -> None:
        """Build a scheduler bound to a single pipeline run.

        Loads the pipeline definition, resolves integration streams (for data
        integration pipelines), and constructs the logger, notification sender,
        and concurrency/scheduling settings used by `schedule()`.

        Args:
            pipeline_run (PipelineRun): The run this scheduler manages.
        """
        self.pipeline_run = pipeline_run
        self.pipeline_schedule = pipeline_run.pipeline_schedule
        self.pipeline = Pipeline.get(pipeline_run.pipeline_uuid)

        # Get the list of integration streams if the pipeline is a data integration
        # pipeline. Failures here are logged but non-fatal: the run proceeds with
        # an empty stream list.
        self.streams = []
        if self.pipeline.type == PipelineType.INTEGRATION:
            try:
                self.streams = self.pipeline.streams(
                    self.pipeline_run.get_variables(
                        extra_variables=get_extra_variables(self.pipeline)
                    )
                )
            except Exception:
                logger.exception(f'Fail to get streams for {pipeline_run}')
                traceback.print_exc()

        # Initialize the logger, partitioned by this run's execution partition so
        # log output lands in the run-specific location.
        self.logger_manager = LoggerManagerFactory.get_logger_manager(
            pipeline_uuid=self.pipeline.uuid,
            partition=self.pipeline_run.execution_partition,
            repo_config=self.pipeline.repo_config,
        )
        self.logger = DictLogger(self.logger_manager.logger)

        # Initialize the notification sender. Pipeline-level notification config
        # is merged over the repo-level config.
        self.notification_sender = NotificationSender(
            NotificationConfig.load(
                config=merge_dict(
                    self.pipeline.repo_config.notification_config,
                    self.pipeline.notification_config,
                )
            )
        )

        self.concurrency_config = ConcurrencyConfig.load(
            config=self.pipeline.concurrency_config
        )

        # Other pipeline scheduling settings. Defaults to False when the run has
        # no associated schedule (e.g. ad-hoc runs).
        self.allow_blocks_to_fail = (
            self.pipeline_schedule.get_settings().allow_blocks_to_fail
            if self.pipeline_schedule else False
        )
|
|
115
|
+
|
|
116
|
+
    @safe_db_query
    def start(self, should_schedule: bool = True) -> bool:
        """Start the pipeline run.

        This method starts the pipeline run by performing necessary actions
        * Update the pipeline run status
        * Optionally scheduling the pipeline execution

        Args:
            should_schedule (bool, optional): Flag indicating whether to schedule
                the pipeline execution. Defaults to True.

        Returns:
            bool: Whether the pipeline run is started successfully
        """
        # Idempotent: a run that is already RUNNING is treated as started.
        if self.pipeline_run.status == PipelineRun.PipelineRunStatus.RUNNING:
            return True

        tags = self.build_tags()

        is_integration = PipelineType.INTEGRATION == self.pipeline.type

        try:
            block_runs = BlockRun.query.filter(
                BlockRun.pipeline_run_id == self.pipeline_run.id).all()

            # Only create block runs when none exist yet (first start, not a retry).
            if len(block_runs) == 0:
                if is_integration:
                    clear_source_output_files(
                        self.pipeline_run,
                        self.logger,
                    )
                    initialize_state_and_runs(
                        self.pipeline_run,
                        self.logger,
                        self.pipeline_run.get_variables(),
                    )
                else:
                    self.pipeline_run.create_block_runs()
        except Exception as e:
            # Initialization failure is terminal for the run: mark FAILED and notify.
            error_msg = 'Fail to initialize block runs.'
            self.logger.exception(
                error_msg,
                **merge_dict(tags, dict(
                    error=e,
                )),
            )
            self.pipeline_run.update(status=PipelineRun.PipelineRunStatus.FAILED)
            self.notification_sender.send_pipeline_run_failure_message(
                pipeline=self.pipeline,
                pipeline_run=self.pipeline_run,
                error=error_msg,
            )
            return False

        self.pipeline_run.update(
            started_at=datetime.now(tz=pytz.UTC),
            status=PipelineRun.PipelineRunStatus.RUNNING,
        )
        if should_schedule:
            self.schedule()
        return True
|
|
178
|
+
|
|
179
|
+
@safe_db_query
|
|
180
|
+
def stop(self) -> None:
|
|
181
|
+
stop_pipeline_run(
|
|
182
|
+
self.pipeline_run,
|
|
183
|
+
self.pipeline,
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
@safe_db_query
|
|
187
|
+
def schedule(self, block_runs: List[BlockRun] = None) -> None:
|
|
188
|
+
if not lock.try_acquire_lock(f'pipeline_run_{self.pipeline_run.id}', timeout=10):
|
|
189
|
+
return
|
|
190
|
+
|
|
191
|
+
self.__run_heartbeat()
|
|
192
|
+
|
|
193
|
+
for b in self.pipeline_run.block_runs:
|
|
194
|
+
b.refresh()
|
|
195
|
+
|
|
196
|
+
if PipelineType.STREAMING == self.pipeline.type:
|
|
197
|
+
self.__schedule_pipeline()
|
|
198
|
+
else:
|
|
199
|
+
schedule = PipelineSchedule.get(
|
|
200
|
+
self.pipeline_run.pipeline_schedule_id,
|
|
201
|
+
)
|
|
202
|
+
backfills = schedule.backfills if schedule else []
|
|
203
|
+
backfill = backfills[0] if len(backfills) >= 1 else None
|
|
204
|
+
|
|
205
|
+
if backfill is not None and \
|
|
206
|
+
backfill.status == Backfill.Status.INITIAL and \
|
|
207
|
+
self.pipeline_run.status == PipelineRun.PipelineRunStatus.RUNNING:
|
|
208
|
+
backfill.update(
|
|
209
|
+
status=Backfill.Status.RUNNING,
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
if self.pipeline_run.all_blocks_completed(self.allow_blocks_to_fail):
|
|
213
|
+
if PipelineType.INTEGRATION == self.pipeline.type:
|
|
214
|
+
tags = self.build_tags()
|
|
215
|
+
calculate_pipeline_run_metrics(
|
|
216
|
+
self.pipeline_run,
|
|
217
|
+
logger=self.logger,
|
|
218
|
+
logging_tags=tags,
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
if self.pipeline_run.any_blocks_failed():
|
|
222
|
+
self.pipeline_run.update(
|
|
223
|
+
status=PipelineRun.PipelineRunStatus.FAILED,
|
|
224
|
+
completed_at=datetime.now(tz=pytz.UTC),
|
|
225
|
+
)
|
|
226
|
+
failed_block_runs = self.pipeline_run.failed_block_runs
|
|
227
|
+
error_msg = 'Failed blocks: '\
|
|
228
|
+
f'{", ".join([b.block_uuid for b in failed_block_runs])}.'
|
|
229
|
+
self.notification_sender.send_pipeline_run_failure_message(
|
|
230
|
+
error=error_msg,
|
|
231
|
+
pipeline=self.pipeline,
|
|
232
|
+
pipeline_run=self.pipeline_run,
|
|
233
|
+
)
|
|
234
|
+
else:
|
|
235
|
+
self.pipeline_run.complete()
|
|
236
|
+
self.notification_sender.send_pipeline_run_success_message(
|
|
237
|
+
pipeline=self.pipeline,
|
|
238
|
+
pipeline_run=self.pipeline_run,
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
asyncio.run(UsageStatisticLogger().pipeline_run_ended(self.pipeline_run))
|
|
242
|
+
|
|
243
|
+
self.logger_manager.output_logs_to_destination()
|
|
244
|
+
|
|
245
|
+
if schedule:
|
|
246
|
+
if backfill is not None:
|
|
247
|
+
"""
|
|
248
|
+
Exclude old pipeline run retries associated with the backfill
|
|
249
|
+
(if a backfill's runs had failed and the backfill was retried, those
|
|
250
|
+
previous runs are no longer relevant) and check if the backfill's
|
|
251
|
+
latest pipeline runs with different execution dates were successfull.
|
|
252
|
+
"""
|
|
253
|
+
latest_pipeline_runs = \
|
|
254
|
+
PipelineSchedule.fetch_latest_pipeline_runs_without_retries(
|
|
255
|
+
[backfill.pipeline_schedule_id]
|
|
256
|
+
)
|
|
257
|
+
if all([PipelineRun.PipelineRunStatus.COMPLETED == pr.status
|
|
258
|
+
for pr in latest_pipeline_runs]):
|
|
259
|
+
backfill.update(
|
|
260
|
+
completed_at=datetime.now(tz=pytz.UTC),
|
|
261
|
+
status=Backfill.Status.COMPLETED,
|
|
262
|
+
)
|
|
263
|
+
schedule.update(
|
|
264
|
+
status=ScheduleStatus.INACTIVE,
|
|
265
|
+
)
|
|
266
|
+
# If running once, update the schedule to inactive when pipeline run is done
|
|
267
|
+
elif schedule.status == ScheduleStatus.ACTIVE and \
|
|
268
|
+
schedule.schedule_type == ScheduleType.TIME and \
|
|
269
|
+
schedule.schedule_interval == ScheduleInterval.ONCE:
|
|
270
|
+
|
|
271
|
+
schedule.update(status=ScheduleStatus.INACTIVE)
|
|
272
|
+
elif self.__check_pipeline_run_timeout() or \
|
|
273
|
+
(self.pipeline_run.any_blocks_failed() and
|
|
274
|
+
not self.allow_blocks_to_fail):
|
|
275
|
+
self.pipeline_run.update(
|
|
276
|
+
status=PipelineRun.PipelineRunStatus.FAILED)
|
|
277
|
+
|
|
278
|
+
# Backfill status updated to "failed" if at least 1 of its pipeline runs failed
|
|
279
|
+
if backfill is not None:
|
|
280
|
+
latest_pipeline_runs = \
|
|
281
|
+
PipelineSchedule.fetch_latest_pipeline_runs_without_retries(
|
|
282
|
+
[backfill.pipeline_schedule_id]
|
|
283
|
+
)
|
|
284
|
+
if any(
|
|
285
|
+
[PipelineRun.PipelineRunStatus.FAILED == pr.status
|
|
286
|
+
for pr in latest_pipeline_runs]
|
|
287
|
+
):
|
|
288
|
+
backfill.update(
|
|
289
|
+
status=Backfill.Status.FAILED,
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
asyncio.run(UsageStatisticLogger().pipeline_run_ended(self.pipeline_run))
|
|
293
|
+
|
|
294
|
+
failed_block_runs = self.pipeline_run.failed_block_runs
|
|
295
|
+
if len(failed_block_runs) > 0:
|
|
296
|
+
error_msg = 'Failed blocks: '\
|
|
297
|
+
f'{", ".join([b.block_uuid for b in failed_block_runs])}.'
|
|
298
|
+
else:
|
|
299
|
+
error_msg = 'Pipelien run timed out.'
|
|
300
|
+
self.notification_sender.send_pipeline_run_failure_message(
|
|
301
|
+
pipeline=self.pipeline,
|
|
302
|
+
pipeline_run=self.pipeline_run,
|
|
303
|
+
error=error_msg,
|
|
304
|
+
)
|
|
305
|
+
# Cancel block runs that are still in progress for the pipeline run.
|
|
306
|
+
cancel_block_runs_and_jobs(self.pipeline_run, self.pipeline)
|
|
307
|
+
elif PipelineType.INTEGRATION == self.pipeline.type:
|
|
308
|
+
self.__schedule_integration_streams(block_runs)
|
|
309
|
+
elif self.pipeline.run_pipeline_in_one_process:
|
|
310
|
+
self.__schedule_pipeline()
|
|
311
|
+
else:
|
|
312
|
+
if not self.__check_block_run_timeout():
|
|
313
|
+
self.__schedule_blocks(block_runs)
|
|
314
|
+
|
|
315
|
+
@safe_db_query
|
|
316
|
+
def on_block_complete(
|
|
317
|
+
self,
|
|
318
|
+
block_uuid: str,
|
|
319
|
+
metrics: Dict = None,
|
|
320
|
+
) -> None:
|
|
321
|
+
block_run = BlockRun.get(pipeline_run_id=self.pipeline_run.id, block_uuid=block_uuid)
|
|
322
|
+
|
|
323
|
+
@retry(retries=2, delay=5)
|
|
324
|
+
def update_status(metrics=metrics):
|
|
325
|
+
metrics_prev = block_run.metrics or {}
|
|
326
|
+
if metrics:
|
|
327
|
+
metrics_prev.update(metrics)
|
|
328
|
+
|
|
329
|
+
block_run.update(
|
|
330
|
+
status=BlockRun.BlockRunStatus.COMPLETED,
|
|
331
|
+
completed_at=datetime.now(tz=pytz.UTC),
|
|
332
|
+
metrics=metrics_prev,
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
update_status()
|
|
336
|
+
|
|
337
|
+
self.logger.info(
|
|
338
|
+
f'BlockRun {block_run.id} (block_uuid: {block_uuid}) completes.',
|
|
339
|
+
**self.build_tags(
|
|
340
|
+
block_run_id=block_run.id,
|
|
341
|
+
block_uuid=block_run.block_uuid,
|
|
342
|
+
),
|
|
343
|
+
)
|
|
344
|
+
|
|
345
|
+
self.pipeline_run.refresh()
|
|
346
|
+
if self.pipeline_run.status != PipelineRun.PipelineRunStatus.RUNNING:
|
|
347
|
+
return
|
|
348
|
+
else:
|
|
349
|
+
self.schedule()
|
|
350
|
+
|
|
351
|
+
@safe_db_query
|
|
352
|
+
def on_block_complete_without_schedule(
|
|
353
|
+
self,
|
|
354
|
+
block_uuid: str,
|
|
355
|
+
metrics: Dict = None,
|
|
356
|
+
) -> None:
|
|
357
|
+
block_run = BlockRun.get(pipeline_run_id=self.pipeline_run.id, block_uuid=block_uuid)
|
|
358
|
+
|
|
359
|
+
@retry(retries=2, delay=5)
|
|
360
|
+
def update_status(metrics=metrics):
|
|
361
|
+
metrics_prev = block_run.metrics or {}
|
|
362
|
+
if metrics:
|
|
363
|
+
metrics_prev.update(metrics)
|
|
364
|
+
|
|
365
|
+
block_run.update(
|
|
366
|
+
status=BlockRun.BlockRunStatus.COMPLETED,
|
|
367
|
+
completed_at=datetime.now(tz=pytz.UTC),
|
|
368
|
+
metrics=metrics_prev,
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
update_status()
|
|
372
|
+
|
|
373
|
+
self.logger.info(
|
|
374
|
+
f'BlockRun {block_run.id} (block_uuid: {block_uuid}) completes.',
|
|
375
|
+
**self.build_tags(
|
|
376
|
+
block_run_id=block_run.id,
|
|
377
|
+
block_uuid=block_run.block_uuid,
|
|
378
|
+
),
|
|
379
|
+
)
|
|
380
|
+
|
|
381
|
+
    @safe_db_query
    def on_block_failure(self, block_uuid: str, **kwargs) -> None:
        """Mark a block run FAILED, record error details, and react to the failure.

        Error details passed via ``kwargs['error']`` are folded into the block
        run's metrics before the status update is persisted. When blocks are not
        allowed to fail and the pipeline is a data integration pipeline, all
        other stream jobs for this run are killed and run metrics are computed.

        Args:
            block_uuid (str): UUID of the failed block run's block.
            **kwargs: May contain 'error', a dict with 'error', 'errors' and
                'message' entries describing the failure.
        """
        block_run = BlockRun.get(pipeline_run_id=self.pipeline_run.id, block_uuid=block_uuid)
        metrics = block_run.metrics or {}

        @retry(retries=2, delay=5)
        def update_status():
            # NOTE: `metrics` is mutated below (metrics['error']) before this
            # closure is invoked, so the error details are persisted here.
            block_run.update(
                metrics=metrics,
                status=BlockRun.BlockRunStatus.FAILED,
            )

        error = kwargs.get('error', {})
        if error:
            metrics['error'] = dict(
                error=str(error.get('error')),
                errors=error.get('errors'),
                message=error.get('message')
            )

        update_status()

        tags = self.build_tags(
            block_run_id=block_run.id,
            block_uuid=block_run.block_uuid,
            error=error.get('error')
        )

        self.logger.exception(
            f'BlockRun {block_run.id} (block_uuid: {block_uuid}) failed.',
            **tags,
        )

        if not self.allow_blocks_to_fail:
            if PipelineType.INTEGRATION == self.pipeline.type:
                # If a block/stream fails, stop all other streams
                job_manager.kill_pipeline_run_job(self.pipeline_run.id)
                for stream in self.streams:
                    job_manager.kill_integration_stream_job(
                        self.pipeline_run.id,
                        stream.get('tap_stream_id')
                    )

                calculate_pipeline_run_metrics(
                    self.pipeline_run,
                    logger=self.logger,
                    logging_tags=tags,
                )
|
|
429
|
+
|
|
430
|
+
    def memory_usage_failure(self, tags: Dict = None) -> None:
        """Stop the pipeline run because memory usage crossed MEMORY_USAGE_MAXIMUM.

        Logs the condition, stops the run (cancelling in-progress work), sends a
        failure notification, and, for integration pipelines, computes run metrics.

        Args:
            tags (Dict, optional): Logging tags to attach. Defaults to None.
        """
        if tags is None:
            tags = dict()
        msg = 'Memory usage across all pipeline runs has reached or exceeded the maximum '\
            f'limit of {int(MEMORY_USAGE_MAXIMUM * 100)}%.'
        # NOTE(review): passes `tags=tags` as a keyword here, while other call
        # sites spread tags with `**tags` — presumably DictLogger accepts both;
        # confirm before unifying.
        self.logger.info(msg, tags=tags)

        self.stop()

        self.notification_sender.send_pipeline_run_failure_message(
            pipeline=self.pipeline,
            pipeline_run=self.pipeline_run,
            summary=msg,
        )

        if PipelineType.INTEGRATION == self.pipeline.type:
            calculate_pipeline_run_metrics(
                self.pipeline_run,
                logger=self.logger,
                logging_tags=tags,
            )
|
|
451
|
+
|
|
452
|
+
def build_tags(self, **kwargs):
|
|
453
|
+
base_tags = dict(
|
|
454
|
+
pipeline_run_id=self.pipeline_run.id,
|
|
455
|
+
pipeline_schedule_id=self.pipeline_run.pipeline_schedule_id,
|
|
456
|
+
pipeline_uuid=self.pipeline.uuid,
|
|
457
|
+
)
|
|
458
|
+
if HOSTNAME:
|
|
459
|
+
base_tags['hostname'] = HOSTNAME
|
|
460
|
+
return merge_dict(kwargs, base_tags)
|
|
461
|
+
|
|
462
|
+
    @safe_db_query
    def __check_pipeline_run_timeout(self) -> bool:
        """
        Check run timeout for pipeline run. The method checks if a pipeline run timeout is set
        and compares to the pipeline run time. If the run time is greater than the timeout,
        the run will be put into a failed state and the corresponding job is cancelled.

        Returns:
            bool: True if the pipeline run has timed out, False otherwise.
        """
        try:
            pipeline_run_timeout = self.pipeline_run.pipeline_schedule.timeout

            if self.pipeline_run.started_at and pipeline_run_timeout:
                time_difference = datetime.now(tz=pytz.UTC).timestamp() - \
                    self.pipeline_run.started_at.timestamp()
                if time_difference > int(pipeline_run_timeout):
                    self.logger.error(
                        f'Pipeline run timed out after {int(time_difference)} seconds',
                        **self.build_tags(),
                    )
                    return True
        except Exception:
            # Deliberately best-effort: any error while checking the timeout
            # (e.g. missing schedule or malformed timeout) means "not timed out".
            pass

        return False
|
|
488
|
+
|
|
489
|
+
    @safe_db_query
    def __check_block_run_timeout(self) -> bool:
        """
        Check run timeout block runs. Currently only works for batch pipelines that are run
        using the `__schedule_blocks` method. This method checks if a block run has exceeded
        its timeout and puts the block run into a failed state and cancels the block run job.

        Returns:
            bool: True if any block runs have timed out, False otherwise.
        """
        block_runs = self.pipeline_run.running_block_runs

        any_block_run_timed_out = False
        for block_run in block_runs:
            try:
                block = self.pipeline.get_block(block_run.block_uuid)
                if block and block.timeout and block_run.started_at:
                    time_difference = datetime.now(tz=pytz.UTC).timestamp() - \
                        block_run.started_at.timestamp()
                    if time_difference > int(block.timeout):
                        # Get logger from block_executor so that the error log shows up in the
                        # block run log file and not the pipeline run log file.
                        block_executor = ExecutorFactory.get_block_executor(
                            self.pipeline,
                            block.uuid,
                            execution_partition=self.pipeline_run.execution_partition,
                        )
                        block_executor.logger.error(
                            f'Block {block_run.block_uuid} timed out after ' +
                            f'{int(time_difference)} seconds',
                            **block_executor.build_tags(
                                block_run_id=block_run.id,
                                pipeline_run_id=self.pipeline_run.id,
                            ),
                        )
                        self.on_block_failure(block_run.block_uuid)
                        job_manager.kill_block_run_job(block_run.id)
                        any_block_run_timed_out = True
            except Exception:
                # Best-effort per block run: a failure while checking one block's
                # timeout must not prevent checking the remaining block runs.
                pass
        return any_block_run_timed_out
|
|
530
|
+
|
|
531
|
+
    def __schedule_blocks(self, block_runs: List[BlockRun] = None) -> None:
        """Schedule the block runs for execution.

        This method schedules the block runs for execution by adding jobs to the job manager.
        It updates the statuses of the initial block runs and fetches any crashed block runs.
        The block runs to be scheduled are determined based on the provided block runs or the
        executable block runs of the pipeline run. The method adds jobs to the job manager for
        each block run, invoking the `run_block` function with the pipeline run ID, block run ID,
        variables, and tags as arguments.

        Args:
            block_runs (List[BlockRun], optional): A list of block runs. Defaults to None.

        Returns:
            None
        """
        self.pipeline_run.update_block_run_statuses(self.pipeline_run.initial_block_runs)
        if block_runs is None:
            block_runs_to_schedule = self.pipeline_run.executable_block_runs(
                allow_blocks_to_fail=self.allow_blocks_to_fail,
            )
        else:
            block_runs_to_schedule = block_runs
        # Crashed runs (jobs that died) are re-queued ahead of the new batch.
        block_runs_to_schedule = \
            self.__fetch_crashed_block_runs() + block_runs_to_schedule

        # Cap how many block runs are dispatched this pass when a concurrency
        # limit is configured; quota accounts for already queued/running runs.
        block_run_quota = len(block_runs_to_schedule)
        if self.concurrency_config.block_run_limit is not None:
            queued_or_running_block_runs = self.pipeline_run.queued_or_running_block_runs
            block_run_quota = self.concurrency_config.block_run_limit -\
                len(queued_or_running_block_runs)
            if block_run_quota <= 0:
                return

        for b in block_runs_to_schedule[:block_run_quota]:
            tags = dict(
                block_run_id=b.id,
                block_uuid=b.block_uuid,
            )

            b.update(
                status=BlockRun.BlockRunStatus.QUEUED,
            )

            # The positional None arguments fill run_block's optional parameters;
            # their order is significant — do not reorder.
            job_manager.add_job(
                JobType.BLOCK_RUN,
                b.id,
                run_block,
                # args
                self.pipeline_run.id,
                b.id,
                self.pipeline_run.get_variables(),
                self.build_tags(**tags),
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                [dict(
                    block_uuid=br.block_uuid,
                    id=br.id,
                    metrics=br.metrics,
                    status=br.status,
                ) for br in self.pipeline_run.block_runs],
            )
|
|
598
|
+
|
|
599
|
+
    def __schedule_integration_streams(self, block_runs: List[BlockRun] = None) -> None:
        """Schedule the integration streams for execution.

        This method schedules the integration streams for execution by adding jobs to the job
        manager. It determines the integration streams that need to be scheduled based on the
        provided block runs or the pipeline run's block runs. It filters the parallel and
        sequential streams to ensure only streams without corresponding integration stream jobs
        are scheduled. The method generates the necessary variables and runtime arguments for the
        pipeline execution. Jobs are added to the job manager to invoke the `run_integration_stream`
        function for parallel streams and the `run_integration_streams` function for sequential
        streams.

        Args:
            block_runs (List[BlockRun], optional): A list of block runs. Defaults to None.

        Returns:
            None
        """
        if block_runs is not None:
            block_runs_to_schedule = block_runs
        else:
            # Fetch all "in progress" blocks to handle crashed block runs
            block_runs_to_schedule = [
                b for b in self.pipeline_run.block_runs
                if b.status in [
                    BlockRun.BlockRunStatus.INITIAL,
                    BlockRun.BlockRunStatus.QUEUED,
                    BlockRun.BlockRunStatus.RUNNING,
                ]
            ]

        if len(block_runs_to_schedule) > 0:
            tags = self.build_tags()

            block_run_stream_ids = set()
            for br in block_runs_to_schedule:
                # assumes block_uuid contains ':'-separated parts with the stream
                # id second-to-last (e.g. '<block>:<stream>:<index>') — a uuid
                # without ':' would raise IndexError; TODO confirm format upstream
                stream_id = br.block_uuid.split(':')[-2]
                if stream_id:
                    block_run_stream_ids.add(stream_id)

            filtered_streams = \
                [s for s in self.streams if s['tap_stream_id'] in block_run_stream_ids]
            parallel_streams = list(filter(lambda s: s.get('run_in_parallel'), filtered_streams))
            sequential_streams = list(filter(
                lambda s: not s.get('run_in_parallel'),
                filtered_streams,
            ))

            # Filter parallel streams so that we are only left with block runs for streams
            # that do not have a corresponding integration stream job.
            parallel_streams_to_schedule = []
            for stream in parallel_streams:
                tap_stream_id = stream.get('tap_stream_id')
                if not job_manager.has_integration_stream_job(self.pipeline_run.id, tap_stream_id):
                    parallel_streams_to_schedule.append(stream)

            # Stop scheduling if there are no streams to schedule.
            if (not sequential_streams or job_manager.has_pipeline_run_job(self.pipeline_run.id)) \
                    and len(parallel_streams_to_schedule) == 0:
                return

            # Generate global variables and runtime arguments for pipeline execution.
            variables = self.pipeline_run.get_variables(
                extra_variables=get_extra_variables(self.pipeline),
            )

            pipeline_schedule = self.pipeline_run.pipeline_schedule
            schedule_interval = pipeline_schedule.schedule_interval
            if ScheduleType.API == pipeline_schedule.schedule_type:
                execution_date = datetime.utcnow()
            else:
                # This will be none if trigger is API type
                execution_date = pipeline_schedule.current_execution_date()

            end_date = None
            start_date = None
            date_diff = None

            # Derive the [_start_date, _end_date] window from the schedule interval.
            if ScheduleInterval.ONCE == schedule_interval:
                end_date = variables.get('_end_date')
                start_date = variables.get('_start_date')
            elif ScheduleInterval.HOURLY == schedule_interval:
                date_diff = timedelta(hours=1)
            elif ScheduleInterval.DAILY == schedule_interval:
                date_diff = timedelta(days=1)
            elif ScheduleInterval.WEEKLY == schedule_interval:
                date_diff = timedelta(weeks=1)
            elif ScheduleInterval.MONTHLY == schedule_interval:
                date_diff = relativedelta(months=1)

            if date_diff is not None:
                end_date = (execution_date).isoformat()
                start_date = (execution_date - date_diff).isoformat()

            runtime_arguments = dict(
                _end_date=end_date,
                _execution_date=execution_date.isoformat(),
                _execution_partition=self.pipeline_run.execution_partition,
                _start_date=start_date,
            )

            executable_block_runs = [b.id for b in block_runs_to_schedule]

            self.logger.info(
                f'Start executing PipelineRun {self.pipeline_run.id}: '
                f'pipeline {self.pipeline.uuid}',
                **tags,
            )

            # Parallel streams: one INTEGRATION_STREAM job per stream.
            for stream in parallel_streams_to_schedule:
                tap_stream_id = stream.get('tap_stream_id')
                job_manager.add_job(
                    JobType.INTEGRATION_STREAM,
                    f'{self.pipeline_run.id}_{tap_stream_id}',
                    run_integration_stream,
                    # args
                    stream,
                    set(executable_block_runs),
                    tags,
                    runtime_arguments,
                    self.pipeline_run.id,
                    variables,
                )

            if job_manager.has_pipeline_run_job(self.pipeline_run.id) or \
                    len(sequential_streams) == 0:
                return

            # Sequential streams: a single PIPELINE_RUN job that runs them in order.
            job_manager.add_job(
                JobType.PIPELINE_RUN,
                self.pipeline_run.id,
                run_integration_streams,
                # args
                sequential_streams,
                set(executable_block_runs),
                tags,
                runtime_arguments,
                self.pipeline_run.id,
                variables,
            )
|
|
739
|
+
|
|
740
|
+
    def __schedule_pipeline(self) -> None:
        """Schedule the pipeline run for execution.

        This method schedules the pipeline run for execution by adding a job to the job manager.
        If a job for the pipeline run already exists, the method returns without scheduling a new
        job. The job added to the job manager invokes the `run_pipeline` function with the
        pipeline run ID, variables, and tags as arguments.

        Returns:
            None
        """
        # Idempotent: never enqueue a second job for the same pipeline run.
        if job_manager.has_pipeline_run_job(self.pipeline_run.id):
            return
        self.logger.info(
            f'Start a process for PipelineRun {self.pipeline_run.id}',
            **self.build_tags(),
        )
        if PipelineType.STREAMING != self.pipeline.type:
            # Reset crashed block runs to INITIAL status
            self.__fetch_crashed_block_runs()
        job_manager.add_job(
            JobType.PIPELINE_RUN,
            self.pipeline_run.id,
            run_pipeline,
            # args
            self.pipeline_run.id,
            self.pipeline_run.get_variables(),
            self.build_tags(),
        )
|
|
769
|
+
|
|
770
|
+
def __fetch_crashed_block_runs(self) -> None:
|
|
771
|
+
"""Fetch and handle crashed block runs.
|
|
772
|
+
|
|
773
|
+
This method fetches the running or queued block runs of the pipeline run and checks if
|
|
774
|
+
their corresponding job is still active. If a job is no longer active, the status of the
|
|
775
|
+
block run is updated to 'INITIAL' to indicate that it needs to be re-executed. A list of
|
|
776
|
+
crashed block runs is returned.
|
|
777
|
+
|
|
778
|
+
Returns:
|
|
779
|
+
List[BlockRun]: A list of crashed block runs.
|
|
780
|
+
"""
|
|
781
|
+
running_or_queued_block_runs = [b for b in self.pipeline_run.block_runs if b.status in [
|
|
782
|
+
BlockRun.BlockRunStatus.RUNNING,
|
|
783
|
+
BlockRun.BlockRunStatus.QUEUED,
|
|
784
|
+
]]
|
|
785
|
+
|
|
786
|
+
crashed_runs = []
|
|
787
|
+
for br in running_or_queued_block_runs:
|
|
788
|
+
if not job_manager.has_block_run_job(br.id):
|
|
789
|
+
br.update(status=BlockRun.BlockRunStatus.INITIAL)
|
|
790
|
+
crashed_runs.append(br)
|
|
791
|
+
|
|
792
|
+
return crashed_runs
|
|
793
|
+
|
|
794
|
+
def __run_heartbeat(self) -> None:
|
|
795
|
+
load1, load5, load15, cpu_count = get_compute()
|
|
796
|
+
cpu_usage = load15 / cpu_count if cpu_count else None
|
|
797
|
+
|
|
798
|
+
free_memory, used_memory, total_memory = get_memory()
|
|
799
|
+
memory_usage = used_memory / total_memory if total_memory else None
|
|
800
|
+
|
|
801
|
+
tags = self.build_tags(
|
|
802
|
+
cpu=load15,
|
|
803
|
+
cpu_total=cpu_count,
|
|
804
|
+
cpu_usage=cpu_usage,
|
|
805
|
+
memory=used_memory,
|
|
806
|
+
memory_total=total_memory,
|
|
807
|
+
memory_usage=memory_usage,
|
|
808
|
+
)
|
|
809
|
+
|
|
810
|
+
self.logger.info(
|
|
811
|
+
f'Pipeline {self.pipeline.uuid} for run {self.pipeline_run.id} '
|
|
812
|
+
f'in schedule {self.pipeline_run.pipeline_schedule_id} is alive.',
|
|
813
|
+
**tags,
|
|
814
|
+
)
|
|
815
|
+
|
|
816
|
+
if memory_usage and memory_usage >= MEMORY_USAGE_MAXIMUM:
|
|
817
|
+
self.memory_usage_failure(tags)
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
def run_integration_streams(
    streams: List[Dict],
    *args,
):
    """Run each integration stream in order, forwarding ``*args`` to every call."""
    for stream_config in streams:
        run_integration_stream(stream_config, *args)
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def run_integration_stream(
    stream: Dict,
    executable_block_runs: Set[int],
    tags: Dict,
    runtime_arguments: Dict,
    pipeline_run_id: int,
    variables: Dict,
):
    """Run an integration stream within the pipeline.

    This method executes an integration stream within the pipeline run. It iterates through each
    stream part and executes the corresponding block runs in order: data loader, then any
    transformer blocks, then the data exporter. It handles the configuration passed into each
    block (destination table, part index, selected streams) and, after a successful block run,
    calculates source/destination metrics for the stream where applicable.

    Args:
        stream (Dict): The configuration of the integration stream.
        executable_block_runs (Set[int]): A set of executable block run IDs.
        tags (Dict): A dictionary of tags for logging.
        runtime_arguments (Dict): A dictionary of runtime arguments.
        pipeline_run_id (int): The ID of the pipeline run.
        variables (Dict): A dictionary of variables.
    """
    pipeline_run = PipelineRun.query.get(pipeline_run_id)
    pipeline_scheduler = PipelineScheduler(pipeline_run)
    pipeline = pipeline_scheduler.pipeline
    data_loader_block = pipeline.data_loader
    data_exporter_block = pipeline.data_exporter

    tap_stream_id = stream['tap_stream_id']
    destination_table = stream.get('destination_table', tap_stream_id)

    # all_block_runs is a list of all block runs for the pipeline run
    all_block_runs = BlockRun.query.filter(BlockRun.pipeline_run_id == pipeline_run.id)
    # block_runs is a list of all executable blocks runs for the pipeline run
    block_runs = list(filter(lambda br: br.id in executable_block_runs, all_block_runs))

    # block_runs_for_stream is a list of block runs for the specified stream
    block_runs_for_stream = list(filter(lambda br: tap_stream_id in br.block_uuid, block_runs))
    if len(block_runs_for_stream) == 0:
        return

    # Highest part index among the executable block runs for this stream.
    indexes = [0]
    for br in block_runs_for_stream:
        parts = br.block_uuid.split(':')
        if len(parts) >= 3:
            indexes.append(int(parts[2]))
    max_index = max(indexes)

    all_block_runs_for_stream = list(filter(
        lambda br: tap_stream_id in br.block_uuid,
        all_block_runs,
    ))
    # Highest part index among ALL block runs for this stream (used to detect the
    # final part so the destination knows when the stream is complete).
    all_indexes = [0]
    for br in all_block_runs_for_stream:
        # Block run block uuid format: "{block_uuid}:{stream_name}:{index}"
        parts = br.block_uuid.split(':')
        if len(parts) >= 3:
            all_indexes.append(int(parts[2]))
    max_index_for_stream = max(all_indexes)

    # Streams can be split up into multiple parts if the source has a large amount of
    # data. Loop through each part of the stream, and execute the blocks runs.
    for idx in range(max_index + 1):
        # Walk the linear chain loader -> transformers -> exporter, collecting the
        # block run for each block of this stream part.
        block_runs_in_order = []
        current_block = data_loader_block

        while True:
            # The lambda intentionally closes over loop variables; `find` evaluates it
            # immediately within this iteration, so late binding is not an issue here.
            block_runs_in_order.append(
                find(
                    lambda b: b.block_uuid ==
                    f'{current_block.uuid}:{tap_stream_id}:{idx}',  # noqa: B023
                    all_block_runs,
                )
            )
            downstream_blocks = current_block.downstream_blocks
            if len(downstream_blocks) == 0:
                break
            # Integration pipelines are linear; follow the single downstream block.
            current_block = downstream_blocks[0]

        data_loader_uuid = f'{data_loader_block.uuid}:{tap_stream_id}:{idx}'
        data_exporter_uuid = f'{data_exporter_block.uuid}:{tap_stream_id}:{idx}'

        data_loader_block_run = find(
            lambda b, u=data_loader_uuid: b.block_uuid == u,
            all_block_runs,
        )
        data_exporter_block_run = find(
            lambda b, u=data_exporter_uuid: b.block_uuid == u,
            block_runs_for_stream,
        )
        if not data_loader_block_run or not data_exporter_block_run:
            continue

        transformer_block_runs = [br for br in block_runs_in_order if (
            br.block_uuid not in [data_loader_uuid, data_exporter_uuid] and
            br.id in executable_block_runs
        )]

        index = stream.get('index', idx)

        # Create config for the block runs. This metadata will be passed into the
        # block before block execution.
        shared_dict = dict(
            destination_table=destination_table,
            index=index,
            is_last_block_run=(index == max_index_for_stream),
            selected_streams=[
                tap_stream_id,
            ],
        )
        block_runs_and_configs = [
            (data_loader_block_run, shared_dict),
        ] + [(br, shared_dict) for br in transformer_block_runs] + [
            (data_exporter_block_run, shared_dict),
        ]
        # If only the exporter is executable (e.g. a retry of the last step), run just
        # the exporter; if the loader already succeeded earlier, skip it.
        if len(executable_block_runs) == 1 and \
                data_exporter_block_run.id in executable_block_runs:
            block_runs_and_configs = block_runs_and_configs[-1:]
        elif data_loader_block_run.id not in executable_block_runs:
            block_runs_and_configs = block_runs_and_configs[1:]

        block_failed = False
        for _, tup in enumerate(block_runs_and_configs):
            block_run, template_runtime_configuration = tup

            tags_updated = merge_dict(tags, dict(
                block_run_id=block_run.id,
                block_uuid=block_run.block_uuid,
            ))

            # Once one block fails (and blocks are allowed to fail), mark the rest of
            # the chain as UPSTREAM_FAILED instead of executing it.
            if block_failed:
                block_run.update(
                    status=BlockRun.BlockRunStatus.UPSTREAM_FAILED,
                )
                continue

            # Bail out if the pipeline run was cancelled/completed elsewhere.
            pipeline_run.refresh()
            if pipeline_run.status != PipelineRun.PipelineRunStatus.RUNNING:
                return

            block_run.update(
                started_at=datetime.now(tz=pytz.UTC),
                status=BlockRun.BlockRunStatus.RUNNING,
            )
            pipeline_scheduler.logger.info(
                f'Start a process for BlockRun {block_run.id}',
                **tags_updated,
            )

            try:
                run_block(
                    pipeline_run_id,
                    block_run.id,
                    variables,
                    tags_updated,
                    pipeline_type=PipelineType.INTEGRATION,
                    verify_output=False,
                    # Not retry for data integration pipeline blocks
                    retry_config=dict(retries=0),
                    runtime_arguments=runtime_arguments,
                    schedule_after_complete=False,
                    template_runtime_configuration=template_runtime_configuration,
                )
            except Exception as e:
                if pipeline_scheduler.allow_blocks_to_fail:
                    block_failed = True
                else:
                    raise e
            else:
                # On success, compute per-stream metrics for loader/exporter blocks.
                tags2 = merge_dict(tags_updated.get('tags', {}), dict(
                    destination_table=destination_table,
                    index=index,
                    stream=tap_stream_id,
                ))
                if f'{data_loader_block.uuid}:{tap_stream_id}' in block_run.block_uuid:
                    calculate_source_metrics(
                        pipeline_run,
                        block_run,
                        stream=tap_stream_id,
                        logger=pipeline_scheduler.logger,
                        logging_tags=merge_dict(tags_updated, dict(tags=tags2)),
                    )
                elif f'{data_exporter_block.uuid}:{tap_stream_id}' in block_run.block_uuid:
                    calculate_destination_metrics(
                        pipeline_run,
                        block_run,
                        stream=tap_stream_id,
                        logger=pipeline_scheduler.logger,
                        logging_tags=merge_dict(tags_updated, dict(tags=tags2)),
                    )
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
def run_block(
    pipeline_run_id: int,
    block_run_id: int,
    variables: Dict,
    tags: Dict,
    input_from_output: Dict = None,
    pipeline_type: PipelineType = None,
    verify_output: bool = True,
    retry_config: Dict = None,
    runtime_arguments: Dict = None,
    schedule_after_complete: bool = False,
    template_runtime_configuration: Dict = None,
    block_run_dicts: List[Dict] = None,
) -> Any:
    """Execute a block within a pipeline run.
    Only run block that's with INITIAL or QUEUED status.

    Args:
        pipeline_run_id (int): The ID of the pipeline run.
        block_run_id (int): The ID of the block run.
        variables (Dict): A dictionary of variables.
        tags (Dict): A dictionary of tags for logging.
        input_from_output (Dict, optional): A dictionary mapping input names to output names.
        pipeline_type (PipelineType, optional): The type of pipeline.
        verify_output (bool, optional): Flag indicating whether to verify the output.
        retry_config (Dict, optional): A dictionary containing retry configuration.
        runtime_arguments (Dict, optional): A dictionary of runtime arguments. Used by data
            integration pipeline.
        schedule_after_complete (bool, optional): Flag indicating whether to schedule after
            completion.
        template_runtime_configuration (Dict, optional): A dictionary of template runtime
            configuration. Used by data integration pipeline.
        block_run_dicts (List[Dict], optional): Serialized block runs passed through to the
            block executor.

    Returns:
        Any: The result of executing the block.
    """

    # Skip execution if the pipeline run is no longer actively running.
    pipeline_run = PipelineRun.query.get(pipeline_run_id)
    if pipeline_run.status != PipelineRun.PipelineRunStatus.RUNNING:
        return {}

    # Only runnable statuses proceed; anything else (completed, failed, cancelled)
    # returns an empty result.
    block_run = BlockRun.query.get(block_run_id)
    if block_run.status not in [
        BlockRun.BlockRunStatus.INITIAL,
        BlockRun.BlockRunStatus.QUEUED,
        BlockRun.BlockRunStatus.RUNNING,
    ]:
        return {}

    block_run_data = dict(status=BlockRun.BlockRunStatus.RUNNING)
    # Preserve the original started_at on re-entry, except when the block run's metrics
    # indicate it is not a controller run — presumably to reset timing for child runs of
    # a data integration controller; confirm against block executor behavior.
    if not block_run.started_at or (block_run.metrics and not block_run.metrics.get('controller')):
        block_run_data['started_at'] = datetime.now(tz=pytz.UTC)

    block_run.update(**block_run_data)

    pipeline_scheduler = PipelineScheduler(pipeline_run)
    pipeline = pipeline_scheduler.pipeline

    pipeline_scheduler.logger.info(
        f'Execute PipelineRun {pipeline_run.id}, BlockRun {block_run.id}: '
        f'pipeline {pipeline.uuid} block {block_run.block_uuid}',
        **tags)

    # Choose the completion callback: scheduling the next blocks, or just marking done.
    if schedule_after_complete:
        on_complete = pipeline_scheduler.on_block_complete
    else:
        on_complete = pipeline_scheduler.on_block_complete_without_schedule

    execution_partition = pipeline_run.execution_partition
    block_uuid = block_run.block_uuid
    block = pipeline.get_block(block_uuid)

    # Default retry config: repo-level config overridden by block-level config.
    if block and retry_config is None:
        retry_config = merge_dict(
            get_repo_config(get_repo_path()).retry_config or dict(),
            block.retry_config or dict(),
        )

    return ExecutorFactory.get_block_executor(
        pipeline,
        block_uuid,
        execution_partition=execution_partition,
    ).execute(
        block_run_id=block_run.id,
        global_vars=variables,
        input_from_output=input_from_output,
        on_complete=on_complete,
        on_failure=pipeline_scheduler.on_block_failure,
        pipeline_run_id=pipeline_run_id,
        retry_config=retry_config,
        runtime_arguments=runtime_arguments,
        tags=tags,
        template_runtime_configuration=template_runtime_configuration,
        verify_output=verify_output,
        block_run_dicts=block_run_dicts,
    )
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
def run_pipeline(
    pipeline_run_id: int,
    variables: Dict,
    tags: Dict,
    allow_blocks_to_fail: bool = False,
):
    """Execute an entire pipeline run via its pipeline executor.

    Args:
        pipeline_run_id (int): The ID of the pipeline run to execute.
        variables (Dict): Global variables passed to the execution.
        tags (Dict): Tags for logging.
        allow_blocks_to_fail (bool, optional): Whether block failures should not abort
            the whole run. Defaults to False.
    """
    run = PipelineRun.query.get(pipeline_run_id)
    scheduler = PipelineScheduler(run)
    pipeline = scheduler.pipeline

    scheduler.logger.info(
        f'Execute PipelineRun {run.id}: pipeline {pipeline.uuid}',
        **tags,
    )

    executor_type = ExecutorFactory.get_pipeline_executor_type(pipeline)
    # Persisting the executor type is best-effort; execution proceeds regardless.
    try:
        run.update(executor_type=executor_type)
    except Exception:
        traceback.print_exc()

    executor = ExecutorFactory.get_pipeline_executor(
        pipeline,
        execution_partition=run.execution_partition,
        executor_type=executor_type,
    )
    executor.execute(
        allow_blocks_to_fail=allow_blocks_to_fail,
        global_vars=variables,
        pipeline_run_id=pipeline_run_id,
        tags=tags,
    )
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
def configure_pipeline_run_payload(
    pipeline_schedule: PipelineSchedule,
    pipeline_type: PipelineType,
    payload: Dict = None,
) -> Tuple[Dict, bool]:
    """Normalize the creation payload for a pipeline run triggered by a schedule.

    Fills in the schedule id and pipeline uuid, coerces ``execution_date`` to a
    ``datetime`` (defaulting to now), derives ``variables['execution_partition']``,
    and disables automatic block run creation for integration pipelines.

    Note: the ``payload`` dict passed in is mutated in place and also returned.

    Args:
        pipeline_schedule (PipelineSchedule): The schedule triggering the run.
        pipeline_type (PipelineType): The type of the pipeline.
        payload (Dict, optional): Partial payload to normalize. Defaults to None.

    Returns:
        Tuple[Dict, bool]: The normalized payload and whether the pipeline is an
            integration pipeline.
    """
    if payload is None:
        payload = dict()

    # Any falsy 'variables' value (missing, None, empty) is replaced by a fresh dict.
    if not payload.get('variables'):
        payload['variables'] = {}

    payload['pipeline_schedule_id'] = pipeline_schedule.id
    payload['pipeline_uuid'] = pipeline_schedule.pipeline_uuid

    raw_execution_date = payload.get('execution_date')
    if raw_execution_date is None:
        # NOTE(review): uses naive utcnow() while other code paths use
        # datetime.now(tz=pytz.UTC) — presumably intentional for DB storage; confirm.
        payload['execution_date'] = datetime.utcnow()
    elif not isinstance(raw_execution_date, datetime):
        payload['execution_date'] = datetime.fromisoformat(raw_execution_date)

    # Partition path: "<schedule_id><os.sep><timestamp>".
    timestamp = payload['execution_date'].strftime(format='%Y%m%dT%H%M%S_%f')
    payload['variables']['execution_partition'] = os.sep.join([
        str(pipeline_schedule.id),
        timestamp,
    ])

    is_integration = pipeline_type == PipelineType.INTEGRATION
    if is_integration:
        payload['create_block_runs'] = False

    return payload, is_integration
|
|
1179
|
+
|
|
1180
|
+
|
|
1181
|
+
@safe_db_query
def retry_pipeline_run(
    pipeline_run: Dict,
) -> 'PipelineRun':
    """Create a new pipeline run that retries a previous (serialized) pipeline run.

    Args:
        pipeline_run (Dict): Serialized pipeline run. Keys read here: 'pipeline_uuid',
            'id', 'pipeline_schedule_id', 'execution_date' (ISO string), and optionally
            'backfill_id', 'event_variables', 'variables'.

    Raises:
        Exception: If the referenced pipeline does not exist or is not valid.

    Returns:
        PipelineRun: The newly created pipeline run. Block runs are not created here
            (``create_block_runs=False``); the scheduler creates them later.
    """
    pipeline_uuid = pipeline_run['pipeline_uuid']
    pipeline = Pipeline.get(pipeline_uuid, check_if_exists=True)
    if pipeline is None or not pipeline.is_valid_pipeline(pipeline.dir_path):
        raise Exception(f'Pipeline {pipeline_uuid} is not a valid pipeline.')

    pipeline_schedule_id = pipeline_run['pipeline_schedule_id']
    # NOTE(review): a transient model is built with the OLD run's id, then `create` is
    # called to persist a NEW row — presumably `create` ignores the instance id; confirm
    # against the PipelineRun model implementation.
    pipeline_run_model = PipelineRun(
        id=pipeline_run['id'],
        pipeline_schedule_id=pipeline_schedule_id,
        pipeline_uuid=pipeline_uuid,
    )
    execution_date = datetime.fromisoformat(pipeline_run['execution_date'])
    new_pipeline_run = pipeline_run_model.create(
        backfill_id=pipeline_run.get('backfill_id'),
        create_block_runs=False,
        execution_date=execution_date,
        event_variables=pipeline_run.get('event_variables', {}),
        pipeline_schedule_id=pipeline_schedule_id,
        pipeline_uuid=pipeline_run_model.pipeline_uuid,
        variables=pipeline_run.get('variables', {}),
    )
    return new_pipeline_run
|
|
1207
|
+
|
|
1208
|
+
|
|
1209
|
+
def stop_pipeline_run(
    pipeline_run: PipelineRun,
    pipeline: Pipeline = None,
    status: PipelineRun.PipelineRunStatus = PipelineRun.PipelineRunStatus.CANCELLED,
) -> None:
    """Stop an in-progress pipeline run.

    Only runs in INITIAL or RUNNING status are stopped; anything else is a no-op.
    The run's status is set to ``status``, usage statistics are recorded, and all of
    its in-progress block runs (and associated jobs, when ``pipeline`` is given) are
    cancelled.

    Args:
        pipeline_run (PipelineRun): The pipeline run to stop.
        pipeline (Pipeline, optional): The pipeline associated with the pipeline run.
            Defaults to None.
        status (PipelineRun.PipelineRunStatus, optional): Terminal status to set.
            Defaults to CANCELLED.

    Returns:
        None
    """
    stoppable_statuses = (
        PipelineRun.PipelineRunStatus.INITIAL,
        PipelineRun.PipelineRunStatus.RUNNING,
    )
    if pipeline_run.status not in stoppable_statuses:
        return

    pipeline_run.update(status=status)

    # Record the run ending for usage statistics.
    asyncio.run(UsageStatisticLogger().pipeline_run_ended(pipeline_run))

    # Cancel all of the run's block runs and kill their jobs.
    cancel_block_runs_and_jobs(pipeline_run, pipeline)
|
|
1239
|
+
|
|
1240
|
+
|
|
1241
|
+
def cancel_block_runs_and_jobs(
    pipeline_run: PipelineRun,
    pipeline: Pipeline = None,
) -> None:
    """Cancel in-progress block runs and kill their jobs for a pipeline run.

    All INITIAL/QUEUED/RUNNING block runs are batch-updated to CANCELLED. When the
    pipeline runs as a single process (integration, streaming, or
    ``run_pipeline_in_one_process``), the pipeline-run job is killed — including
    per-stream jobs for integration pipelines and the K8s executor job where
    applicable. Otherwise each running block's job is killed individually.

    Args:
        pipeline_run (PipelineRun): The pipeline run whose work should be cancelled.
        pipeline (Pipeline, optional): The pipeline associated with the pipeline run.
            Defaults to None.

    Returns:
        None
    """
    cancellable_statuses = (
        BlockRun.BlockRunStatus.INITIAL,
        BlockRun.BlockRunStatus.QUEUED,
        BlockRun.BlockRunStatus.RUNNING,
    )
    cancellable = [b for b in pipeline_run.block_runs if b.status in cancellable_statuses]
    running = [b for b in cancellable if b.status == BlockRun.BlockRunStatus.RUNNING]

    BlockRun.batch_update_status(
        [b.id for b in cancellable],
        BlockRun.BlockRunStatus.CANCELLED,
    )

    # Kill jobs for integration streams and pipeline run.
    runs_in_one_process = bool(pipeline) and (
        pipeline.type in [PipelineType.INTEGRATION, PipelineType.STREAMING]
        or pipeline.run_pipeline_in_one_process
    )
    if runs_in_one_process:
        job_manager.kill_pipeline_run_job(pipeline_run.id)
        if pipeline.type == PipelineType.INTEGRATION:
            for stream in pipeline.streams():
                job_manager.kill_integration_stream_job(
                    pipeline_run.id,
                    stream.get('tap_stream_id')
                )
        if pipeline_run.executor_type == ExecutorType.K8S:
            # TODO: Support running and cancelling pipeline runs in ECS and
            # GCP_CLOUD_RUN executors.
            ExecutorFactory.get_pipeline_executor(
                pipeline,
                executor_type=pipeline_run.executor_type,
            ).cancel(pipeline_run_id=pipeline_run.id)
    else:
        for b in running:
            job_manager.kill_block_run_job(b.id)
|
|
1298
|
+
|
|
1299
|
+
|
|
1300
|
+
def check_sla():
    """Check in-progress pipeline runs against their trigger SLAs.

    For every active schedule that defines an SLA, any in-progress run whose start
    time plus the SLA is already in the past gets an SLA-passed notification and is
    flagged with ``passed_sla=True``.
    """
    repo_pipelines = set(Pipeline.get_all_pipelines(get_repo_path()))
    active_schedules = PipelineSchedule.active_schedules(pipeline_uuids=repo_pipelines)
    schedules_by_id = index_by(lambda x: x.id, active_schedules)

    schedule_ids = set([s.id for s in active_schedules])

    in_progress_runs = PipelineRun.in_progress_runs(schedule_ids)

    if not in_progress_runs:
        return

    now = datetime.now(tz=pytz.UTC)

    # TODO: combine all SLA alerts in one notification
    for run in in_progress_runs:
        schedule = schedules_by_id.get(run.pipeline_schedule_id)
        if not schedule:
            continue

        sla = schedule.sla
        if not sla:
            continue

        # Prefer the scheduled execution date; fall back to the creation time.
        started = run.execution_date if run.execution_date is not None else run.created_at
        deadline = started + timedelta(seconds=sla)
        if compare(deadline, now) != -1:
            continue

        # The run has blown past its SLA: notify and mark it so we don't re-alert.
        pipeline = Pipeline.get(schedule.pipeline_uuid)
        sender = NotificationSender(
            NotificationConfig.load(
                config=merge_dict(
                    pipeline.repo_config.notification_config,
                    pipeline.notification_config,
                ),
            ),
        )
        sender.send_pipeline_run_sla_passed_message(
            pipeline,
            run,
        )

        run.update(passed_sla=True)
|
|
1342
|
+
|
|
1343
|
+
|
|
1344
|
+
def schedule_all():
    """
    This method manages the scheduling and execution of pipeline runs based on specified
    concurrency and pipeline scheduling rules.

    1. Check whether any new pipeline runs need to be scheduled.
    2. Group active pipeline runs by pipeline.
    3. Run git sync if "sync_on_pipeline_run" is enabled.
    4. For each pipeline, check whether or not any pipeline runs need to be scheduled for
        the active pipeline schedules by performing the following steps:
        1. Loop over pipeline schedules and acquire locks.
        2. Determine whether to schedule pipeline runs based on pipeline schedule trigger interval.
        3. Enforce per trigger pipeline run limit and create or cancel pipeline runs.
        4. Start pipeline runs and handle per pipeline pipeline run limit.
    5. In active pipeline runs, check whether any block runs need to be scheduled.

    The current limit checks can potentially run into race conditions with api or event triggered
    schedules, so that needs to be addressed at some point.
    """
    # Drop any stale ORM state from previous scheduler iterations.
    db_connection.session.expire_all()

    repo_pipelines = set(Pipeline.get_all_pipelines(get_repo_path()))

    # Sync schedules from yaml file to DB
    sync_schedules(list(repo_pipelines))

    active_pipeline_schedules = \
        list(PipelineSchedule.active_schedules(pipeline_uuids=repo_pipelines))

    backfills = Backfill.filter(pipeline_schedule_ids=[ps.id for ps in active_pipeline_schedules])

    backfills_by_pipeline_schedule_id = index_by(
        lambda backfill: backfill.pipeline_schedule_id,
        backfills,
    )

    # Landing-time triggers need the previous completed run to estimate runtimes.
    active_pipeline_schedule_ids_with_landing_time_enabled = set()
    for pipeline_schedule in active_pipeline_schedules:
        if pipeline_schedule.landing_time_enabled():
            active_pipeline_schedule_ids_with_landing_time_enabled.add(pipeline_schedule.id)

    previous_pipeline_run_by_pipeline_schedule_id = {}
    if len(active_pipeline_schedule_ids_with_landing_time_enabled) >= 1:
        # Window function: rank completed runs per schedule, newest execution first.
        row_number_column = (
            func.
            row_number().
            over(
                order_by=desc(PipelineRun.execution_date),
                partition_by=PipelineRun.pipeline_schedule_id,
            ).
            label('row_number')
        )

        query = PipelineRun.query.filter(
            PipelineRun.pipeline_schedule_id.in_(
                active_pipeline_schedule_ids_with_landing_time_enabled,
            ),
            PipelineRun.status == PipelineRun.PipelineRunStatus.COMPLETED,
        )
        query = query.add_columns(row_number_column)
        # Keep only the most recent completed run per schedule (row_number == 1).
        query = query.from_self().filter(row_number_column == 1)
        for tup in query.all():
            pr, _ = tup
            previous_pipeline_run_by_pipeline_schedule_id[pr.pipeline_schedule_id] = pr

    git_sync_result = None
    sync_config = get_sync_config()

    active_pipeline_uuids = list(set([s.pipeline_uuid for s in active_pipeline_schedules]))
    pipeline_runs_by_pipeline = PipelineRun.active_runs_for_pipelines_grouped(active_pipeline_uuids)

    pipeline_schedules_by_pipeline = collections.defaultdict(list)
    for schedule in active_pipeline_schedules:
        pipeline_schedules_by_pipeline[schedule.pipeline_uuid].append(schedule)

    # Iterate through pipeline schedules by pipeline to handle pipeline run limits for
    # each pipeline.
    # NOTE: the loop variable deliberately reuses (shadows) the outer
    # `active_pipeline_schedules` name; it now holds only one pipeline's schedules.
    for pipeline_uuid, active_pipeline_schedules in pipeline_schedules_by_pipeline.items():
        pipeline = Pipeline.get(pipeline_uuid)
        concurrency_config = ConcurrencyConfig.load(config=pipeline.concurrency_config)

        pipeline_runs_to_start = []
        pipeline_runs_excluded_by_limit = []
        for pipeline_schedule in active_pipeline_schedules:
            lock_key = f'pipeline_schedule_{pipeline_schedule.id}'
            # Skip schedules another scheduler process is currently handling.
            if not lock.try_acquire_lock(lock_key):
                continue

            try:
                previous_runtimes = []
                if pipeline_schedule.id in active_pipeline_schedule_ids_with_landing_time_enabled:
                    previous_pipeline_run = previous_pipeline_run_by_pipeline_schedule_id.get(
                        pipeline_schedule.id,
                    )
                    if previous_pipeline_run:
                        previous_runtimes = pipeline_schedule.runtime_history(
                            pipeline_run=previous_pipeline_run,
                        )

                # Decide whether to schedule any pipeline runs
                should_schedule = pipeline_schedule.should_schedule(
                    previous_runtimes=previous_runtimes
                )
                initial_pipeline_runs = [
                    r for r in pipeline_schedule.pipeline_runs
                    if r.status == PipelineRun.PipelineRunStatus.INITIAL
                ]

                if not should_schedule and not initial_pipeline_runs:
                    # NOTE(review): the lock is released here AND in the finally block —
                    # presumably release_lock is safe to call twice; confirm.
                    lock.release_lock(lock_key)
                    continue

                running_pipeline_runs = [
                    r for r in pipeline_schedule.pipeline_runs
                    if r.status == PipelineRun.PipelineRunStatus.RUNNING
                ]

                if should_schedule and \
                        pipeline_schedule.id not in backfills_by_pipeline_schedule_id:
                    # Perform git sync if "sync_on_pipeline_run" is enabled and no other git sync
                    # has been run for this scheduler loop.
                    if not git_sync_result and sync_config and sync_config.sync_on_pipeline_run:
                        git_sync_result = run_git_sync(lock=lock, sync_config=sync_config)

                    payload = dict(
                        execution_date=pipeline_schedule.current_execution_date(),
                        pipeline_schedule_id=pipeline_schedule.id,
                        pipeline_uuid=pipeline_uuid,
                        variables=pipeline_schedule.variables,
                    )

                    if len(previous_runtimes) >= 1:
                        payload['metrics'] = dict(previous_runtimes=previous_runtimes)

                    if (
                        pipeline_schedule.get_settings().skip_if_previous_running
                        and (initial_pipeline_runs or running_pipeline_runs)
                    ):
                        # Cancel the pipeline run if previous pipeline runs haven't completed and
                        # skip_if_previous_running is enabled
                        from mage_ai.orchestration.triggers.utils import (
                            create_and_cancel_pipeline_run,
                        )

                        pipeline_run = create_and_cancel_pipeline_run(
                            pipeline,
                            pipeline_schedule,
                            payload,
                            message='Pipeline run limit reached... skipping this run',
                        )
                    else:
                        payload['create_block_runs'] = False
                        pipeline_run = PipelineRun.create(**payload)
                        # Log Git sync status for new pipeline runs if a git sync result exists
                        if git_sync_result:
                            pipeline_scheduler = PipelineScheduler(pipeline_run)
                            log_git_sync(
                                git_sync_result,
                                pipeline_scheduler.logger,
                                pipeline_scheduler.build_tags(),
                            )
                        initial_pipeline_runs.append(pipeline_run)

                # Enforce pipeline concurrency limit
                pipeline_run_quota = None
                if concurrency_config.pipeline_run_limit is not None:
                    pipeline_run_quota = concurrency_config.pipeline_run_limit - \
                        len(running_pipeline_runs)

                # No per-trigger limit configured: allow all INITIAL runs to start.
                if pipeline_run_quota is None:
                    pipeline_run_quota = len(initial_pipeline_runs)

                if pipeline_run_quota > 0:
                    # Start the oldest runs first; the overflow is deferred/possibly skipped.
                    initial_pipeline_runs.sort(key=lambda x: x.execution_date)
                    pipeline_runs_to_start.extend(initial_pipeline_runs[:pipeline_run_quota])
                    pipeline_runs_excluded_by_limit.extend(
                        initial_pipeline_runs[pipeline_run_quota:]
                    )
            finally:
                lock.release_lock(lock_key)

        # Enforce the per-pipeline (all triggers) run limit on top of per-trigger quotas.
        pipeline_run_limit = concurrency_config.pipeline_run_limit_all_triggers
        if pipeline_run_limit is not None:
            pipeline_quota = pipeline_run_limit - len(
                pipeline_runs_by_pipeline.get(pipeline_uuid, [])
            )
        else:
            pipeline_quota = None

        quota_filtered_runs = pipeline_runs_to_start
        if pipeline_quota is not None:
            pipeline_quota = pipeline_quota if pipeline_quota > 0 else 0
            pipeline_runs_to_start.sort(key=lambda x: x.execution_date)
            quota_filtered_runs = pipeline_runs_to_start[:pipeline_quota]
            pipeline_runs_excluded_by_limit.extend(
                pipeline_runs_to_start[pipeline_quota:]
            )

        for r in quota_filtered_runs:
            try:
                PipelineScheduler(r).start()
            except Exception:
                logger.exception(f'Failed to start {r}')
                traceback.print_exc()
                r.update(status=PipelineRun.PipelineRunStatus.FAILED)
                continue

        # If on_pipeline_run_limit_reached is set as SKIP, cancel the pipeline runs that
        # were not scheduled due to pipeline run limits.
        if concurrency_config.on_pipeline_run_limit_reached == OnLimitReached.SKIP:
            for r in pipeline_runs_excluded_by_limit:
                pipeline_scheduler = PipelineScheduler(r)
                pipeline_scheduler.logger.warning(
                    'Pipeline run limit reached... skipping this run',
                    **pipeline_scheduler.build_tags(),
                )
                r.update(status=PipelineRun.PipelineRunStatus.CANCELLED)

    # Schedule active pipeline runs
    active_pipeline_runs = PipelineRun.active_runs_for_pipelines(
        pipeline_uuids=repo_pipelines,
        include_block_runs=True,
    )
    logger.info(f'Active pipeline runs: {[p.id for p in active_pipeline_runs]}')

    for r in active_pipeline_runs:
        try:
            r.refresh()
            PipelineScheduler(r).schedule()
        except Exception:
            logger.exception(f'Failed to schedule {r}')
            traceback.print_exc()
            continue
    job_manager.clean_up_jobs()
|
|
1578
|
+
|
|
1579
|
+
|
|
1580
|
+
def schedule_with_event(event: Dict = None):
    """
    Schedule and execute pipeline runs for event-triggered schedules.

    The logic is relatively similar to the `schedule_all()` method:

    1. Evaluate all active event matchers against the incoming event.
    2. Collect the active pipeline schedules of every matcher that matched.
    3. Create and start a new pipeline run for each matched pipeline schedule.

    Args:
        event (Dict): the trigger event; treated as an empty dict when None.
    """
    if event is None:
        event = dict()
    logger.info(f'Schedule with event {event}')
    all_event_matchers = EventMatcher.active_event_matchers()

    # Flat list of every schedule attached to a matcher that matched this event.
    matched_pipeline_schedules = []
    for e in all_event_matchers:
        if e.match(event):
            logger.info(f'Event matched with {e}')
            matched_pipeline_schedules.extend(e.active_pipeline_schedules())
        else:
            logger.info(f'Event not matched with {e}')

    if matched_pipeline_schedules:
        # Imported here rather than at module top — presumably to avoid a
        # circular import; confirm before hoisting.
        from mage_ai.orchestration.triggers.utils import create_and_start_pipeline_run

        for p in matched_pipeline_schedules:
            payload = dict(
                execution_date=datetime.now(tz=pytz.UTC),
                pipeline_schedule_id=p.id,
                pipeline_uuid=p.pipeline_uuid,
                # Expose the triggering event to the run via its variables.
                variables=merge_dict(p.variables or dict(), dict(event=event)),
            )
            create_and_start_pipeline_run(
                p.pipeline,
                p,
                payload,
                should_schedule=False,
            )
|
|
1620
|
+
|
|
1621
|
+
|
|
1622
|
+
def sync_schedules(pipeline_uuids: List[str]):
    """Sync schedule configs from triggers.yaml into the DB for the given pipelines."""
    trigger_configs = []

    # Collect trigger configs, keeping only those applicable to the current env:
    # a trigger with no `envs` restriction always applies.
    for pipeline_uuid in pipeline_uuids:
        pipeline_triggers = get_triggers_by_pipeline(pipeline_uuid)
        logger.debug(f'Sync pipeline trigger configs for {pipeline_uuid}: {pipeline_triggers}.')
        trigger_configs.extend(
            trigger
            for trigger in pipeline_triggers
            if not trigger.envs or get_env() in trigger.envs
        )

    PipelineSchedule.create_or_update_batch(trigger_configs)
|