sea-dev 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/tasks/README.md +89 -0
- package/.cursor/rules/commits.mdc +31 -0
- package/.cursor/rules/general.mdc +84 -0
- package/.github/workflows/ci-cd.yml +141 -0
- package/CLAUDE.md +337 -0
- package/README.md +129 -0
- package/apps/api/.prettierignore +6 -0
- package/apps/api/.prettierrc.js +3 -0
- package/apps/api/dotenvx-safe.sh +11 -0
- package/apps/api/eslint.config.mjs +3 -0
- package/apps/api/package.json +58 -0
- package/apps/api/src/clients/posthog.ts +25 -0
- package/apps/api/src/dal/submission.ts +59 -0
- package/apps/api/src/errors.ts +55 -0
- package/apps/api/src/index.ts +21 -0
- package/apps/api/src/lib/channel.ts +28 -0
- package/apps/api/src/lib/config.ts +9 -0
- package/apps/api/src/lib/fmt.test.ts +9 -0
- package/apps/api/src/lib/fmt.ts +62 -0
- package/apps/api/src/lib/invariant.ts +23 -0
- package/apps/api/src/middleware/auth.ts +66 -0
- package/apps/api/src/routes/index.ts +20 -0
- package/apps/api/src/routes/v2/chat/handlers.ts +693 -0
- package/apps/api/src/routes/v2/chat/index.ts +257 -0
- package/apps/api/src/routes/v2/chat/schemas.ts +43 -0
- package/apps/api/src/routes/v2/deals/handlers.ts +64 -0
- package/apps/api/src/routes/v2/deals/index.ts +88 -0
- package/apps/api/src/routes/v2/deals/schemas.ts +38 -0
- package/apps/api/src/routes/v2/forms/handlers.ts +415 -0
- package/apps/api/src/routes/v2/forms/index.ts +382 -0
- package/apps/api/src/routes/v2/forms/schemas.ts +243 -0
- package/apps/api/src/routes/v2/index.ts +19 -0
- package/apps/api/src/routes/v2/pipelines/handlers.ts +261 -0
- package/apps/api/src/routes/v2/pipelines/index.ts +224 -0
- package/apps/api/src/routes/v2/pipelines/schemas.ts +173 -0
- package/apps/api/src/routes/v2/submissions/handlers.ts +555 -0
- package/apps/api/src/routes/v2/submissions/index.ts +366 -0
- package/apps/api/src/routes/v2/submissions/schemas.ts +233 -0
- package/apps/api/src/routes/v2/workflows/handlers.ts +81 -0
- package/apps/api/src/routes/v2/workflows/index.ts +88 -0
- package/apps/api/src/routes/v2/workflows/schemas.ts +40 -0
- package/apps/api/src/server.ts +146 -0
- package/apps/api/src/static/favicon.ico +0 -0
- package/apps/api/src/types/api.ts +14 -0
- package/apps/api/src/types/result.ts +3 -0
- package/apps/api/tsconfig.json +22 -0
- package/apps/api/vite.config.ts +28 -0
- package/apps/api/vitest.config.ts +14 -0
- package/apps/conversion-worker/Dockerfile +59 -0
- package/apps/conversion-worker/package.json +31 -0
- package/apps/conversion-worker/src/lib/config.ts +7 -0
- package/apps/conversion-worker/src/main.ts +22 -0
- package/apps/conversion-worker/src/workflows/convert-pptx.ts +116 -0
- package/apps/conversion-worker/tsconfig.json +27 -0
- package/apps/conversion-worker/vite.config.ts +33 -0
- package/apps/main/.prettierignore +6 -0
- package/apps/main/.prettierrc.js +3 -0
- package/apps/main/CLAUDE.md +245 -0
- package/apps/main/Procfile +1 -0
- package/apps/main/README.md +193 -0
- package/apps/main/db-tests.jsonl +116 -0
- package/apps/main/dotenvx-safe.sh +11 -0
- package/apps/main/drizzle/meta/_journal.json +1 -0
- package/apps/main/drizzle.config.ts +25 -0
- package/apps/main/eslint.config.mjs +3 -0
- package/apps/main/generate-routes.mjs +5 -0
- package/apps/main/package.json +131 -0
- package/apps/main/playwright.config.ts +23 -0
- package/apps/main/postcss.config.ts +5 -0
- package/apps/main/public/bg-dark.svg +10 -0
- package/apps/main/public/bg.svg +10 -0
- package/apps/main/public/favicon.ico +0 -0
- package/apps/main/run.sh +146 -0
- package/apps/main/scripts/browser.ts +14 -0
- package/apps/main/scripts/db-test-cov.ts +277 -0
- package/apps/main/scripts/login.ts +78 -0
- package/apps/main/scripts/repl.ts +61 -0
- package/apps/main/src/_foo.ts +31 -0
- package/apps/main/src/_tests/db.test.ts +19 -0
- package/apps/main/src/_tests/mock-db.ts +60 -0
- package/apps/main/src/client.tsx +13 -0
- package/apps/main/src/clients/loops.ts +13 -0
- package/apps/main/src/clients/polar.ts +12 -0
- package/apps/main/src/clients/posthog.ts +12 -0
- package/apps/main/src/components/chat/chat-context.tsx +99 -0
- package/apps/main/src/components/chat/chat-messages.tsx +184 -0
- package/apps/main/src/components/chat/chat-status.tsx +140 -0
- package/apps/main/src/components/chat/chat.tsx +458 -0
- package/apps/main/src/components/chat/citation-modal.tsx +54 -0
- package/apps/main/src/components/cta.tsx +21 -0
- package/apps/main/src/components/data-display/derived.tsx +40 -0
- package/apps/main/src/components/data-display/group-single.tsx +57 -0
- package/apps/main/src/components/data-display/group-table.tsx +165 -0
- package/apps/main/src/components/data-display/group-wrapper.tsx +54 -0
- package/apps/main/src/components/data-display/item.tsx +678 -0
- package/apps/main/src/components/error.tsx +45 -0
- package/apps/main/src/components/forms/error.tsx +22 -0
- package/apps/main/src/components/grid.tsx +7 -0
- package/apps/main/src/components/header/container.tsx +73 -0
- package/apps/main/src/components/header/header-bar.tsx +102 -0
- package/apps/main/src/components/modals/copy-display.tsx +37 -0
- package/apps/main/src/components/modals/copy-form.tsx +152 -0
- package/apps/main/src/components/modals/duplicate-workflow.tsx +89 -0
- package/apps/main/src/components/modals/field-correction.tsx +323 -0
- package/apps/main/src/components/modals/form-viewer.tsx +126 -0
- package/apps/main/src/components/modals/modals.tsx +44 -0
- package/apps/main/src/components/modals/new-deal.tsx +78 -0
- package/apps/main/src/components/modals/new-form.tsx +133 -0
- package/apps/main/src/components/modals/new-pipeline.tsx +70 -0
- package/apps/main/src/components/modals/new-submission.tsx +321 -0
- package/apps/main/src/components/modals/new-workflow.tsx +342 -0
- package/apps/main/src/components/modals/transformation-sources-modal.tsx +157 -0
- package/apps/main/src/components/modals/view-report.tsx +193 -0
- package/apps/main/src/components/not-found.tsx +14 -0
- package/apps/main/src/components/search/search-bar.tsx +178 -0
- package/apps/main/src/components/sheet-selector.tsx +135 -0
- package/apps/main/src/components/side-panel/doc-list.tsx +480 -0
- package/apps/main/src/components/sidebar/admin-sidebar.tsx +75 -0
- package/apps/main/src/components/sidebar/app-sidebar.tsx +417 -0
- package/apps/main/src/components/sidebar/model-select.tsx +134 -0
- package/apps/main/src/components/sidebar/settings-sidebar.tsx +132 -0
- package/apps/main/src/components/sidebar/sidebar-right.tsx +22 -0
- package/apps/main/src/components/sidebar/stop-impersonate.tsx +21 -0
- package/apps/main/src/components/svg/loading.tsx +33 -0
- package/apps/main/src/components/theme-selector.tsx +43 -0
- package/apps/main/src/components/unsaved-badge.tsx +19 -0
- package/apps/main/src/components/upload/file-upload.tsx +354 -0
- package/apps/main/src/fns/submission-groups.ts +28 -0
- package/apps/main/src/fns/submission-items.ts +11 -0
- package/apps/main/src/global-middleware.ts +16 -0
- package/apps/main/src/hooks/use-update-state.ts +18 -0
- package/apps/main/src/lib/auth-client.ts +16 -0
- package/apps/main/src/lib/auth.test.ts +359 -0
- package/apps/main/src/lib/auth.ts +144 -0
- package/apps/main/src/lib/billing.ts +23 -0
- package/apps/main/src/lib/config-iso.ts +76 -0
- package/apps/main/src/lib/config.ts +61 -0
- package/apps/main/src/lib/excel.ts +16 -0
- package/apps/main/src/lib/feedback-cache.ts +70 -0
- package/apps/main/src/lib/logger.ts +44 -0
- package/apps/main/src/lib/models.ts +22 -0
- package/apps/main/src/lib/not-found.ts +17 -0
- package/apps/main/src/lib/pdf.ts +16 -0
- package/apps/main/src/lib/tabularize.ts +54 -0
- package/apps/main/src/lib/utils.ts +10 -0
- package/apps/main/src/lib/zfd.ts +217 -0
- package/apps/main/src/middleware.ts +55 -0
- package/apps/main/src/routeTree.gen.ts +1255 -0
- package/apps/main/src/router.tsx +24 -0
- package/apps/main/src/routes/__root.tsx +92 -0
- package/apps/main/src/routes/_authed/_app/(dashboard)/index.tsx +227 -0
- package/apps/main/src/routes/_authed/_app/agents/$agentId/config.tsx +224 -0
- package/apps/main/src/routes/_authed/_app/agents/$agentId/index.tsx +206 -0
- package/apps/main/src/routes/_authed/_app/agents/-components/agent-actions-menu.tsx +94 -0
- package/apps/main/src/routes/_authed/_app/agents/-components/agent-artifacts.tsx +153 -0
- package/apps/main/src/routes/_authed/_app/agents/-components/agent-chat.tsx +220 -0
- package/apps/main/src/routes/_authed/_app/agents/-components/agent-history-menu.tsx +81 -0
- package/apps/main/src/routes/_authed/_app/agents/-components/agent-model-select.tsx +84 -0
- package/apps/main/src/routes/_authed/_app/agents/-components/agent-relevant-items.tsx +226 -0
- package/apps/main/src/routes/_authed/_app/agents/-components/agent-upload-button.tsx +298 -0
- package/apps/main/src/routes/_authed/_app/agents/-components/context-modal.tsx +187 -0
- package/apps/main/src/routes/_authed/_app/agents/-fns.ts +560 -0
- package/apps/main/src/routes/_authed/_app/agents/index.tsx +65 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/$subId/-components/citation-tree.tsx +268 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/$subId.tsx +655 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/-components/doc-loading.tsx +37 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/-components/share-link.tsx +42 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/-components/submission-card.tsx +89 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/-components/submission-filter.tsx +193 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/-components/submissions.tsx +36 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/-components/summary.tsx +82 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/-components/upload-doc.tsx +120 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/-fns.ts +653 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/index.tsx +259 -0
- package/apps/main/src/routes/_authed/_app/deals/$dealId/route.tsx +29 -0
- package/apps/main/src/routes/_authed/_app/deals/index.tsx +104 -0
- package/apps/main/src/routes/_authed/_app/feedback/index.tsx +639 -0
- package/apps/main/src/routes/_authed/_app/feedback/insights.tsx +250 -0
- package/apps/main/src/routes/_authed/_app/pipelines/$pipelineId/$runId/-components/blockers-panel.tsx +260 -0
- package/apps/main/src/routes/_authed/_app/pipelines/$pipelineId/$runId/-components/manual-input-panel.tsx +301 -0
- package/apps/main/src/routes/_authed/_app/pipelines/$pipelineId/$runId/-components/submission-selector-modal.tsx +143 -0
- package/apps/main/src/routes/_authed/_app/pipelines/$pipelineId/$runId/-components/upload-doc.tsx +120 -0
- package/apps/main/src/routes/_authed/_app/pipelines/$pipelineId/$runId/index.tsx +1485 -0
- package/apps/main/src/routes/_authed/_app/pipelines/$pipelineId/-components/dag-view.tsx +296 -0
- package/apps/main/src/routes/_authed/_app/pipelines/$pipelineId/-components/step-config-modal.tsx +634 -0
- package/apps/main/src/routes/_authed/_app/pipelines/$pipelineId/index.tsx +911 -0
- package/apps/main/src/routes/_authed/_app/pipelines/-fns.ts +510 -0
- package/apps/main/src/routes/_authed/_app/pipelines/index.tsx +103 -0
- package/apps/main/src/routes/_authed/_app/reports/$reportId.tsx +397 -0
- package/apps/main/src/routes/_authed/_app/reports/-fns.ts +11 -0
- package/apps/main/src/routes/_authed/_app/reports/index.tsx +22 -0
- package/apps/main/src/routes/_authed/_app/route.tsx +48 -0
- package/apps/main/src/routes/_authed/_app/submissions/-columns.tsx +161 -0
- package/apps/main/src/routes/_authed/_app/submissions/-fns.ts +128 -0
- package/apps/main/src/routes/_authed/_app/submissions/index.tsx +190 -0
- package/apps/main/src/routes/_authed/_app/workflows/$wfSlug/$formId.tsx +542 -0
- package/apps/main/src/routes/_authed/_app/workflows/$wfSlug/-components/derived.tsx +154 -0
- package/apps/main/src/routes/_authed/_app/workflows/$wfSlug/-components/field.tsx +369 -0
- package/apps/main/src/routes/_authed/_app/workflows/$wfSlug/-components/group.tsx +475 -0
- package/apps/main/src/routes/_authed/_app/workflows/$wfSlug/index.tsx +263 -0
- package/apps/main/src/routes/_authed/_app/workflows/$wfSlug/route.tsx +33 -0
- package/apps/main/src/routes/_authed/_app/workflows/-components/form-card.tsx +315 -0
- package/apps/main/src/routes/_authed/_app/workflows/index.tsx +86 -0
- package/apps/main/src/routes/_authed/admin/index.tsx +12 -0
- package/apps/main/src/routes/_authed/admin/route.tsx +42 -0
- package/apps/main/src/routes/_authed/admin/users/-columns.tsx +124 -0
- package/apps/main/src/routes/_authed/admin/users/-fns.ts +30 -0
- package/apps/main/src/routes/_authed/admin/users/index.tsx +29 -0
- package/apps/main/src/routes/_authed/catchNotFound.tsx +114 -0
- package/apps/main/src/routes/_authed/redirects/forms.$id.tsx +29 -0
- package/apps/main/src/routes/_authed/redirects/submissions.$id.tsx +27 -0
- package/apps/main/src/routes/_authed/redirects/workflows.$id.tsx +27 -0
- package/apps/main/src/routes/_authed/route.tsx +51 -0
- package/apps/main/src/routes/_authed/settings/-components/new-api-key.tsx +85 -0
- package/apps/main/src/routes/_authed/settings/-components/new-invite.tsx +100 -0
- package/apps/main/src/routes/_authed/settings/analytics.tsx +1710 -0
- package/apps/main/src/routes/_authed/settings/billing/-components/price-table.tsx +129 -0
- package/apps/main/src/routes/_authed/settings/billing/-fns.ts +76 -0
- package/apps/main/src/routes/_authed/settings/billing/index.tsx +119 -0
- package/apps/main/src/routes/_authed/settings/embed.tsx +337 -0
- package/apps/main/src/routes/_authed/settings/index.tsx +12 -0
- package/apps/main/src/routes/_authed/settings/keys.tsx +157 -0
- package/apps/main/src/routes/_authed/settings/members.tsx +276 -0
- package/apps/main/src/routes/_authed/settings/route.tsx +22 -0
- package/apps/main/src/routes/_authed/settings/user.tsx +87 -0
- package/apps/main/src/routes/_authed/settings/workspace.tsx +206 -0
- package/apps/main/src/routes/_public/-components/sign-in-up.tsx +96 -0
- package/apps/main/src/routes/_public/embedded.tsx +57 -0
- package/apps/main/src/routes/_public/invite.$inviteId.tsx +143 -0
- package/apps/main/src/routes/_public/no-access.tsx +38 -0
- package/apps/main/src/routes/_public/no-invite.tsx +39 -0
- package/apps/main/src/routes/_public/otp.tsx +103 -0
- package/apps/main/src/routes/_public/route.tsx +15 -0
- package/apps/main/src/routes/_public/sign-in.tsx +111 -0
- package/apps/main/src/routes/_public/sign-up.tsx +114 -0
- package/apps/main/src/routes/api/auth/$.ts +11 -0
- package/apps/main/src/routes/api/billing/paid.ts +42 -0
- package/apps/main/src/routes/api/billing/webhooks.ts +70 -0
- package/apps/main/src/routes/api/chat/agent.ts +40 -0
- package/apps/main/src/routes/api/chat/key.ts +42 -0
- package/apps/main/src/routes/api/chat/member.ts +35 -0
- package/apps/main/src/routes/api/test/index.ts +19 -0
- package/apps/main/src/server.tsx +6 -0
- package/apps/main/src/styles/app.css +23 -0
- package/apps/main/src/vite-env.d.ts +7 -0
- package/apps/main/test.http +6 -0
- package/apps/main/tsconfig.json +17 -0
- package/apps/main/vite.config.ts +24 -0
- package/apps/main/vitest.config.js +17 -0
- package/apps/mcp/README.md +171 -0
- package/apps/mcp/eslint.config.mjs +3 -0
- package/apps/mcp/package.json +37 -0
- package/apps/mcp/src/index.ts +414 -0
- package/apps/mcp/tsconfig.json +19 -0
- package/apps/mcp/vite.config.ts +22 -0
- package/apps/posthog-proxy/index.html +9 -0
- package/apps/workers/.prettierignore +7 -0
- package/apps/workers/.prettierrc.js +3 -0
- package/apps/workers/dotenvx-safe.sh +11 -0
- package/apps/workers/eslint.config.mjs +3 -0
- package/apps/workers/package.json +65 -0
- package/apps/workers/src/lib/config.ts +7 -0
- package/apps/workers/src/lib/messages.ts +0 -0
- package/apps/workers/src/lib/posthog.ts +25 -0
- package/apps/workers/src/main.ts +58 -0
- package/apps/workers/src/workflows/extraction.ts +866 -0
- package/apps/workers/src/workflows/index.ts +3 -0
- package/apps/workers/src/workflows/pipeline-dag.ts +210 -0
- package/apps/workers/src/workflows/pipeline-steps.ts +1393 -0
- package/apps/workers/tsconfig.json +16 -0
- package/apps/workers/vite.config.ts +35 -0
- package/docs/CHANGELOG.md +84 -0
- package/docs/agent-templates-and-runs.md +859 -0
- package/docs/aws-migration-plan.md +267 -0
- package/docs/impl-p0-form-builder-improvements.md +683 -0
- package/docs/on-prem-deployment-spec.docx +0 -0
- package/docs/on-prem-deployment-spec.md +378 -0
- package/docs/prd-form-builder-strategy.md +1120 -0
- package/docs/widget-ng-apf-packaging-spec.md +43 -0
- package/infra/k8s/charts/seadotdev/Chart.yaml +6 -0
- package/infra/k8s/charts/seadotdev/templates/_helpers.tpl +27 -0
- package/infra/k8s/charts/seadotdev/templates/api-v2.yaml +105 -0
- package/infra/k8s/charts/seadotdev/templates/external-secrets.yaml +83 -0
- package/infra/k8s/charts/seadotdev/templates/ingress.yaml +54 -0
- package/infra/k8s/charts/seadotdev/templates/main-app.yaml +104 -0
- package/infra/k8s/charts/seadotdev/templates/workers.yaml +182 -0
- package/infra/k8s/charts/seadotdev/values.yaml +143 -0
- package/infra/terraform/main.tf +399 -0
- package/libs/ai/.prettierignore +2 -0
- package/libs/ai/.prettierrc.js +5 -0
- package/libs/ai/README.md +139 -0
- package/libs/ai/eslint.config.mjs +3 -0
- package/libs/ai/package.json +42 -0
- package/libs/ai/src/index.ts +5 -0
- package/libs/ai/src/models.ts +19 -0
- package/libs/ai/src/rag/index.ts +1 -0
- package/libs/ai/src/rag/rag.test.ts +99 -0
- package/libs/ai/src/rag/rag.ts +510 -0
- package/libs/ai/tsconfig.json +21 -0
- package/libs/ai/vite.config.ts +38 -0
- package/libs/cache/.prettierignore +2 -0
- package/libs/cache/eslint.config.mjs +3 -0
- package/libs/cache/package.json +35 -0
- package/libs/cache/src/feedback.ts +77 -0
- package/libs/cache/src/index.ts +2 -0
- package/libs/cache/tsconfig.json +19 -0
- package/libs/cache/vite.config.ts +36 -0
- package/libs/clients/.prettierignore +6 -0
- package/libs/clients/eslint.config.mjs +3 -0
- package/libs/clients/package.json +59 -0
- package/libs/clients/src/azure.ts +249 -0
- package/libs/clients/src/gcp.ts +220 -0
- package/libs/clients/src/hatchet.ts +86 -0
- package/libs/clients/src/index.ts +8 -0
- package/libs/clients/src/loops.ts +86 -0
- package/libs/clients/src/polar.ts +77 -0
- package/libs/clients/src/posthog.ts +55 -0
- package/libs/clients/tsconfig.json +19 -0
- package/libs/clients/vite.config.ts +35 -0
- package/libs/config/.prettierignore +6 -0
- package/libs/config/.prettierrc.js +12 -0
- package/libs/config/eslint.config.mjs +3 -0
- package/libs/config/package.json +50 -0
- package/libs/config/src/azure.ts +54 -0
- package/libs/config/src/db.ts +18 -0
- package/libs/config/src/gcp.ts +53 -0
- package/libs/config/src/google.ts +17 -0
- package/libs/config/src/hatchet.ts +20 -0
- package/libs/config/src/index.ts +108 -0
- package/libs/config/src/llm.ts +17 -0
- package/libs/config/src/polar.ts +24 -0
- package/libs/config/src/util.ts +8 -0
- package/libs/config/src/vercel.ts +26 -0
- package/libs/config/tsconfig.json +19 -0
- package/libs/config/vite.config.ts +34 -0
- package/libs/core/.prettierignore +2 -0
- package/libs/core/eslint.config.mjs +3 -0
- package/libs/core/package.json +59 -0
- package/libs/core/src/chat/derived.ts +97 -0
- package/libs/core/src/chat/feedback.ts +293 -0
- package/libs/core/src/chat/index.ts +6 -0
- package/libs/core/src/chat/model.ts +92 -0
- package/libs/core/src/chat/prepare-tools.ts +286 -0
- package/libs/core/src/chat/prompts.ts +623 -0
- package/libs/core/src/chat/stream.ts +311 -0
- package/libs/core/src/chat/summarize.ts +168 -0
- package/libs/core/src/chat/tools/agent.ts +403 -0
- package/libs/core/src/chat/tools/chart-agent.ts +526 -0
- package/libs/core/src/chat/tools/chart-helpers/sandbox.ts +47 -0
- package/libs/core/src/chat/tools/chart.ts +86 -0
- package/libs/core/src/chat/tools/credit-agent.ts +1383 -0
- package/libs/core/src/chat/tools/credit.ts +1435 -0
- package/libs/core/src/chat/tools/deep-dive-agent.ts +100 -0
- package/libs/core/src/chat/tools/deep-dive.ts +141 -0
- package/libs/core/src/chat/tools/form.ts +449 -0
- package/libs/core/src/chat/tools/helpers.ts +91 -0
- package/libs/core/src/chat/tools/index.ts +42 -0
- package/libs/core/src/chat/tools/pipeline-artifact.ts +76 -0
- package/libs/core/src/chat/tools/report.ts +40 -0
- package/libs/core/src/chat/tools/search.ts +390 -0
- package/libs/core/src/chat/tools/submission.ts +227 -0
- package/libs/core/src/chat/tools/workflow.ts +684 -0
- package/libs/core/src/chat/types.ts +3 -0
- package/libs/core/src/data-extraction/classification/azure.ts +168 -0
- package/libs/core/src/data-extraction/classification/index.ts +1 -0
- package/libs/core/src/data-extraction/dal.ts +246 -0
- package/libs/core/src/data-extraction/form-structure-extractor.ts +294 -0
- package/libs/core/src/data-extraction/index.ts +4 -0
- package/libs/core/src/data-extraction/layout/azure.ts +730 -0
- package/libs/core/src/data-extraction/layout/excel.ts +180 -0
- package/libs/core/src/data-extraction/layout/gcp.ts +1071 -0
- package/libs/core/src/data-extraction/layout/index.ts +266 -0
- package/libs/core/src/data-extraction/layout/plaintext.ts +45 -0
- package/libs/core/src/data-extraction/models.ts +38 -0
- package/libs/core/src/data-extraction/pdf-utils.ts +96 -0
- package/libs/core/src/data-extraction/structuring/bank-statement.ts +1182 -0
- package/libs/core/src/data-extraction/structuring/custom.ts +495 -0
- package/libs/core/src/data-extraction/structuring/index.ts +290 -0
- package/libs/core/src/data-extraction/structuring/prompts.ts +69 -0
- package/libs/core/src/data-extraction/type-guards.ts +110 -0
- package/libs/core/src/data-extraction/types.ts +84 -0
- package/libs/core/src/data-extraction/utils.ts +31 -0
- package/libs/core/src/data-extraction/validation/bank-statement.ts +127 -0
- package/libs/core/src/deals.ts +17 -0
- package/libs/core/src/documents.ts +152 -0
- package/libs/core/src/index.ts +5 -0
- package/libs/core/src/pipelines/display.ts +678 -0
- package/libs/core/src/pipelines/execute.ts +2342 -0
- package/libs/core/src/pipelines/index.ts +4 -0
- package/libs/core/src/pipelines/list.ts +12 -0
- package/libs/core/src/pipelines/runs.ts +53 -0
- package/libs/core/tsconfig.json +20 -0
- package/libs/core/vite.config.ts +56 -0
- package/libs/dal/.prettierignore +6 -0
- package/libs/dal/.prettierrc.js +12 -0
- package/libs/dal/eslint.config.mjs +3 -0
- package/libs/dal/package.json +57 -0
- package/libs/dal/src/_tests/db.test.ts +19 -0
- package/libs/dal/src/_tests/mock-db.ts +60 -0
- package/libs/dal/src/api-key.test.ts +397 -0
- package/libs/dal/src/api-key.ts +110 -0
- package/libs/dal/src/billing.ts +23 -0
- package/libs/dal/src/conversation.test.ts +655 -0
- package/libs/dal/src/conversation.ts +532 -0
- package/libs/dal/src/deal.test.ts +45 -0
- package/libs/dal/src/deal.ts +87 -0
- package/libs/dal/src/defaults-consumer-lending-uk.ts +33 -0
- package/libs/dal/src/defaults-consumer-lending-us.ts +33 -0
- package/libs/dal/src/defaults-private-credit.ts +57 -0
- package/libs/dal/src/defaults-private-equity.ts +51 -0
- package/libs/dal/src/defaults-smb-lending-us.ts +1569 -0
- package/libs/dal/src/defaults-sme-lending-uk-express.ts +1527 -0
- package/libs/dal/src/defaults-sme-lending-uk.ts +1669 -0
- package/libs/dal/src/defaults-types.ts +23 -0
- package/libs/dal/src/defaults.ts +550 -0
- package/libs/dal/src/document.test.ts +70 -0
- package/libs/dal/src/document.ts +192 -0
- package/libs/dal/src/feedback.ts +255 -0
- package/libs/dal/src/form.test.ts +637 -0
- package/libs/dal/src/form.ts +1165 -0
- package/libs/dal/src/index.ts +20 -0
- package/libs/dal/src/invitation.test.ts +746 -0
- package/libs/dal/src/invitation.ts +207 -0
- package/libs/dal/src/member.test.ts +185 -0
- package/libs/dal/src/member.ts +80 -0
- package/libs/dal/src/organization.ts +116 -0
- package/libs/dal/src/permission.ts +25 -0
- package/libs/dal/src/pipeline.test.ts +388 -0
- package/libs/dal/src/pipeline.ts +4222 -0
- package/libs/dal/src/report.ts +199 -0
- package/libs/dal/src/result.ts +16 -0
- package/libs/dal/src/search.ts +172 -0
- package/libs/dal/src/session.test.ts +110 -0
- package/libs/dal/src/session.ts +31 -0
- package/libs/dal/src/submission.test.ts +1304 -0
- package/libs/dal/src/submission.ts +1396 -0
- package/libs/dal/src/tool.ts +159 -0
- package/libs/dal/src/user.ts +16 -0
- package/libs/dal/src/workflow.test.ts +89 -0
- package/libs/dal/src/workflow.ts +262 -0
- package/libs/dal/tsconfig.build.json +4 -0
- package/libs/dal/tsconfig.json +22 -0
- package/libs/dal/vite.config.ts +34 -0
- package/libs/db/.prettierignore +6 -0
- package/libs/db/.prettierrc.js +12 -0
- package/libs/db/eslint.config.mjs +3 -0
- package/libs/db/package.json +52 -0
- package/libs/db/src/index.ts +24 -0
- package/libs/db/src/relations.ts +549 -0
- package/libs/db/src/schema.ts +2 -0
- package/libs/db/src/schemas/api.ts +35 -0
- package/libs/db/src/schemas/conversations.ts +175 -0
- package/libs/db/src/schemas/core.ts +359 -0
- package/libs/db/src/schemas/documents.ts +181 -0
- package/libs/db/src/schemas/feedback.ts +40 -0
- package/libs/db/src/schemas/index.ts +26 -0
- package/libs/db/src/schemas/organisations.ts +97 -0
- package/libs/db/src/schemas/pipelines.ts +440 -0
- package/libs/db/src/schemas/users.ts +95 -0
- package/libs/db/src/types.ts +190 -0
- package/libs/db/src/utils.ts +14 -0
- package/libs/db/tsconfig.json +19 -0
- package/libs/db/vite.config.ts +31 -0
- package/libs/lint/.prettierignore +6 -0
- package/libs/lint/eslint.config.mjs +61 -0
- package/libs/lint/package.json +29 -0
- package/libs/lint/prettier.config.js +12 -0
- package/libs/schemas/.prettierignore +6 -0
- package/libs/schemas/.prettierrc.js +12 -0
- package/libs/schemas/README.md +15 -0
- package/libs/schemas/eslint.config.mjs +3 -0
- package/libs/schemas/package.json +67 -0
- package/libs/schemas/src/core/chat.ts +67 -0
- package/libs/schemas/src/core/core-result.ts +15 -0
- package/libs/schemas/src/core/data-extraction.ts +184 -0
- package/libs/schemas/src/core/layout.ts +478 -0
- package/libs/schemas/src/core/pipeline.ts +128 -0
- package/libs/schemas/src/core/submission.ts +97 -0
- package/libs/schemas/src/db/account.ts +57 -0
- package/libs/schemas/src/db/apiKey.ts +57 -0
- package/libs/schemas/src/db/context.ts +33 -0
- package/libs/schemas/src/db/conversation.ts +65 -0
- package/libs/schemas/src/db/deal.ts +42 -0
- package/libs/schemas/src/db/document.ts +103 -0
- package/libs/schemas/src/db/documentCitation.ts +58 -0
- package/libs/schemas/src/db/documentExtraction.ts +69 -0
- package/libs/schemas/src/db/fieldCorrection.ts +85 -0
- package/libs/schemas/src/db/form.ts +45 -0
- package/libs/schemas/src/db/formField.ts +59 -0
- package/libs/schemas/src/db/formGroup.ts +42 -0
- package/libs/schemas/src/db/impersonation.ts +39 -0
- package/libs/schemas/src/db/index.ts +25 -0
- package/libs/schemas/src/db/invitation.ts +42 -0
- package/libs/schemas/src/db/member.ts +36 -0
- package/libs/schemas/src/db/message.ts +58 -0
- package/libs/schemas/src/db/organization.ts +62 -0
- package/libs/schemas/src/db/session.ts +48 -0
- package/libs/schemas/src/db/submission.ts +54 -0
- package/libs/schemas/src/db/submissionGroup.ts +36 -0
- package/libs/schemas/src/db/submissionItem.ts +33 -0
- package/libs/schemas/src/db/submissionItemVersion.ts +70 -0
- package/libs/schemas/src/db/user.ts +51 -0
- package/libs/schemas/src/db/utils.ts +3 -0
- package/libs/schemas/src/db/verification.ts +36 -0
- package/libs/schemas/src/db/workflow.ts +42 -0
- package/libs/schemas/src/index.ts +10 -0
- package/libs/schemas/tsconfig.json +21 -0
- package/libs/schemas/vite.config.ts +38 -0
- package/libs/ui/.prettierignore +6 -0
- package/libs/ui/.prettierrc.js +12 -0
- package/libs/ui/components.json +24 -0
- package/libs/ui/eslint.config.mjs +3 -0
- package/libs/ui/package.json +142 -0
- package/libs/ui/src/components/chart-viz/chart.tsx +255 -0
- package/libs/ui/src/components/chart-viz/converters.ts +474 -0
- package/libs/ui/src/components/chart-viz/dashboard.tsx +146 -0
- package/libs/ui/src/components/chart-viz/index.ts +37 -0
- package/libs/ui/src/components/chart-viz/markdown.tsx +344 -0
- package/libs/ui/src/components/chart-viz/table.tsx +446 -0
- package/libs/ui/src/components/chart-viz/theme-context.tsx +70 -0
- package/libs/ui/src/components/chart-viz/themes/dark.ts +98 -0
- package/libs/ui/src/components/chart-viz/themes/index.ts +69 -0
- package/libs/ui/src/components/chart-viz/themes/light.ts +98 -0
- package/libs/ui/src/components/chart-viz/themes/tailwind.ts +326 -0
- package/libs/ui/src/components/chart-viz/themes/types.ts +99 -0
- package/libs/ui/src/components/chart-viz/tool-display.tsx +150 -0
- package/libs/ui/src/components/chart-viz/types.ts +95 -0
- package/libs/ui/src/components/doc-viewers/excel/index.tsx +431 -0
- package/libs/ui/src/components/doc-viewers/excel/themes.ts +160 -0
- package/libs/ui/src/components/doc-viewers/image/index.tsx +410 -0
- package/libs/ui/src/components/doc-viewers/pdf/index.tsx +258 -0
- package/libs/ui/src/components/doc-viewers/pdf/virtualized-pdf.tsx +556 -0
- package/libs/ui/src/components/misc/rel-date.tsx +52 -0
- package/libs/ui/src/components/misc/styled-link.tsx +2 -0
- package/libs/ui/src/components/table/data-table.tsx +546 -0
- package/libs/ui/src/components/table/report-table.tsx +305 -0
- package/libs/ui/src/components/table/sortable-column.tsx +34 -0
- package/libs/ui/src/components/ui/accordion.tsx +62 -0
- package/libs/ui/src/components/ui/alert-dialog.tsx +142 -0
- package/libs/ui/src/components/ui/alert.tsx +62 -0
- package/libs/ui/src/components/ui/artifact.tsx +118 -0
- package/libs/ui/src/components/ui/attachments.tsx +388 -0
- package/libs/ui/src/components/ui/avatar.tsx +39 -0
- package/libs/ui/src/components/ui/badge.tsx +43 -0
- package/libs/ui/src/components/ui/breadcrumb.tsx +102 -0
- package/libs/ui/src/components/ui/button-group.tsx +78 -0
- package/libs/ui/src/components/ui/button.tsx +79 -0
- package/libs/ui/src/components/ui/card.tsx +32 -0
- package/libs/ui/src/components/ui/carousel.tsx +228 -0
- package/libs/ui/src/components/ui/chain-of-thought.tsx +198 -0
- package/libs/ui/src/components/ui/checkbox.tsx +27 -0
- package/libs/ui/src/components/ui/citation.tsx +34 -0
- package/libs/ui/src/components/ui/code-block.tsx +500 -0
- package/libs/ui/src/components/ui/collapsible.tsx +19 -0
- package/libs/ui/src/components/ui/command.tsx +161 -0
- package/libs/ui/src/components/ui/conversation.tsx +90 -0
- package/libs/ui/src/components/ui/dialog.tsx +142 -0
- package/libs/ui/src/components/ui/dropdown-menu.tsx +246 -0
- package/libs/ui/src/components/ui/highlight.tsx +3 -0
- package/libs/ui/src/components/ui/hover-card.tsx +36 -0
- package/libs/ui/src/components/ui/inline-citation.tsx +251 -0
- package/libs/ui/src/components/ui/input-group.tsx +156 -0
- package/libs/ui/src/components/ui/input-otp.tsx +78 -0
- package/libs/ui/src/components/ui/input.tsx +21 -0
- package/libs/ui/src/components/ui/label.tsx +19 -0
- package/libs/ui/src/components/ui/model-selector.tsx +174 -0
- package/libs/ui/src/components/ui/multisidebar.tsx +750 -0
- package/libs/ui/src/components/ui/popover.tsx +43 -0
- package/libs/ui/src/components/ui/progress.tsx +28 -0
- package/libs/ui/src/components/ui/reasoning.tsx +178 -0
- package/libs/ui/src/components/ui/resizable.tsx +49 -0
- package/libs/ui/src/components/ui/scroll-area.tsx +54 -0
- package/libs/ui/src/components/ui/select.tsx +171 -0
- package/libs/ui/src/components/ui/separator.tsx +26 -0
- package/libs/ui/src/components/ui/sheet.tsx +128 -0
- package/libs/ui/src/components/ui/shimmer.tsx +53 -0
- package/libs/ui/src/components/ui/skeleton.tsx +13 -0
- package/libs/ui/src/components/ui/sonner.tsx +23 -0
- package/libs/ui/src/components/ui/switch.tsx +26 -0
- package/libs/ui/src/components/ui/table.tsx +96 -0
- package/libs/ui/src/components/ui/tabs.tsx +52 -0
- package/libs/ui/src/components/ui/textarea.tsx +41 -0
- package/libs/ui/src/components/ui/tool.tsx +209 -0
- package/libs/ui/src/components/ui/tooltip.tsx +58 -0
- package/libs/ui/src/components/ui/typography.tsx +113 -0
- package/libs/ui/src/fonts/manrope-v15-latin-300.woff2 +0 -0
- package/libs/ui/src/fonts/manrope-v15-latin-400.woff2 +0 -0
- package/libs/ui/src/fonts/manrope-v15-latin-500.woff2 +0 -0
- package/libs/ui/src/fonts/manrope-v15-latin-600.woff2 +0 -0
- package/libs/ui/src/hooks/use-mobile.ts +19 -0
- package/libs/ui/src/lib/utils.ts +6 -0
- package/libs/ui/src/styles/fonts.css +35 -0
- package/libs/ui/src/styles/style.css +218 -0
- package/libs/ui/tsconfig.json +21 -0
- package/libs/ui/vite.config.ts +80 -0
- package/libs/ui-lit/README.md +245 -0
- package/libs/ui-lit/TESTING_GUIDE.md +296 -0
- package/libs/ui-lit/eslint.config.mjs +3 -0
- package/libs/ui-lit/package.json +41 -0
- package/libs/ui-lit/scripts/build-css.js +43 -0
- package/libs/ui-lit/src/components/sea-alert.ts +132 -0
- package/libs/ui-lit/src/components/sea-button.ts +95 -0
- package/libs/ui-lit/src/components/sea-card.ts +113 -0
- package/libs/ui-lit/src/components/sea-input.ts +184 -0
- package/libs/ui-lit/src/components/sea-spinner.ts +65 -0
- package/libs/ui-lit/src/index.ts +15 -0
- package/libs/ui-lit/src/lib/utils.ts +6 -0
- package/libs/ui-lit/src/styles/tailwind.css +76 -0
- package/libs/ui-lit/src/theme.css +66 -0
- package/libs/ui-lit/src/theme.ts +79 -0
- package/libs/ui-lit/src/vite-env.d.ts +6 -0
- package/libs/ui-lit/tailwind.config.ts +50 -0
- package/libs/ui-lit/test.html +289 -0
- package/libs/ui-lit/tsconfig.json +23 -0
- package/libs/ui-lit/vite.config.ts +31 -0
- package/libs/ui-lit/vite.css.config.ts +20 -0
- package/libs/util/.prettierignore +6 -0
- package/libs/util/.prettierrc.js +12 -0
- package/libs/util/eslint.config.mjs +3 -0
- package/libs/util/package.json +45 -0
- package/libs/util/src/billing.ts +10 -0
- package/libs/util/src/data-transform.ts +19 -0
- package/libs/util/src/encryption.ts +45 -0
- package/libs/util/src/fmt.test.ts +9 -0
- package/libs/util/src/fmt.ts +71 -0
- package/libs/util/src/fuzzy.ts +47 -0
- package/libs/util/src/id.ts +24 -0
- package/libs/util/src/invariant.ts +31 -0
- package/libs/util/src/sub-name.ts +7 -0
- package/libs/util/tsconfig.json +19 -0
- package/libs/util/vite.config.ts +34 -0
- package/package.json +28 -0
- package/packages/widget/.prettierignore +6 -0
- package/packages/widget/.prettierrc.js +12 -0
- package/packages/widget/README.md +95 -0
- package/packages/widget/eslint.config.mjs +11 -0
- package/packages/widget/openapi-ts.config.ts +8 -0
- package/packages/widget/package.json +89 -0
- package/packages/widget/postcss.config.mjs +10 -0
- package/packages/widget/src/clients/api/client/client.ts +187 -0
- package/packages/widget/src/clients/api/client/index.ts +22 -0
- package/packages/widget/src/clients/api/client/types.ts +192 -0
- package/packages/widget/src/clients/api/client/utils.ts +394 -0
- package/packages/widget/src/clients/api/client.gen.ts +18 -0
- package/packages/widget/src/clients/api/core/auth.ts +39 -0
- package/packages/widget/src/clients/api/core/bodySerializer.ts +74 -0
- package/packages/widget/src/clients/api/core/params.ts +132 -0
- package/packages/widget/src/clients/api/core/pathSerializer.ts +169 -0
- package/packages/widget/src/clients/api/core/types.ts +80 -0
- package/packages/widget/src/clients/api/index.ts +3 -0
- package/packages/widget/src/clients/api/sdk.gen.ts +805 -0
- package/packages/widget/src/clients/api/types.gen.ts +2085 -0
- package/packages/widget/src/components/container.tsx +42 -0
- package/packages/widget/src/components/data-display.tsx +384 -0
- package/packages/widget/src/components/data-viewer.tsx +311 -0
- package/packages/widget/src/components/doc-list.tsx +102 -0
- package/packages/widget/src/components/field-correction-modal.tsx +265 -0
- package/packages/widget/src/components/header.tsx +71 -0
- package/packages/widget/src/components/new-submission.tsx +290 -0
- package/packages/widget/src/components/sidebar-right.tsx +19 -0
- package/packages/widget/src/components/submission-card.tsx +66 -0
- package/packages/widget/src/components/submission-page.tsx +75 -0
- package/packages/widget/src/components/upload-doc.tsx +241 -0
- package/packages/widget/src/components/widget.tsx +101 -0
- package/packages/widget/src/index.tsx +167 -0
- package/packages/widget/src/lib/config.ts +2 -0
- package/packages/widget/src/lib/util.ts +40 -0
- package/packages/widget/src/styles/index.css +5 -0
- package/packages/widget/src/styles/tw-properties.css +337 -0
- package/packages/widget/src/vite-env.d.ts +3 -0
- package/packages/widget/tsconfig.app.json +35 -0
- package/packages/widget/tsconfig.json +4 -0
- package/packages/widget/tsconfig.node.json +24 -0
- package/packages/widget/vite.config.ts +116 -0
- package/packages/widget-lit/BOTTLENECKS.md +250 -0
- package/packages/widget-lit/IMPLEMENTATION_SUMMARY.md +295 -0
- package/packages/widget-lit/README.md +232 -0
- package/packages/widget-lit/eslint.config.mjs +3 -0
- package/packages/widget-lit/package.json +52 -0
- package/packages/widget-lit/src/api-client.ts +230 -0
- package/packages/widget-lit/src/api-client.ts.backup +218 -0
- package/packages/widget-lit/src/components/sea-chat.ts +382 -0
- package/packages/widget-lit/src/components/sea-submission-viewer.ts +267 -0
- package/packages/widget-lit/src/components/sea-widget.ts +317 -0
- package/packages/widget-lit/src/index.ts +48 -0
- package/packages/widget-lit/src/react.ts +58 -0
- package/packages/widget-lit/src/style.css +47 -0
- package/packages/widget-lit/tsconfig.json +24 -0
- package/packages/widget-lit/vite.config.ts +29 -0
- package/packages/widget-ng/DEVELOPMENT.md +74 -0
- package/packages/widget-ng/README.md +657 -0
- package/packages/widget-ng/dev.sh +14 -0
- package/packages/widget-ng/eslint.config.mjs +24 -0
- package/packages/widget-ng/ng-package.json +9 -0
- package/packages/widget-ng/package.json +85 -0
- package/packages/widget-ng/src/index.ts +45 -0
- package/packages/widget-ng/src/lib/components/sea-chat.component.ts +737 -0
- package/packages/widget-ng/src/lib/components/sea-data-viewer.component.ts +2240 -0
- package/packages/widget-ng/src/lib/components/sea-deal-form-modal.component.ts +702 -0
- package/packages/widget-ng/src/lib/components/sea-document-list.component.ts +350 -0
- package/packages/widget-ng/src/lib/components/sea-feedback-modal.component.ts +461 -0
- package/packages/widget-ng/src/lib/components/sea-file-upload.component.ts +655 -0
- package/packages/widget-ng/src/lib/components/sea-model-selection-modal.component.ts +367 -0
- package/packages/widget-ng/src/lib/components/sea-new-submission-modal.component.ts +414 -0
- package/packages/widget-ng/src/lib/components/sea-pdf-viewer.component.ts +869 -0
- package/packages/widget-ng/src/lib/components/sea-submission-card.component.ts +251 -0
- package/packages/widget-ng/src/lib/components/sea-widget.component.ts +684 -0
- package/packages/widget-ng/src/lib/models/submission.model.ts +170 -0
- package/packages/widget-ng/src/lib/pipes/markdown.pipe.ts +57 -0
- package/packages/widget-ng/src/lib/services/api-client.service.ts +715 -0
- package/packages/widget-ng/src/lib/services/chat.service.ts +330 -0
- package/packages/widget-ng/src/lib/services/config.service.ts +107 -0
- package/packages/widget-ng/src/web-component.ts +56 -0
- package/packages/widget-ng/tsconfig.json +25 -0
- package/packages/widget-ng/tsconfig.lib.json +9 -0
- package/packages/widget-ng/vite.config.elements.ts +26 -0
- package/packages/widget-ng/vitest.config.ts +19 -0
- package/packages/widget-ng/vitest.setup.ts +13 -0
- package/pnpm-workspace.yaml +18 -0
- package/render.yaml +136 -0
- package/scripts/README.md +57 -0
- package/scripts/package.json +22 -0
- package/scripts/python/.python-version +1 -0
- package/scripts/python/README.md +3 -0
- package/scripts/python/export-org-data.py +693 -0
- package/scripts/python/pyproject.toml +29 -0
- package/scripts/python/requirements-dev.lock +36 -0
- package/scripts/python/requirements.lock +36 -0
- package/scripts/python/src/gen.py +297 -0
- package/scripts/python/test.py +34 -0
- package/scripts/src/fix-storage-provider-mismatch.ts +239 -0
- package/scripts/src/sync-render-yaml.ts +290 -0
- package/scripts/src/test-chat-stream.ts +300 -0
- package/scripts/src/test-reconciliation.ts +230 -0
- package/scripts/tsconfig.json +15 -0
- package/tests/angular-test-app/.vscode/extensions.json +4 -0
- package/tests/angular-test-app/.vscode/launch.json +13 -0
- package/tests/angular-test-app/.vscode/tasks.json +24 -0
- package/tests/angular-test-app/README.md +59 -0
- package/tests/angular-test-app/angular.json +111 -0
- package/tests/angular-test-app/clean-start.sh +14 -0
- package/tests/angular-test-app/package.json +36 -0
- package/tests/angular-test-app/public/favicon.ico +0 -0
- package/tests/angular-test-app/src/app/app.component.ts +220 -0
- package/tests/angular-test-app/src/app/app.config.ts +5 -0
- package/tests/angular-test-app/src/env.d.ts +13 -0
- package/tests/angular-test-app/src/index.html +13 -0
- package/tests/angular-test-app/src/main.ts +6 -0
- package/tests/angular-test-app/src/styles.css +8 -0
- package/tests/angular-test-app/tsconfig.app.json +15 -0
- package/tests/angular-test-app/tsconfig.json +27 -0
- package/tests/crm-viewer-app/API_INTEGRATION_SUMMARY.md +295 -0
- package/tests/crm-viewer-app/CURRENT_ASSETS_FIELDS.md +148 -0
- package/tests/crm-viewer-app/FIELD_ID_MAPPING.md +206 -0
- package/tests/crm-viewer-app/INTEGRATION_GUIDE.md +309 -0
- package/tests/crm-viewer-app/README.md +174 -0
- package/tests/crm-viewer-app/REAL_API_INTEGRATION.md +240 -0
- package/tests/crm-viewer-app/UPDATED_IMPLEMENTATION.md +279 -0
- package/tests/crm-viewer-app/angular.json +114 -0
- package/tests/crm-viewer-app/package.json +35 -0
- package/tests/crm-viewer-app/src/app/app.component.ts +534 -0
- package/tests/crm-viewer-app/src/app/citation.service.ts +316 -0
- package/tests/crm-viewer-app/src/env.d.ts +16 -0
- package/tests/crm-viewer-app/src/index.html +19 -0
- package/tests/crm-viewer-app/src/main.ts +7 -0
- package/tests/crm-viewer-app/src/styles.css +409 -0
- package/tests/crm-viewer-app/src/template.html +2678 -0
- package/tests/crm-viewer-app/tsconfig.app.json +15 -0
- package/tests/crm-viewer-app/tsconfig.json +27 -0
- package/tests/e2e/package.json +17 -0
- package/tests/e2e/playwright.config.ts +75 -0
- package/tests/e2e/tests/api/health.spec.ts +10 -0
- package/tests/e2e/tests/app/example.spec.ts +10 -0
- package/tests/widget-test-app/.prettierignore +6 -0
- package/tests/widget-test-app/README.md +48 -0
- package/tests/widget-test-app/index.html +12 -0
- package/tests/widget-test-app/package.json +24 -0
- package/tests/widget-test-app/src/App.css +192 -0
- package/tests/widget-test-app/src/App.tsx +80 -0
- package/tests/widget-test-app/src/main.tsx +9 -0
- package/tests/widget-test-app/src/vite-env.d.ts +4 -0
- package/tests/widget-test-app/tsconfig.json +25 -0
- package/tests/widget-test-app/tsconfig.node.json +11 -0
- package/tests/widget-test-app/vite.config.ts +14 -0
|
@@ -0,0 +1,1071 @@
|
|
|
1
|
+
import type { v1beta3 } from "@google-cloud/documentai";
|
|
2
|
+
import { type protos } from "@google-cloud/documentai";
|
|
3
|
+
import type { Storage } from "@google-cloud/storage";
|
|
4
|
+
import type { GCPConfig } from "@sea/config";
|
|
5
|
+
import {
|
|
6
|
+
type LayoutItemType,
|
|
7
|
+
type PretrainedEntity,
|
|
8
|
+
type EntityType,
|
|
9
|
+
type ReferenceMap,
|
|
10
|
+
type Polygon,
|
|
11
|
+
Cell,
|
|
12
|
+
DocumentLayout,
|
|
13
|
+
Header,
|
|
14
|
+
Footer,
|
|
15
|
+
Heading,
|
|
16
|
+
List,
|
|
17
|
+
ListItem,
|
|
18
|
+
Paragraph,
|
|
19
|
+
Row,
|
|
20
|
+
Section,
|
|
21
|
+
Table,
|
|
22
|
+
} from "@sea/schemas/core/layout";
|
|
23
|
+
import { MAX_PAGES_PER_CHUNK } from "../pdf-utils";
|
|
24
|
+
import { sequential } from "@sea/util/id";
|
|
25
|
+
|
|
26
|
+
// Short aliases for the Document AI `v1beta3` protobuf interfaces used in this
// module, so the rest of the file does not repeat the fully qualified paths.

/** Alias for the `v1beta3` `IDocument` interface. */
export type GCPDocument = protos.google.cloud.documentai.v1beta3.IDocument;

/** Alias for `Document.IEntity`. */
export type GCPEntity = protos.google.cloud.documentai.v1beta3.Document.IEntity;

/** Alias for `Document.IPage`. */
export type GCPPage = protos.google.cloud.documentai.v1beta3.Document.IPage;

/** Alias for `Document.DocumentLayout.IDocumentLayoutBlock`. */
export type GCPLayoutBlock =
  protos.google.cloud.documentai.v1beta3.Document.DocumentLayout.IDocumentLayoutBlock;

/** Alias for `DocumentLayoutBlock.ILayoutTextBlock`. */
export type GCPLayoutTextBlock =
  protos.google.cloud.documentai.v1beta3.Document.DocumentLayout.DocumentLayoutBlock.ILayoutTextBlock;

/** Alias for `DocumentLayoutBlock.ILayoutTableBlock`. */
export type GCPLayoutTableBlock =
  protos.google.cloud.documentai.v1beta3.Document.DocumentLayout.DocumentLayoutBlock.ILayoutTableBlock;

/** Alias for `DocumentLayoutBlock.ILayoutTableCell`. */
export type GCPLayoutTableCell =
  protos.google.cloud.documentai.v1beta3.Document.DocumentLayout.DocumentLayoutBlock.ILayoutTableCell;

/** Alias for `DocumentLayoutBlock.ILayoutListBlock`. */
export type GCPLayoutListBlock =
  protos.google.cloud.documentai.v1beta3.Document.DocumentLayout.DocumentLayoutBlock.ILayoutListBlock;

/** Alias for `DocumentLayoutBlock.ILayoutImageBlock`. */
export type GCPLayoutImageBlock =
  protos.google.cloud.documentai.v1beta3.Document.DocumentLayout.DocumentLayoutBlock.ILayoutImageBlock;

/** Alias for `Document.Page.ILayout`. */
export type GCPPageItemLayout =
  protos.google.cloud.documentai.v1beta3.Document.Page.ILayout;
|
|
43
|
+
|
|
44
|
+
/**
 * Google Cloud Document AI adapter for document extraction.
 *
 * Submits documents stored in Cloud Storage to a Document AI processor and
 * returns the resulting documents. The Document AI and Cloud Storage SDKs are
 * loaded with dynamic `import()` the first time they are needed, and the
 * created clients are cached on the instance.
 */
export class GCPDocumentProcessor {
  private documentProcessorClient?: v1beta3.DocumentProcessorServiceClient;
  private storageClient?: Storage;
  private readonly config: GCPConfig;

  /**
   * @param config - GCP settings; this class reads `projectId`,
   *   `serviceAccountCredentials`, `documentProcessor` and `storage` from it.
   */
  constructor(config: GCPConfig) {
    this.config = config;
  }

  /**
   * Lazy-loads and returns the cached Google Cloud Storage client.
   * Uses a dynamic import so the package is only loaded when needed.
   * @returns The Google Cloud Storage client
   */
  private async getStorageClient(): Promise<Storage> {
    if (!this.storageClient) {
      const { Storage } = await import("@google-cloud/storage");
      this.storageClient = new Storage({
        projectId: this.config.projectId,
        keyFilename: this.config.serviceAccountCredentials,
      });
    }
    return this.storageClient;
  }

  /**
   * Lazy-loads and returns the cached Document AI (`v1beta3`) client.
   * Uses a dynamic import so the package is only loaded when needed.
   * @returns The Document AI processor service client
   */
  private async getDocumentAIClient(): Promise<v1beta3.DocumentProcessorServiceClient> {
    if (!this.documentProcessorClient) {
      const { v1beta3 } = await import("@google-cloud/documentai");
      this.documentProcessorClient = new v1beta3.DocumentProcessorServiceClient(
        {
          keyFilename: this.config.serviceAccountCredentials,
          apiEndpoint: this.config.documentProcessor.apiEndpoint,
        },
      );
    }
    return this.documentProcessorClient;
  }

  /** Builds the fully qualified processor resource name for `modelId`. */
  private getProcessorName(modelId: string): string {
    return `projects/${this.config.projectId}/locations/${this.config.documentProcessor.location}/processors/${modelId}`;
  }

  /** Builds the `gs://` URI of an input file inside the configured document bucket. */
  private toInputGcsUri(file: string): string {
    return `gs://${this.config.storage.documentBucketName}/${file}`;
  }

  /**
   * Returns the extra request fields for the layout parser model (bounding
   * boxes enabled), or an empty object for every other model.
   */
  private getLayoutProcessOptions(modelId: string): {
    processOptions?: { layoutConfig: { returnBoundingBoxes: boolean } };
  } {
    if (modelId !== this.config.documentProcessor.layoutParserModelId) {
      return {};
    }
    return { processOptions: { layoutConfig: { returnBoundingBoxes: true } } };
  }

  /**
   * Performs document extraction on a list of input files.
   * Currently always delegates to batch processing.
   *
   * @param inputFiles - Object paths inside the configured document bucket
   * @param modelId - The processor (model) ID to use for extraction
   * @param mimeType - The MIME type of the documents
   * @returns Array of extracted documents (one per input, in input order)
   */
  public async runExtraction(
    inputFiles: string[],
    modelId: string,
    mimeType: string,
  ): Promise<GCPDocument[]> {
    return await this.runBatchExtraction(inputFiles, modelId, mimeType);
  }

  /**
   * Runs one synchronous `processDocument` call per input file, in parallel.
   * Not called by `runExtraction` at present.
   *
   * @param inputFiles - Object paths inside the configured document bucket
   * @param modelId - The processor (model) ID to use for extraction
   * @param mimeType - The MIME type of the documents
   * @returns Array of extracted documents (one per input, in input order)
   * @throws Error if the service returns no document for an input
   */
  private async runSyncExtraction(
    inputFiles: string[],
    modelId: string,
    mimeType: string,
  ): Promise<GCPDocument[]> {
    const documentProcessorClient = await this.getDocumentAIClient();
    const name = this.getProcessorName(modelId);
    const layoutOptions = this.getLayoutProcessOptions(modelId);

    return await Promise.all(
      inputFiles.map(async (f) => {
        const [result] = await documentProcessorClient.processDocument({
          name,
          gcsDocument: { gcsUri: this.toInputGcsUri(f), mimeType },
          ...layoutOptions,
        });
        if (!result.document) {
          throw new Error(
            `No document returned from synchronous processing for ${f}`,
          );
        }
        return result.document;
      }),
    );
  }

  /**
   * Runs a single batch operation over all input files, waits for it to
   * finish, then loads each input's output from the output bucket.
   *
   * @param inputFiles - Object paths inside the configured document bucket
   * @param modelId - The processor (model) ID to use for extraction
   * @param mimeType - The MIME type of the documents
   * @returns Array of extracted documents (one per input, in input order)
   * @throws Error if the operation metadata has no output destination for an
   *   input; the per-document status message is appended when present
   */
  private async runBatchExtraction(
    inputFiles: string[],
    modelId: string,
    mimeType: string,
  ): Promise<GCPDocument[]> {
    const outputGcsUri = `gs://${this.config.storage.outputBucketName}`;
    const documentProcessorClient = await this.getDocumentAIClient();

    // Input URIs are computed once; they are also the lookup keys used to
    // match the per-document statuses reported in the operation metadata.
    const inputUris = inputFiles.map((f) => this.toInputGcsUri(f));

    const [operation] = await documentProcessorClient.batchProcessDocuments({
      name: this.getProcessorName(modelId),
      inputDocuments: {
        gcsDocuments: {
          documents: inputUris.map((gcsUri) => ({ gcsUri, mimeType })),
        },
      },
      documentOutputConfig: {
        gcsOutputConfig: {
          gcsUri: outputGcsUri,
        },
      },
      ...this.getLayoutProcessOptions(modelId),
    });

    // The resolved tuple is [result, metadata, ...]; only metadata is needed.
    const [, metadata] = await operation.promise();
    const statuses = metadata.individualProcessStatuses ?? [];

    return await Promise.all(
      inputFiles.map(async (f, i) => {
        const status = statuses.find((s) => s.inputGcsSource === inputUris[i]);
        const outputUri = status?.outputGcsDestination;
        if (!outputUri) {
          const reason = status?.status?.message;
          throw new Error(
            `No batch processing output found for ${f}` +
              (reason ? `: ${reason}` : ""),
          );
        }

        return await this.mergeGcpShards(outputUri);
      }),
    );
  }

  /**
   * Downloads every JSON shard written for one input document and merges
   * their `pages` into the first shard, which is returned.
   *
   * NOTE(review): only `pages` is merged; other per-shard fields (e.g. `text`,
   * `entities`, `documentLayout`) of later shards are not - confirm whether
   * sharded output for the processors in use carries them.
   *
   * @param outputUri - `gs://` destination reported for one input document
   * @returns The first shard, with the pages of all later shards appended
   * @throws Error if no `.json` object exists under the destination
   */
  private async mergeGcpShards(outputUri: string): Promise<GCPDocument> {
    const storageClient = await this.getStorageClient();
    const { bucket, prefix } = this.parseGcsUri(outputUri);
    const [files] = await storageClient.bucket(bucket).getFiles({ prefix });
    const allJsonFiles = files.filter((file) => file.name.endsWith(".json"));

    // A raw prefix such as ".../1" also matches a sibling ".../10/...".
    // Prefer objects strictly below "<prefix>/"; fall back to the raw prefix
    // matches if the destination turns out not to be laid out as a directory.
    const directoryPrefix = prefix.endsWith("/") ? prefix : `${prefix}/`;
    const scopedJsonFiles = allJsonFiles.filter((file) =>
      file.name.startsWith(directoryPrefix),
    );
    const jsonFiles =
      scopedJsonFiles.length > 0 ? scopedJsonFiles : allJsonFiles;

    if (jsonFiles.length === 0) {
      throw new Error(`No JSON files found in ${prefix}`);
    }

    const shards = await Promise.all(
      jsonFiles.map(async (file): Promise<GCPDocument> => {
        const [contents] = await file.download();
        return JSON.parse(contents.toString("utf8")) as GCPDocument;
      }),
    );

    // Object listing is lexicographic ("-10.json" sorts before "-2.json"), so
    // order shards by their declared index. The sort is stable: shards without
    // shard info keep their listing order.
    const shardIndexOf = (shard: GCPDocument): number => {
      const raw = shard.shardInfo?.shardIndex;
      const parsed = raw == null ? 0 : parseInt(raw.toString(), 10);
      return Number.isNaN(parsed) ? 0 : parsed;
    };
    shards.sort((a, b) => shardIndexOf(a) - shardIndexOf(b));

    const [mainDocument, ...laterShards] = shards;
    if (!mainDocument) {
      throw new Error(`No JSON files found in ${prefix}`);
    }

    for (const shard of laterShards) {
      if (Array.isArray(shard.pages)) {
        // The first shard may have no `pages` array at all.
        mainDocument.pages = (mainDocument.pages ?? []).concat(shard.pages);
      }
    }

    return mainDocument;
  }

  /**
   * Splits a GCS URI into its bucket and object prefix.
   *
   * @param gcsUri - GCS URI in the form `gs://bucket/path/to/prefix`
   * @returns Object with `bucket` and `prefix`
   * @throws Error if the URI is malformed or has an empty bucket or prefix
   */
  private parseGcsUri(gcsUri: string): { bucket: string; prefix: string } {
    const match = gcsUri.match(/^gs:\/\/([^/]+)\/(.*)$/);
    if (!match) {
      throw new Error(`Invalid GCS URI: ${gcsUri}`);
    }
    const [, bucket, prefix] = match;
    if (!bucket || !prefix) {
      throw new Error(`Invalid GCS URI: ${gcsUri}`);
    }
    return { bucket, prefix };
  }
}
|
|
269
|
+
|
|
270
|
+
/**
 * Merges multiple GCP document chunks into a single document with corrected page offsets.
 *
 * The first chunk is used as the merge target and is returned. Every later
 * chunk at position `i` (0-based) has its page references shifted by
 * `i * MAX_PAGES_PER_CHUNK` before its text, layout blocks, pages and entities
 * are appended to the target. All inputs are modified in place.
 *
 * Target arrays that are missing on the first chunk are created on demand, so
 * data from later chunks is never dropped.
 *
 * NOTE(review): `text` is concatenated, but text-anchor indices inside later
 * chunks are not shifted by the length of the preceding text - confirm whether
 * consumers that slice the merged `text` by those anchors need that.
 *
 * @param layouts - Array of GCP document chunks to merge, in document order
 * @returns The first chunk, now holding the merged content
 * @throws Error if `layouts` is empty
 */
export function mergeChunkedLayoutsGCP(layouts: GCPDocument[]): GCPDocument {
  const [mergedDocument] = layouts;
  if (!mergedDocument) {
    throw new Error("Cannot merge empty layouts array");
  }

  for (const [i, layout] of layouts.slice(1).entries()) {
    const pageOffset = (i + 1) * MAX_PAGES_PER_CHUNK;

    // Merge text; a missing `text` on either side counts as empty.
    mergedDocument.text = (mergedDocument.text ?? "") + (layout.text ?? "");

    // Merge layout blocks with the offset applied to their page spans.
    const blocks = layout.documentLayout?.blocks;
    if (blocks) {
      const targetLayout =
        mergedDocument.documentLayout ?? (mergedDocument.documentLayout = {});
      const targetBlocks = targetLayout.blocks ?? (targetLayout.blocks = []);
      for (const block of blocks) {
        offsetBlockPages(block, pageOffset);
        targetBlocks.push(block);
      }
    }

    // Merge pages with the offset applied to their page numbers.
    if (layout.pages) {
      const targetPages = mergedDocument.pages ?? (mergedDocument.pages = []);
      for (const page of layout.pages) {
        if (page.pageNumber) {
          page.pageNumber = page.pageNumber + pageOffset;
        }
        targetPages.push(page);
      }
    }

    // Merge pretrained entities with the offset applied to their page anchors.
    if (layout.entities) {
      const targetEntities =
        mergedDocument.entities ?? (mergedDocument.entities = []);
      for (const entity of layout.entities) {
        offsetEntityPages(entity, pageOffset);
        targetEntities.push(entity);
      }
    }
  }

  return mergedDocument;
}
|
|
323
|
+
|
|
324
|
+
/**
 * Shifts every page reference of a GCP layout block, and of all blocks nested
 * inside it, by `pageOffset`. The block is modified in place.
 *
 * A missing `pageStart` / `pageEnd` on an existing page span is treated as 0.
 * Nested blocks are reached through text blocks, table header and body cells,
 * and list entries.
 *
 * @param block - GCP layout block to offset (modified in place)
 * @param pageOffset - Number of pages to add to all page references
 */
function offsetBlockPages(block: GCPLayoutBlock, pageOffset: number): void {
  const span = block.pageSpan;
  if (span) {
    span.pageStart = (span.pageStart ?? 0) + pageOffset;
    span.pageEnd = (span.pageEnd ?? 0) + pageOffset;
  }

  // Children of a text block.
  for (const child of block.textBlock?.blocks ?? []) {
    offsetBlockPages(child, pageOffset);
  }

  // Children held in table cells: header rows first, then body rows.
  const table = block.tableBlock;
  const rows = [...(table?.headerRows ?? []), ...(table?.bodyRows ?? [])];
  for (const row of rows) {
    for (const cell of row.cells ?? []) {
      for (const child of cell.blocks ?? []) {
        offsetBlockPages(child, pageOffset);
      }
    }
  }

  // Children held in list entries.
  for (const entry of block.listBlock?.listEntries ?? []) {
    for (const child of entry.blocks ?? []) {
      offsetBlockPages(child, pageOffset);
    }
  }
}
|
|
361
|
+
|
|
362
|
+
/**
 * Shifts every page reference of a GCP entity, and of all its nested
 * properties, by `pageOffset`. The entity is modified in place.
 *
 * Each page ref's `page` is rewritten as a decimal string. A ref whose `page`
 * is missing, `null`, `0` or unparsable is treated as page index 0 and is
 * shifted as well, so entities on the first page of a chunk are not left
 * pointing at the first page of the whole document.
 *
 * @param entity - GCP entity to offset (modified in place)
 * @param pageOffset - Number of pages to add to all page references
 */
function offsetEntityPages(entity: GCPEntity, pageOffset: number): void {
  for (const ref of entity.pageAnchor?.pageRefs ?? []) {
    // `page` may be a number, a string or a Long-like object; normalise it
    // through its string form. Absent values mean index 0.
    const currentPage =
      ref.page == null ? 0 : parseInt(ref.page.toString(), 10) || 0;
    ref.page = (currentPage + pageOffset).toString();
  }

  // Nested properties are entities themselves and carry their own anchors.
  for (const property of entity.properties ?? []) {
    offsetEntityPages(property, pageOffset);
  }
}
|
|
382
|
+
|
|
383
|
+
/**
 * Converts a GCP Document AI result into a `DocumentLayout`.
 *
 * Two input shapes are handled:
 * - if the document has a non-empty `documentLayout.blocks` list (generic
 *   layout parser output), every block is passed to `parseBlock`;
 * - otherwise (pretrained model output) every page is passed to `parsePage`
 *   and the document's entities are serialized as well.
 *
 * @param doc - GCP Document AI extraction result
 * @returns Document layout built from the layout items, entities and reference map
 */
export function serializeLayoutGCP(doc: GCPDocument): DocumentLayout {
  const referenceMap: ReferenceMap = {};
  const entities: Record<string, PretrainedEntity> = {};
  const currentLayout: LayoutItemType[] = [];

  // Generic layout parser output: walk the block tree.
  const layoutBlocks = doc.documentLayout?.blocks ?? [];
  if (layoutBlocks.length > 0) {
    const sectionStack: Section[] = [];
    for (const block of layoutBlocks) {
      parseBlock(block, currentLayout, referenceMap, sectionStack);
    }
    return new DocumentLayout(currentLayout, entities, referenceMap);
  }

  // Pretrained model output: text is resolved by slicing the document text
  // with each text segment of the item's text anchor and joining the pieces.
  const getText = (layout?: GCPPageItemLayout | null): string => {
    const segments = layout?.textAnchor?.textSegments;
    if (!segments) {
      return "";
    }
    let text = "";
    for (const { startIndex, endIndex } of segments) {
      text += doc.text?.slice(startIndex as number, endIndex as number) ?? "";
    }
    return text;
  };

  for (const page of doc.pages ?? []) {
    parsePage(page, currentLayout, referenceMap, getText);
  }
  Object.assign(entities, serializePretrainedEntities(doc, referenceMap));

  return new DocumentLayout(currentLayout, entities, referenceMap);
}
|
|
431
|
+
|
|
432
|
+
/**
 * Converts the entities of a GCP document into `PretrainedEntity` objects
 * keyed by entity type.
 *
 * Entities without a type are skipped. The first entity of a type is stored
 * directly. When a second entity of the same type arrives, the entry becomes
 * an `"array"` wrapper (reusing the second entity's id) holding both; further
 * entities of that type are appended to the wrapper's `valueArray`.
 *
 * @param content - GCP Document AI extraction result
 * @param referenceMap - Reference map passed through to the entity conversion
 * @returns Record mapping entity types to their converted PretrainedEntity objects
 */
function serializePretrainedEntities(
  content: GCPDocument,
  referenceMap: ReferenceMap,
): Record<string, PretrainedEntity> {
  const byType: Record<string, PretrainedEntity> = {};
  const rawEntities = content.entities ?? [];
  console.info(
    `[serializePretrainedEntities] Processing ${rawEntities.length} entities`,
  );

  // One generator is shared so ids are sequential across the whole document.
  const idGen = sequential();

  for (const raw of rawEntities) {
    const key = raw.type;
    if (!key) {
      continue;
    }

    const converted = convertGCPEntity(raw, referenceMap, idGen);
    const previous = byType[key];

    if (!previous) {
      // First entity of this type.
      byType[key] = converted;
      continue;
    }

    if (previous.type === "array") {
      // Already grouped: append to the existing wrapper.
      if (!previous.valueArray) {
        previous.valueArray = [];
      }
      previous.valueArray.push(converted);
      continue;
    }

    // Second entity of this type: wrap both in an array entity.
    byType[key] = {
      id: converted.id,
      type: "array",
      valueArray: [previous, converted],
    };
  }

  return byType;
}
|
|
478
|
+
|
|
479
|
+
/**
 * Converts a GCP Document AI entity into the standardized PretrainedEntity format.
 *
 * The entity receives a fresh `/pe/<n>` id from `idGen`; its mention text and
 * confidence are copied over (defaulting to "" and 0) and its page anchors are
 * turned into bounding regions. When at least one bounding region exists, the
 * entity is also registered in `referenceMap` under its id.
 *
 * Type resolution: the initial type comes from `inferEntityType` and is then
 * overridden by whichever normalized values are present. The checks run in the
 * order text -> date -> money -> boolean, so the last one that matches wins.
 * A non-empty `properties` array finally overrides the type to "object".
 *
 * Nested properties are converted recursively and stored in `valueObject`,
 * keyed by the property's type ("unknown" when the property has no type).
 * Properties sharing the same type are collected into an "array" entity
 * instead of overwriting each other, mirroring how top-level entities with a
 * repeated type are grouped.
 *
 * @param entity - GCP entity object from Document AI extraction
 * @param referenceMap - Reference map to register entity bounding regions
 * @param idGen - ID generator for creating unique entity IDs
 * @returns Standardized PretrainedEntity with typed values and metadata
 */
function convertGCPEntity(
  entity: GCPEntity,
  referenceMap: ReferenceMap,
  idGen: Generator<number, void, unknown>,
): PretrainedEntity {
  const result: PretrainedEntity = {
    id: `/pe/${idGen.next().value}`,
    type: inferEntityType(entity),
    content: entity.mentionText ?? "",
    confidence: entity.confidence ?? 0,
  };

  result.boundingRegions =
    entity.pageAnchor?.pageRefs?.map((p) => {
      return {
        // Page refs are shifted by one; a missing (or zero) page maps to page 1.
        pageNumber: p.page ? parseInt(p.page.toString(), 10) + 1 : 1,
        // Flattened [x0, y0, x1, y1, ...]; stays undefined when the page ref
        // carries no normalized vertices.
        polygon: p.boundingPoly?.normalizedVertices
          ?.map((v) => [v.x ?? 0, v.y ?? 0])
          .flat() as [
          number,
          number,
          number,
          number,
          number,
          number,
          number,
          number,
        ],
      };
    }) ?? [];

  if (result.boundingRegions?.length) {
    referenceMap[result.id] = {
      text: result.content ?? "",
      boundingRegions: result.boundingRegions,
    };
  }

  // Convert normalized values to standardized format. The checks are not
  // exclusive: each later match overrides the type set by an earlier one.
  const normalized = entity.normalizedValue;
  if (normalized) {
    if (normalized.text) {
      result.type = "string";
      result.valueString = normalized.text;
    }

    if (normalized.dateValue) {
      result.type = "date";
      const { year, month, day } = normalized.dateValue;
      // Only complete dates are serialized (as zero-padded YYYY-MM-DD).
      if (year && month && day) {
        result.valueDate = `${year}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
      }
    }

    if (normalized.moneyValue) {
      result.type = "number";
      const units = normalized.moneyValue.units?.toString() ?? "0";
      const nanos = normalized.moneyValue.nanos ?? 0;
      // nanos are billionths of a unit
      result.valueNumber = parseFloat(units) + nanos / 1e9;
    }

    if (
      normalized.booleanValue !== undefined &&
      normalized.booleanValue !== null
    ) {
      result.type = "boolean";
      result.valueBoolean = normalized.booleanValue;
    }
  }

  // Handle properties/nested entities as object
  if (
    entity.properties &&
    Array.isArray(entity.properties) &&
    entity.properties.length > 0
  ) {
    result.type = "object";
    result.valueObject = {};
    for (const prop of entity.properties) {
      const propType = prop.type ?? "unknown";
      const converted = convertGCPEntity(prop, referenceMap, idGen);

      // Own-property check so keys such as "constructor" are not mistaken
      // for an already-seen property type.
      const seen = Object.prototype.hasOwnProperty.call(
        result.valueObject,
        propType,
      );
      const existing = seen ? result.valueObject[propType] : undefined;

      if (!existing) {
        result.valueObject[propType] = converted;
      } else if (existing.type === "array") {
        existing.valueArray = existing.valueArray || [];
        existing.valueArray.push(converted);
      } else {
        // Second occurrence of this type: promote to an array entity rather
        // than dropping the first one.
        result.valueObject[propType] = {
          id: converted.id,
          type: "array",
          valueArray: [existing, converted],
        };
      }
    }
  }

  return result;
}
|
|
581
|
+
|
|
582
|
+
/**
 * Determines the appropriate standardized type for a GCP entity.
 *
 * When the entity has a `normalizedValue`, the first match in this order wins:
 * date value -> "date", money value -> "number", boolean value -> "boolean",
 * non-empty text -> "string". Otherwise an entity with at least one property
 * is an "object", and everything else falls back to "string".
 *
 * A boolean value only counts when it is neither `undefined` nor `null`, so
 * that an explicitly-null field (a value that is absent) is not reported as a
 * boolean. `false` is still a valid boolean value.
 *
 * @param entity - GCP entity to analyze
 * @returns The inferred EntityType
 */
function inferEntityType(entity: GCPEntity): EntityType {
  const normalized = entity.normalizedValue;
  if (normalized) {
    if (normalized.dateValue) return "date";
    if (normalized.moneyValue) return "number";
    if (
      normalized.booleanValue !== undefined &&
      normalized.booleanValue !== null
    ) {
      return "boolean";
    }
    if (normalized.text) return "string";
  }
  if (entity.properties && entity.properties.length > 0) return "object";
  return "string";
}
|
|
601
|
+
|
|
602
|
+
/**
 * Records a layout block's text and location in the reference map.
 *
 * Blocks without a bounding box are ignored. Otherwise the entry stored under
 * `id` holds the block's text ("" when absent) and a single bounding region
 * whose page number is the block's `pageSpan.pageStart` (0 when absent).
 *
 * The polygon is a flat list of x/y pairs: the first vertex, followed by the
 * remaining vertices in reverse order. Missing coordinates become 0, and a
 * missing or empty vertex list yields eight zeros.
 *
 * @param referenceMap - The map to add this block to
 * @param id - Unique identifier for this block (e.g., "/p/123")
 * @param block - GCP layout block containing text and bounding box
 */
function addBlockToRefMap(
  referenceMap: ReferenceMap,
  id: string,
  block: GCPLayoutBlock,
): void {
  // Nothing to register without a bounding box.
  if (!("boundingBox" in block) || !block.boundingBox) return;

  const vertices = block.boundingBox.normalizedVertices;
  let coordinates: number[];

  if (vertices && vertices.length > 0) {
    const [firstVertex, ...remaining] = vertices;
    coordinates = [firstVertex?.x ?? 0, firstVertex?.y ?? 0];
    // Walk the rest of the outline backwards, keeping the first vertex in place.
    for (const vertex of remaining.reverse()) {
      coordinates.push(vertex.x ?? 0, vertex.y ?? 0);
    }
  } else {
    coordinates = [0, 0, 0, 0, 0, 0, 0, 0];
  }

  const region = {
    pageNumber: block.pageSpan?.pageStart ?? 0,
    polygon: coordinates as Polygon,
  };

  referenceMap[id] = {
    text: block.textBlock?.text ?? "",
    boundingRegions: [region],
  };
}
|
|
644
|
+
|
|
645
|
+
/**
 * Parses a GCP layout block and adds it to the document structure.
 *
 * - Text block with nested blocks: a new Section (with empty header/footer)
 *   is created, the block's own text is placed into that section, the nested
 *   blocks are parsed with the section on top of `sectionStack`, and the
 *   finished section is appended to the enclosing container.
 * - Text block without nested blocks: placed directly via
 *   `placeTextBlockInSection` into the innermost open section (if any).
 * - Table / list block: converted and appended to the enclosing container.
 *
 * The "enclosing container" is the content of the innermost section on the
 * stack, or `currentLayout` when the stack is empty. IDs are built from the
 * enclosing section's id plus `/s<n>`, `/t<n>` or `/l<n>`, where `<n>` is the
 * number of items of that kind already in the container.
 *
 * @param block - GCP layout block to parse
 * @param currentLayout - Top-level layout array
 * @param referenceMap - Reference map to register block locations
 * @param sectionStack - Stack of nested sections for hierarchy management
 */
function parseBlock(
  block: GCPLayoutBlock,
  currentLayout: LayoutItemType[],
  referenceMap: ReferenceMap,
  sectionStack: Section[] = [],
): void {
  const enclosingSection = sectionStack[sectionStack.length - 1];
  const container = enclosingSection?.content ?? currentLayout;
  const parentId = `${enclosingSection?.id ?? ""}`;

  const { textBlock, tableBlock, listBlock } = block;

  if (textBlock) {
    const textContent = textBlock.text ?? "";
    const children = textBlock.blocks;

    // Leaf text block: goes straight into the innermost open section.
    if (!children?.length) {
      placeTextBlockInSection(
        textBlock.type,
        textContent,
        block,
        enclosingSection,
        currentLayout,
        referenceMap,
      );
      return;
    }

    // Container text block: its children form a nested section.
    const siblingSections = container.filter(
      (item) => item instanceof Section,
    ).length;
    const sectionId = `${parentId}/s${siblingSections}`;
    const section = new Section(
      sectionId,
      [],
      new Header(`${sectionId}/h`, []),
      new Footer(`${sectionId}/f`, []),
    );

    // The container's own text belongs to the section it opens.
    placeTextBlockInSection(
      textBlock.type,
      textContent,
      block,
      section,
      currentLayout,
      referenceMap,
    );

    sectionStack.push(section);
    for (const child of children) {
      parseBlock(child, currentLayout, referenceMap, sectionStack);
    }
    sectionStack.pop();

    // Attach only once the section is fully built.
    container.push(section);
    return;
  }

  if (tableBlock) {
    const existingTables = container.filter(
      (item) => item instanceof Table,
    ).length;
    container.push(
      parseGCPTable(tableBlock, `${parentId}/t${existingTables}`, referenceMap),
    );
    return;
  }

  if (listBlock) {
    const existingLists = container.filter(
      (item) => item instanceof List,
    ).length;
    container.push(
      parseGCPList(listBlock, `${parentId}/l${existingLists}`, referenceMap),
    );
  }
}
|
|
737
|
+
|
|
738
|
+
/**
 * Parses a GCP page and adds its content to the document structure. This is
 * used to get layout from a pretrained model, not the generic layout parser.
 *
 * All paragraphs of the page are appended to `currentLayout` first, followed
 * by all tables (header rows before body rows). Paragraph ids are `/p<n>` and
 * table ids `/t<n>`, continuing from the number of paragraphs/tables already
 * in `currentLayout`; cell ids are `<tableId>/<rowIndex>/<columnIndex>`.
 *
 * Every paragraph or cell whose layout has normalized vertices is registered
 * in `referenceMap` with its text and a bounding region on this page (page
 * number defaults to 1). The polygon is the flattened vertex list, or eight
 * zeros when the vertex list is empty.
 *
 * @param page - GCP page object containing layout elements
 * @param currentLayout - Top-level layout array
 * @param referenceMap - Reference map to register element locations
 * @param getText - Resolves the text covered by a page item's layout
 */
function parsePage(
  page: GCPPage,
  currentLayout: LayoutItemType[],
  referenceMap: ReferenceMap,
  getText: (layout?: GCPPageItemLayout | null) => string,
): void {
  const pageNumber = page.pageNumber ?? 1;

  // Registers an element's bounding region; elements without vertices are skipped.
  const registerRegion = (
    id: string,
    text: string,
    layout?: GCPPageItemLayout | null,
  ): void => {
    const vertices = layout?.boundingPoly?.normalizedVertices;
    if (!vertices) return;
    const polygon = (
      vertices.length > 0
        ? vertices.flatMap((v) => [v.x ?? 0, v.y ?? 0])
        : [0, 0, 0, 0, 0, 0, 0, 0]
    ) as Polygon;
    referenceMap[id] = {
      text,
      boundingRegions: [{ pageNumber, polygon }],
    };
  };

  // Process paragraphs - these are the main text content elements from page structure.
  // Count existing paragraphs once; only this loop adds more.
  let nextParagraphIndex = currentLayout.filter(
    (i) => i instanceof Paragraph,
  ).length;
  for (const paragraph of page.paragraphs ?? []) {
    const text = getText(paragraph.layout) ?? "";
    const paragraphId = `/p${nextParagraphIndex++}`;

    registerRegion(paragraphId, text, paragraph.layout);
    currentLayout.push(new Paragraph(paragraphId, text));
  }

  // Process tables from page structure
  let nextTableIndex = currentLayout.filter((i) => i instanceof Table).length;
  for (const table of page.tables ?? []) {
    const tableId = `/t${nextTableIndex++}`;
    const rows: Row[] = [];

    // Header and body rows are handled identically apart from the row kind.
    const rowGroups = [
      ["head", table.headerRows ?? []],
      ["body", table.bodyRows ?? []],
    ] as const;

    for (const [kind, tableRows] of rowGroups) {
      for (const tableRow of tableRows) {
        const rowIndex = rows.length;
        const cells: Cell[] = [];
        for (const [columnIndex, cell] of (tableRow.cells ?? []).entries()) {
          const cellId = `${tableId}/${rowIndex}/${columnIndex}`;
          const cellText = getText(cell.layout);

          registerRegion(cellId, cellText, cell.layout);
          cells.push(
            new Cell(
              cellId,
              cellText,
              cell.colSpan ?? undefined,
              cell.rowSpan ?? undefined,
            ),
          );
        }
        rows.push(new Row(kind, cells));
      }
    }

    currentLayout.push(new Table(tableId, rows));
  }

  // TODO: order the elements by their y-coordinate to ensure they are in the correct order
}
|
|
862
|
+
|
|
863
|
+
/**
 * Places a GCP text block into the appropriate part of the document structure.
 *
 * Routing by block type:
 * - "header" / "subtitle": prepended to the target section's header as a
 *   paragraph with id `<section>/hp<n>`. Ignored when there is no target section.
 * - "footer": appended to the target section's footer as a paragraph with id
 *   `<section>/fp<n>`. Ignored when there is no target section.
 * - "heading-1" .. "heading-5": appended as a Heading (`<section>/h<n>`) to the
 *   target section's content, or to the top-level layout without a section.
 * - "paragraph": appended as a Paragraph (`<section>/p<n>`) the same way.
 * - anything else: a warning is logged and the block is not placed.
 *
 * `<n>` is the number of items of the same kind already in the destination.
 * Every placed block is also passed to `addBlockToRefMap` under its new id.
 *
 * @param type - GCP block type (e.g., "header", "paragraph", "heading-1")
 * @param textContent - Text content of the block
 * @param block - GCP layout block for reference map registration
 * @param targetSection - Target section to add content to (undefined for top-level)
 * @param currentLayout - Top-level layout array
 * @param referenceMap - Reference map to register block locations
 */
function placeTextBlockInSection(
  type: string | null | undefined,
  textContent: string,
  block: GCPLayoutBlock,
  targetSection: Section | undefined,
  currentLayout: LayoutItemType[],
  referenceMap: ReferenceMap,
): void {
  const parentId = `${targetSection?.id ?? ""}`;

  if (type === "header" || type === "subtitle") {
    // Header text only makes sense inside a section.
    if (!targetSection) return;
    const headerItems = targetSection.header.content;
    const existing = headerItems.filter(
      (item) => item instanceof Paragraph,
    ).length;
    const paragraphId = `${parentId}/hp${existing}`; // header paragraph
    addBlockToRefMap(referenceMap, paragraphId, block);
    headerItems.unshift(new Paragraph(paragraphId, textContent));
    return;
  }

  if (type === "footer") {
    // Footer text only makes sense inside a section.
    if (!targetSection) return;
    const footerItems = targetSection.footer.content;
    const existing = footerItems.filter(
      (item) => item instanceof Paragraph,
    ).length;
    const paragraphId = `${parentId}/fp${existing}`; // footer paragraph
    addBlockToRefMap(referenceMap, paragraphId, block);
    footerItems.push(new Paragraph(paragraphId, textContent));
    return;
  }

  // Headings and paragraphs land in the section body, or at top level.
  const destination = targetSection?.content ?? currentLayout;

  if (
    type === "heading-1" ||
    type === "heading-2" ||
    type === "heading-3" ||
    type === "heading-4" ||
    type === "heading-5"
  ) {
    const digit = type.match(/^heading-(\d)$/)?.[1] ?? "1";
    const level = parseInt(digit) as 1 | 2 | 3 | 4 | 5;
    const existing = destination.filter((i) => i instanceof Heading).length;
    const headingId = `${parentId}/h${existing}`;
    addBlockToRefMap(referenceMap, headingId, block);
    destination.push(new Heading(headingId, level, textContent));
    return;
  }

  if (type === "paragraph") {
    const existing = destination.filter((i) => i instanceof Paragraph).length;
    const paragraphId = `${parentId}/p${existing}`;
    addBlockToRefMap(referenceMap, paragraphId, block);
    destination.push(new Paragraph(paragraphId, textContent));
    return;
  }

  console.warn(`[placeTextBlockInSection] Unknown block type: ${type}`);
}
|
|
949
|
+
|
|
950
|
+
/**
 * Converts a GCP table block into the standardized Table structure.
 *
 * Header rows come first (as "head" rows), followed by body rows (as "body"
 * rows). Each row's cells are produced by `parseGCPTableCells`, which receives
 * `<blockId>/<rowIndex>` as the parent id, where the row index counts header
 * and body rows together.
 *
 * @param tableBlock - GCP table block from Document AI
 * @param blockId - Unique identifier for this table
 * @param referenceMap - Map to register cell locations
 * @returns Structured Table object with rows and cells
 */
function parseGCPTable(
  tableBlock: GCPLayoutTableBlock,
  blockId: string,
  referenceMap: ReferenceMap,
): Table {
  const rows: Row[] = [];

  const rowGroups = [
    ["head", tableBlock.headerRows],
    ["body", tableBlock.bodyRows],
  ] as const;

  for (const [kind, sourceRows] of rowGroups) {
    for (const sourceRow of sourceRows || []) {
      // rows.length is the index this row is about to occupy.
      const rowId = `${blockId}/${rows.length}`;
      const cells = parseGCPTableCells(
        sourceRow.cells ?? [],
        referenceMap,
        rowId,
      );
      rows.push(new Row(kind, cells));
    }
  }

  return new Table(blockId, rows);
}
|
|
994
|
+
|
|
995
|
+
/**
 * Extracts cell data from a row of GCP table cells.
 *
 * Every cell gets the positional id `<parentId>/<columnIndex>`, whether or not
 * its blocks carry a `blockId`, so ids are never empty and stay unique within
 * the row. The text of all text blocks in a cell is concatenated (without a
 * separator) into the cell's content.
 *
 * Each block that has a `blockId` is passed to `addBlockToRefMap` under the
 * cell's id. Because all blocks of a cell share that id, a later block with a
 * bounding box replaces the entry written by an earlier one.
 *
 * @param cells - Array of GCP cell structures from a table row
 * @param referenceMap - Map to register cell block locations
 * @param parentId - Parent ID for cell identification
 * @returns Array of structured Cell objects with IDs and content
 */
function parseGCPTableCells(
  cells: GCPLayoutTableCell[],
  referenceMap: ReferenceMap,
  parentId: string,
): Cell[] {
  return cells.map((c, i) => {
    const cell = new Cell(
      "",
      "",
      c.colSpan ?? undefined,
      c.rowSpan ?? undefined,
    );
    // The id depends only on position, so assign it unconditionally.
    cell.id = `${parentId}/${i}`;

    for (const b of c.blocks ?? []) {
      if (b.blockId) {
        addBlockToRefMap(referenceMap, cell.id, b);
      }
      if (b.textBlock?.text) {
        cell.content += b.textBlock.text;
      }
    }

    return cell;
  });
}
|
|
1031
|
+
|
|
1032
|
+
/**
 * Converts a GCP list block into the standardized List structure.
 *
 * The list is ordered when the block's type is exactly "ordered". Each list
 * entry becomes one ListItem whose content is the text of the entry's blocks
 * joined by single spaces and trimmed.
 *
 * Every entry block that has a `blockId` is passed to `addBlockToRefMap` under
 * `/b/<blockId>`; the item's id is the reference id of the first such block
 * and stays empty when no block in the entry has a `blockId`.
 *
 * @param listBlock - GCP list block from Document AI
 * @param blockId - Unique identifier for this list
 * @param referenceMap - Map to register list item locations
 * @returns Structured List object with ordered flag and list items
 */
function parseGCPList(
  listBlock: GCPLayoutListBlock,
  blockId: string,
  referenceMap: ReferenceMap,
): List {
  const items: ListItem[] = [];

  for (const entry of listBlock.listEntries ?? []) {
    const item = new ListItem("", "");
    const pieces: string[] = [];

    for (const entryBlock of entry.blocks || []) {
      if (entryBlock.blockId) {
        const refId = `/b/${entryBlock.blockId}`;
        // The first block with an id names the whole item.
        if (!item.id) item.id = refId;
        addBlockToRefMap(referenceMap, refId, entryBlock);
      }
      const blockText = entryBlock.textBlock?.text;
      if (blockText) pieces.push(blockText);
    }

    item.content = `${item.content}${pieces.join(" ")}`.trim();
    items.push(item);
  }

  return new List(blockId, listBlock.type === "ordered", items);
}
|