@vertesia/workflow 0.78.0-dev-28b447d → 0.78.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +34 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +67 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +20 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/chunkDocument.js +86 -0
- package/lib/cjs/activities/chunkDocument.js.map +1 -0
- package/lib/cjs/activities/createDocumentFromOther.js +64 -0
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
- package/lib/cjs/activities/executeInteraction.js +189 -0
- package/lib/cjs/activities/executeInteraction.js.map +1 -0
- package/lib/cjs/activities/extractDocumentText.js +156 -0
- package/lib/cjs/activities/extractDocumentText.js.map +1 -0
- package/lib/cjs/activities/generateDocumentProperties.js +84 -0
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
- package/lib/cjs/activities/generateEmbeddings.js +358 -0
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
- package/lib/cjs/activities/generateOrAssignContentType.js +126 -0
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/cjs/activities/getObjectFromStore.js +20 -0
- package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
- package/lib/cjs/activities/handleError.js +22 -0
- package/lib/cjs/activities/handleError.js.map +1 -0
- package/lib/cjs/activities/index-dsl.js +43 -0
- package/lib/cjs/activities/index-dsl.js.map +1 -0
- package/lib/cjs/activities/index.js +21 -0
- package/lib/cjs/activities/index.js.map +1 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js +103 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +51 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/cjs/activities/notifyWebhook.js +45 -0
- package/lib/cjs/activities/notifyWebhook.js.map +1 -0
- package/lib/cjs/activities/rateLimiter.js +30 -0
- package/lib/cjs/activities/rateLimiter.js.map +1 -0
- package/lib/cjs/activities/renditions/generateImageRendition.js +66 -0
- package/lib/cjs/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js +200 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/cjs/activities/setDocumentStatus.js +15 -0
- package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
- package/lib/cjs/conversion/TextractProcessor.js +417 -0
- package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
- package/lib/cjs/conversion/image.js +149 -0
- package/lib/cjs/conversion/image.js.map +1 -0
- package/lib/cjs/conversion/markitdown.js +42 -0
- package/lib/cjs/conversion/markitdown.js.map +1 -0
- package/lib/cjs/conversion/mutool.js +147 -0
- package/lib/cjs/conversion/mutool.js.map +1 -0
- package/lib/cjs/conversion/pandoc.js +39 -0
- package/lib/cjs/conversion/pandoc.js.map +1 -0
- package/lib/cjs/dsl/conditions.js +81 -0
- package/lib/cjs/dsl/conditions.js.map +1 -0
- package/lib/cjs/dsl/dsl-workflow.js +338 -0
- package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
- package/lib/cjs/dsl/dslProxyActivities.js +23 -0
- package/lib/cjs/dsl/dslProxyActivities.js.map +1 -0
- package/lib/cjs/dsl/projections.js +59 -0
- package/lib/cjs/dsl/projections.js.map +1 -0
- package/lib/cjs/dsl/setup/ActivityContext.js +123 -0
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/index.js +16 -0
- package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
- package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
- package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/cjs/dsl/validation.js +122 -0
- package/lib/cjs/dsl/validation.js.map +1 -0
- package/lib/cjs/dsl/vars.js +341 -0
- package/lib/cjs/dsl/vars.js.map +1 -0
- package/lib/cjs/dsl/walk.js +100 -0
- package/lib/cjs/dsl/walk.js.map +1 -0
- package/lib/cjs/dsl.js +20 -0
- package/lib/cjs/dsl.js.map +1 -0
- package/lib/cjs/errors.js +57 -0
- package/lib/cjs/errors.js.map +1 -0
- package/lib/cjs/index.js +54 -0
- package/lib/cjs/index.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +72 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js +79 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js +87 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/index.js +12 -0
- package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +56 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/cjs/iterative-generation/types.js +5 -0
- package/lib/cjs/iterative-generation/types.js.map +1 -0
- package/lib/cjs/iterative-generation/utils.js +121 -0
- package/lib/cjs/iterative-generation/utils.js.map +1 -0
- package/lib/cjs/package.json +3 -0
- package/lib/cjs/result-types.js +10 -0
- package/lib/cjs/result-types.js.map +1 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js +46 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +33 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/cjs/utils/auth.js +15 -0
- package/lib/cjs/utils/auth.js.map +1 -0
- package/lib/cjs/utils/blobs.js +64 -0
- package/lib/cjs/utils/blobs.js.map +1 -0
- package/lib/cjs/utils/chunks.js +14 -0
- package/lib/cjs/utils/chunks.js.map +1 -0
- package/lib/cjs/utils/client.js +31 -0
- package/lib/cjs/utils/client.js.map +1 -0
- package/lib/cjs/utils/expand-vars.js +33 -0
- package/lib/cjs/utils/expand-vars.js.map +1 -0
- package/lib/cjs/utils/memory.js +65 -0
- package/lib/cjs/utils/memory.js.map +1 -0
- package/lib/cjs/utils/renditions.js +88 -0
- package/lib/cjs/utils/renditions.js.map +1 -0
- package/lib/cjs/utils/storage.js +55 -0
- package/lib/cjs/utils/storage.js.map +1 -0
- package/lib/cjs/utils/tokens.js +38 -0
- package/lib/cjs/utils/tokens.js.map +1 -0
- package/lib/cjs/vars.js +20 -0
- package/lib/cjs/vars.js.map +1 -0
- package/lib/cjs/workflows.js +15 -0
- package/lib/cjs/workflows.js.map +1 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +31 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +64 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +17 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/chunkDocument.js +83 -0
- package/lib/esm/activities/chunkDocument.js.map +1 -0
- package/lib/esm/activities/createDocumentFromOther.js +58 -0
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
- package/lib/esm/activities/executeInteraction.js +185 -0
- package/lib/esm/activities/executeInteraction.js.map +1 -0
- package/lib/esm/activities/extractDocumentText.js +153 -0
- package/lib/esm/activities/extractDocumentText.js.map +1 -0
- package/lib/esm/activities/generateDocumentProperties.js +81 -0
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
- package/lib/esm/activities/generateEmbeddings.js +355 -0
- package/lib/esm/activities/generateEmbeddings.js.map +1 -0
- package/lib/esm/activities/generateOrAssignContentType.js +123 -0
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/esm/activities/getObjectFromStore.js +17 -0
- package/lib/esm/activities/getObjectFromStore.js.map +1 -0
- package/lib/esm/activities/handleError.js +19 -0
- package/lib/esm/activities/handleError.js.map +1 -0
- package/lib/esm/activities/index-dsl.js +21 -0
- package/lib/esm/activities/index-dsl.js.map +1 -0
- package/lib/esm/activities/index.js +5 -0
- package/lib/esm/activities/index.js.map +1 -0
- package/lib/esm/activities/media/processPdfWithTextract.js +99 -0
- package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +48 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/esm/activities/notifyWebhook.js +42 -0
- package/lib/esm/activities/notifyWebhook.js.map +1 -0
- package/lib/esm/activities/rateLimiter.js +27 -0
- package/lib/esm/activities/rateLimiter.js.map +1 -0
- package/lib/esm/activities/renditions/generateImageRendition.js +63 -0
- package/lib/esm/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js +194 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/esm/activities/setDocumentStatus.js +12 -0
- package/lib/esm/activities/setDocumentStatus.js.map +1 -0
- package/lib/esm/conversion/TextractProcessor.js +410 -0
- package/lib/esm/conversion/TextractProcessor.js.map +1 -0
- package/lib/esm/conversion/image.js +143 -0
- package/lib/esm/conversion/image.js.map +1 -0
- package/lib/esm/conversion/markitdown.js +36 -0
- package/lib/esm/conversion/markitdown.js.map +1 -0
- package/lib/esm/conversion/mutool.js +139 -0
- package/lib/esm/conversion/mutool.js.map +1 -0
- package/lib/esm/conversion/pandoc.js +36 -0
- package/lib/esm/conversion/pandoc.js.map +1 -0
- package/lib/esm/dsl/conditions.js +75 -0
- package/lib/esm/dsl/conditions.js.map +1 -0
- package/lib/esm/dsl/dsl-workflow.js +331 -0
- package/lib/esm/dsl/dsl-workflow.js.map +1 -0
- package/lib/esm/dsl/dslProxyActivities.js +20 -0
- package/lib/esm/dsl/dslProxyActivities.js.map +1 -0
- package/lib/esm/dsl/projections.js +55 -0
- package/lib/esm/dsl/projections.js.map +1 -0
- package/lib/esm/dsl/setup/ActivityContext.js +118 -0
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/index.js +12 -0
- package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/providers.js +61 -0
- package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/esm/dsl/test/test-child-workflow.js +5 -0
- package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/esm/dsl/validation.js +118 -0
- package/lib/esm/dsl/validation.js.map +1 -0
- package/lib/esm/dsl/vars.js +335 -0
- package/lib/esm/dsl/vars.js.map +1 -0
- package/lib/esm/dsl/walk.js +96 -0
- package/lib/esm/dsl/walk.js.map +1 -0
- package/lib/esm/dsl.js +4 -0
- package/lib/esm/dsl.js.map +1 -0
- package/lib/esm/errors.js +49 -0
- package/lib/esm/errors.js.map +1 -0
- package/lib/esm/index.js +36 -0
- package/lib/esm/index.js.map +1 -0
- package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
- package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +69 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generatePart.js +76 -0
- package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generateToc.js +84 -0
- package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/index.js +5 -0
- package/lib/esm/iterative-generation/activities/index.js.map +1 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +53 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/esm/iterative-generation/types.js +2 -0
- package/lib/esm/iterative-generation/types.js.map +1 -0
- package/lib/esm/iterative-generation/utils.js +112 -0
- package/lib/esm/iterative-generation/utils.js.map +1 -0
- package/lib/esm/result-types.js +7 -0
- package/lib/esm/result-types.js.map +1 -0
- package/lib/esm/system/notifyWebhookWorkflow.js +43 -0
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +30 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/esm/utils/auth.js +8 -0
- package/lib/esm/utils/auth.js.map +1 -0
- package/lib/esm/utils/blobs.js +54 -0
- package/lib/esm/utils/blobs.js.map +1 -0
- package/lib/esm/utils/chunks.js +9 -0
- package/lib/esm/utils/chunks.js.map +1 -0
- package/lib/esm/utils/client.js +27 -0
- package/lib/esm/utils/client.js.map +1 -0
- package/lib/esm/utils/expand-vars.js +30 -0
- package/lib/esm/utils/expand-vars.js.map +1 -0
- package/lib/esm/utils/memory.js +55 -0
- package/lib/esm/utils/memory.js.map +1 -0
- package/lib/esm/utils/renditions.js +80 -0
- package/lib/esm/utils/renditions.js.map +1 -0
- package/lib/esm/utils/storage.js +46 -0
- package/lib/esm/utils/storage.js.map +1 -0
- package/lib/esm/utils/tokens.js +34 -0
- package/lib/esm/utils/tokens.js.map +1 -0
- package/lib/esm/vars.js +4 -0
- package/lib/esm/vars.js.map +1 -0
- package/lib/esm/workflows.js +8 -0
- package/lib/esm/workflows.js.map +1 -0
- package/lib/tsconfig.tsbuildinfo +1 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +39 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/chunkDocument.d.ts +33 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -0
- package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
- package/lib/types/activities/executeInteraction.d.ts +61 -0
- package/lib/types/activities/executeInteraction.d.ts.map +1 -0
- package/lib/types/activities/extractDocumentText.d.ts +10 -0
- package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
- package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
- package/lib/types/activities/generateEmbeddings.d.ts +53 -0
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
- package/lib/types/activities/getObjectFromStore.d.ts +14 -0
- package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
- package/lib/types/activities/handleError.d.ts +6 -0
- package/lib/types/activities/handleError.d.ts.map +1 -0
- package/lib/types/activities/index-dsl.d.ts +20 -0
- package/lib/types/activities/index-dsl.d.ts.map +1 -0
- package/lib/types/activities/index.d.ts +5 -0
- package/lib/types/activities/index.d.ts.map +1 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +14 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
- package/lib/types/activities/notifyWebhook.d.ts +16 -0
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
- package/lib/types/activities/rateLimiter.d.ts +11 -0
- package/lib/types/activities/rateLimiter.d.ts.map +1 -0
- package/lib/types/activities/renditions/generateImageRendition.d.ts +14 -0
- package/lib/types/activities/renditions/generateImageRendition.d.ts.map +1 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts +15 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts.map +1 -0
- package/lib/types/activities/setDocumentStatus.d.ts +15 -0
- package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
- package/lib/types/conversion/TextractProcessor.d.ts +45 -0
- package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
- package/lib/types/conversion/image.d.ts +13 -0
- package/lib/types/conversion/image.d.ts.map +1 -0
- package/lib/types/conversion/markitdown.d.ts +2 -0
- package/lib/types/conversion/markitdown.d.ts.map +1 -0
- package/lib/types/conversion/mutool.d.ts +19 -0
- package/lib/types/conversion/mutool.d.ts.map +1 -0
- package/lib/types/conversion/pandoc.d.ts +2 -0
- package/lib/types/conversion/pandoc.d.ts.map +1 -0
- package/lib/types/dsl/conditions.d.ts +2 -0
- package/lib/types/dsl/conditions.d.ts.map +1 -0
- package/lib/types/dsl/dsl-workflow.d.ts +5 -0
- package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
- package/lib/types/dsl/dslProxyActivities.d.ts +10 -0
- package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -0
- package/lib/types/dsl/projections.d.ts +4 -0
- package/lib/types/dsl/projections.d.ts.map +1 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts +17 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
- package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
- package/lib/types/dsl/validation.d.ts +4 -0
- package/lib/types/dsl/validation.d.ts.map +1 -0
- package/lib/types/dsl/vars.d.ts +48 -0
- package/lib/types/dsl/vars.d.ts.map +1 -0
- package/lib/types/dsl/walk.d.ts +18 -0
- package/lib/types/dsl/walk.d.ts.map +1 -0
- package/lib/types/dsl.d.ts +4 -0
- package/lib/types/dsl.d.ts.map +1 -0
- package/lib/types/errors.d.ts +27 -0
- package/lib/types/errors.d.ts.map +1 -0
- package/lib/types/index.d.ts +35 -0
- package/lib/types/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/index.d.ts +5 -0
- package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
- package/lib/types/iterative-generation/types.d.ts +79 -0
- package/lib/types/iterative-generation/types.d.ts.map +1 -0
- package/lib/types/iterative-generation/utils.d.ts +27 -0
- package/lib/types/iterative-generation/utils.d.ts.map +1 -0
- package/lib/types/result-types.d.ts +22 -0
- package/lib/types/result-types.d.ts.map +1 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts +7 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +25 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
- package/lib/types/utils/auth.d.ts +4 -0
- package/lib/types/utils/auth.d.ts.map +1 -0
- package/lib/types/utils/blobs.d.ts +7 -0
- package/lib/types/utils/blobs.d.ts.map +1 -0
- package/lib/types/utils/chunks.d.ts +9 -0
- package/lib/types/utils/chunks.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts +8 -0
- package/lib/types/utils/client.d.ts.map +1 -0
- package/lib/types/utils/expand-vars.d.ts +8 -0
- package/lib/types/utils/expand-vars.d.ts.map +1 -0
- package/lib/types/utils/memory.d.ts +8 -0
- package/lib/types/utils/memory.d.ts.map +1 -0
- package/lib/types/utils/renditions.d.ts +23 -0
- package/lib/types/utils/renditions.d.ts.map +1 -0
- package/lib/types/utils/storage.d.ts +16 -0
- package/lib/types/utils/storage.d.ts.map +1 -0
- package/lib/types/utils/tokens.d.ts +11 -0
- package/lib/types/utils/tokens.d.ts.map +1 -0
- package/lib/types/vars.d.ts +3 -0
- package/lib/types/vars.d.ts.map +1 -0
- package/lib/types/workflows.d.ts +8 -0
- package/lib/types/workflows.d.ts.map +1 -0
- package/lib/workflows-bundle.js +13166 -0
- package/package.json +127 -127
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.generateVideoRendition = generateVideoRendition;
|
|
7
|
+
const activity_1 = require("@temporalio/activity");
|
|
8
|
+
const child_process_1 = require("child_process");
|
|
9
|
+
const fs_1 = __importDefault(require("fs"));
|
|
10
|
+
const os_1 = __importDefault(require("os"));
|
|
11
|
+
const path_1 = __importDefault(require("path"));
|
|
12
|
+
const util_1 = require("util");
|
|
13
|
+
const ActivityContext_js_1 = require("../../dsl/setup/ActivityContext.js");
|
|
14
|
+
const errors_js_1 = require("../../errors.js");
|
|
15
|
+
const blobs_js_1 = require("../../utils/blobs.js");
|
|
16
|
+
const renditions_js_1 = require("../../utils/renditions.js");
|
|
17
|
+
const execAsync = (0, util_1.promisify)(child_process_1.exec);
|
|
18
|
+
/**
 * Probe a local video file with `ffprobe` and return its basic metadata.
 *
 * ffprobe is started through `execFile` with an argument array, so `videoPath`
 * is handed over verbatim and is never parsed by a shell (paths containing
 * quotes, `$` or backticks are safe).
 *
 * @param {string} videoPath - Path of the video file to probe.
 * @returns {Promise<{duration: number, width: number, height: number}>}
 *   `duration` is `format.duration` parsed as a float; `width` / `height` are
 *   taken from the first stream whose `codec_type` is "video". Every value
 *   falls back to 0 when ffprobe does not report it.
 * @throws {Error} "Failed to probe video metadata: …" when ffprobe cannot be
 *   run or its output is not valid JSON; the original error is attached as `cause`.
 */
async function getVideoMetadata(videoPath) {
    // Created locally so this function only relies on the module's existing requires.
    const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
    try {
        const { stdout } = await execFileAsync("ffprobe", [
            "-v",
            "quiet",
            "-print_format",
            "json",
            "-show_format",
            "-show_streams",
            videoPath,
        ]);
        const metadata = JSON.parse(stdout);
        // `streams` / `format` can be missing from the report; treat that as "unknown" (0)
        // instead of failing with a TypeError.
        const videoStream = metadata.streams?.find((stream) => stream.codec_type === "video");
        const duration = parseFloat(metadata.format?.duration) || 0;
        const width = videoStream?.width || 0;
        const height = videoStream?.height || 0;
        return { duration, width, height };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : "Unknown error";
        activity_1.log.error(`Failed to get video metadata: ${reason}`);
        throw new Error(`Failed to probe video metadata: ${reason}`, { cause: error });
    }
}
|
|
34
|
+
/**
 * Extract a single JPEG frame from a video with `ffmpeg`.
 *
 * ffmpeg is started through `execFile` with an argument array, so neither the
 * input path nor the output path is interpreted by a shell.
 *
 * @param {string} videoPath - Path of the source video.
 * @param {string} outputDir - Directory the thumbnail is written into.
 * @param {number} timestamp - Position (in seconds) to seek to before grabbing the frame.
 * @param {number} maxSize - Bounding box (width and height) for the scale filter;
 *   the aspect ratio is kept via `force_original_aspect_ratio=decrease`.
 * @returns {Promise<string|undefined>} Path of the generated file, or `undefined`
 *   when ffmpeg failed or produced no file. Failures are logged, never thrown.
 */
async function generateThumbnail(videoPath, outputDir, timestamp, maxSize) {
    // Created locally so this function only relies on the module's existing requires.
    const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
    // File name is the timestamp left-padded with zeros to 5 characters.
    // NOTE(review): fractional timestamps are usually longer than 5 characters and are
    // therefore not padded (e.g. "thumb-12.345678.jpg"); kept as-is so names do not change.
    const outputFile = path_1.default.join(outputDir, `thumb-${timestamp.toString().padStart(5, "0")}.jpg`);
    // scale=w:h:force_original_aspect_ratio=decrease -> fit inside maxSize x maxSize.
    const scaleFilter = `scale=${maxSize}:${maxSize}:force_original_aspect_ratio=decrease`;
    const args = [
        "-y", // Overwrite output files
        "-ss",
        timestamp.toString(), // Seek to timestamp
        "-i",
        videoPath, // Input file
        "-vframes",
        "1", // Extract only 1 frame
        "-vf",
        scaleFilter, // Scale maintaining aspect ratio
        "-q:v",
        "2", // High quality
        outputFile,
    ];
    // The command is passed as log metadata (it used to be dropped by a stray comma operator).
    activity_1.log.info(`Generating thumbnail at ${timestamp}s`, {
        command: ["ffmpeg", ...args].join(" "),
    });
    try {
        const { stderr } = await execFileAsync("ffmpeg", args);
        // Log any warnings from ffmpeg, skipping its regular "frame=" progress output.
        if (stderr && !stderr.includes("frame=")) {
            activity_1.log.debug(`FFmpeg stderr for thumbnail at ${timestamp}s: ${stderr}`);
        }
        // ffmpeg can exit successfully without writing a frame, so verify the file exists.
        if (!fs_1.default.existsSync(outputFile)) {
            activity_1.log.warn(`Thumbnail not generated for timestamp ${timestamp}s`);
            return undefined;
        }
        activity_1.log.debug(`Generated thumbnail at ${timestamp}s`);
        return outputFile;
    }
    catch (error) {
        // Best effort by design: one failed frame must not abort the other thumbnails.
        activity_1.log.error(`Failed to generate thumbnail at ${timestamp}s: ${error instanceof Error ? error.message : "Unknown error"}`);
        return undefined;
    }
}
|
|
77
|
+
/**
 * Pick how many thumbnails to extract for a video of the given length.
 *
 * @param {number} videoDuration - Video duration in seconds.
 * @returns {number} Between 3 (up to one minute) and 20 (longer than one hour).
 */
function calculateThumbnailCount(videoDuration) {
    if (videoDuration <= 60) {
        return 3; // Short videos: 3 thumbnails
    }
    if (videoDuration <= 300) {
        return 5; // 5min videos: 5 thumbnails
    }
    if (videoDuration <= 600) {
        return 8; // 10min videos: 8 thumbnails
    }
    if (videoDuration <= 1800) {
        return 12; // 30min videos: 12 thumbnails
    }
    if (videoDuration <= 3600) {
        return 16; // 1hr videos: 16 thumbnails
    }
    return 20; // Longer videos: max 20 thumbnails
}

/**
 * Compute evenly spaced, de-duplicated seek positions for the thumbnails.
 *
 * The first and last 5% of the video (at most 5 seconds each) are skipped.
 * Positions are floored at 1 second, or at half the duration for clips shorter
 * than 2 seconds so the floor never points past the end of the clip. When the
 * duration is unknown (0) the floor stays at 1 second.
 *
 * @param {number} duration - Video duration in seconds (0 when unknown).
 * @param {number} thumbnailCount - Number of positions requested.
 * @returns {number[]} Unique positions in seconds, in ascending order; can be
 *   shorter than `thumbnailCount` when several positions collapse onto the floor.
 */
function computeThumbnailTimestamps(duration, thumbnailCount) {
    const startOffset = Math.min(duration * 0.05, 5); // Skip first 5% or 5 seconds
    const endOffset = Math.min(duration * 0.05, 5); // Skip last 5% or 5 seconds
    const usableDuration = duration - startOffset - endOffset;
    const minTimestamp = duration > 0 ? Math.min(1, duration / 2) : 1;
    // A Set drops duplicates: two identical positions would make two ffmpeg processes
    // write the same output file at the same time and upload the same page twice.
    const timestamps = new Set();
    for (let i = 0; i < thumbnailCount; i++) {
        const progress = (i + 1) / (thumbnailCount + 1); // Evenly distribute
        timestamps.add(Math.max(startOffset + usableDuration * progress, minTimestamp));
    }
    return [...timestamps];
}

/**
 * Activity: generate thumbnail renditions for a video document and upload them.
 *
 * Steps: resolve the activity context, retrieve the document, check that it has
 * a video source, download the source to a temp file, extract thumbnails with
 * ffmpeg into a temp directory, upload them, then remove both temp locations.
 *
 * @param {object} payload - Activity payload passed to `setupActivity`. Params read
 *   here: `max_hw` / `maxHeightWidth` (default 1024) and `format` / `format_output`
 *   (default "png").
 * @returns {Promise<{uploads: Array, format: string, thumbnailCount: number, status: string}>}
 *   The upload results, the rendition format, the number of thumbnails uploaded
 *   and `status: "success"`.
 * @throws {DocumentNotFoundError} When the document is not found, has no source,
 *   or its content type does not start with "video/".
 * @throws {Error} "Failed to generate thumbnails for video: <id>" when probing
 *   fails or no thumbnail could be produced (original error attached as `cause`).
 */
async function generateVideoRendition(payload) {
    const { client, objectId, params: originParams, } = await (0, ActivityContext_js_1.setupActivity)(payload);
    // Use maxHeightWidth if max_hw is not provided. Because `format` always gets a
    // default here, no separate "format missing" check is needed afterwards.
    const params = {
        ...originParams,
        max_hw: originParams.max_hw || originParams.maxHeightWidth || 1024, // Default to 1024 if both are missing
        format: originParams.format || originParams.format_output || "png", // Default to png if format is missing
    };
    activity_1.log.info(`Generating video rendition for ${objectId}`, {
        originParams,
        params,
    });
    const inputObject = await client.objects.retrieve(objectId).catch((err) => {
        activity_1.log.error(`Failed to retrieve document ${objectId}`, { err });
        // Guarded access: a rejection without a `message` is rethrown unchanged.
        if (err?.message?.includes("not found")) {
            throw new errors_js_1.DocumentNotFoundError(`Document ${objectId} not found`, [
                objectId,
            ]);
        }
        throw err;
    });
    if (!inputObject.content?.source) {
        activity_1.log.error(`Document ${objectId} has no source`);
        throw new errors_js_1.DocumentNotFoundError(`Document ${objectId} has no source`, [
            objectId,
        ]);
    }
    if (!inputObject.content.type?.startsWith("video/")) {
        activity_1.log.error(`Document ${objectId} is not a video: ${inputObject.content.type}`);
        throw new errors_js_1.DocumentNotFoundError(`Document ${objectId} is not a video: ${inputObject.content.type}`, [objectId]);
    }
    const videoFile = await (0, blobs_js_1.saveBlobToTempFile)(client, inputObject.content.source);
    let tempOutputDir;
    try {
        tempOutputDir = fs_1.default.mkdtempSync(path_1.default.join(os_1.default.tmpdir(), "video-rendition-"));
        // Rendition files to upload.
        let renditionPages;
        try {
            // Get video metadata using command line ffprobe
            const { duration } = await getVideoMetadata(videoFile);
            const thumbnailCount = calculateThumbnailCount(duration);
            const timestamps = computeThumbnailTimestamps(duration, thumbnailCount);
            activity_1.log.info(`Generating ${timestamps.length} thumbnails for ${duration}s video`, {
                objectId,
                duration,
                thumbnailCount,
                timestamps: timestamps.map((t) => Math.round(t)),
                tempOutputDir,
            });
            // Generate thumbnails using command line ffmpeg
            const results = await Promise.all(timestamps.map((timestamp) => generateThumbnail(videoFile, tempOutputDir, timestamp, params.max_hw)));
            // generateThumbnail resolves to undefined on failure: drop those entries BEFORE
            // checking whether anything was produced.
            renditionPages = results.filter((thumbnail) => thumbnail !== undefined);
            if (renditionPages.length === 0) {
                activity_1.log.info(`No thumbnails were generated for video ${objectId}`, {
                    objectId,
                    thumbnailCount,
                    tempOutputDir,
                });
                throw new Error(`No thumbnails were generated for video ${objectId}`);
            }
            activity_1.log.info(`Successfully generated ${renditionPages.length} thumbnails for ${objectId}`, {
                objectId,
                generatedCount: renditionPages.length,
                requestedCount: thumbnailCount,
            });
        }
        catch (error) {
            activity_1.log.error(`Error generating thumbnails for video: ${error instanceof Error ? error.message : "Unknown error"}`);
            throw new Error(`Failed to generate thumbnails for video: ${objectId}`, { cause: error });
        }
        if (!inputObject.content?.etag) {
            activity_1.log.warn(`Document ${objectId} has no etag, using object id as etag`);
        }
        const etag = inputObject.content.etag ?? inputObject.id;
        // Upload while the thumbnail files still exist; cleanup happens in `finally`.
        const uploaded = await (0, renditions_js_1.uploadRenditionPages)(client, etag, renditionPages, params);
        return {
            uploads: uploaded.map((u) => u),
            format: params.format,
            thumbnailCount: renditionPages.length,
            status: "success",
        };
    }
    finally {
        // Clean up temporary video file
        try {
            fs_1.default.rmSync(videoFile, { force: true });
        }
        catch (cleanupError) {
            activity_1.log.warn(`Failed to cleanup temporary video file: ${videoFile}`);
        }
        // Clean up the thumbnail directory (and any thumbnails left in it)
        if (tempOutputDir) {
            try {
                fs_1.default.rmSync(tempOutputDir, { recursive: true, force: true });
            }
            catch (cleanupError) {
                activity_1.log.warn(`Failed to cleanup temporary thumbnail directory: ${tempOutputDir}`);
            }
        }
    }
}
|
|
200
|
+
//# sourceMappingURL=generateVideoRendition.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generateVideoRendition.js","sourceRoot":"","sources":["../../../../src/activities/renditions/generateVideoRendition.ts"],"names":[],"mappings":";;;;;AAgHA,wDAoLC;AApSD,mDAA2C;AAE3C,iDAAqC;AACrC,4CAAoB;AACpB,4CAAoB;AACpB,gDAAwB;AACxB,+BAAiC;AACjC,2EAAmE;AACnE,+CAAoF;AACpF,mDAA0D;AAC1D,6DAGmC;AAEnC,MAAM,SAAS,GAAG,IAAA,gBAAS,EAAC,oBAAI,CAAC,CAAC;AAelC,KAAK,UAAU,gBAAgB,CAAC,SAAiB;IAC7C,IAAI,CAAC;QACD,MAAM,OAAO,GAAG,mEAAmE,SAAS,GAAG,CAAC;QAChG,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAEpC,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CACrC,CAAC,MAAW,EAAE,EAAE,CAAC,MAAM,CAAC,UAAU,KAAK,OAAO,CACjD,CAAC;QACF,MAAM,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC3D,MAAM,KAAK,GAAG,WAAW,EAAE,KAAK,IAAI,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,WAAW,EAAE,MAAM,IAAI,CAAC,CAAC;QAExC,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IACvC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,cAAG,CAAC,KAAK,CACL,iCAAiC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAC9F,CAAC;QACF,MAAM,IAAI,KAAK,CACX,mCAAmC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAChG,CAAC;IACN,CAAC;AACL,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC5B,SAAiB,EACjB,SAAiB,EACjB,SAAiB,EACjB,OAAe;IAEf,uCAAuC;IACvC,MAAM,UAAU,GAAG,cAAI,CAAC,IAAI,CACxB,SAAS,EACT,SAAS,SAAS,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,CACvD,CAAC;IAEF,4DAA4D;IAC5D,iFAAiF;IACjF,MAAM,WAAW,GAAG,SAAS,OAAO,IAAI,OAAO,uCAAuC,CAAC;IAEvF,MAAM,OAAO,GAAG;QACZ,QAAQ;QACR,IAAI,EAAE,yBAAyB;QAC/B,KAAK;QACL,SAAS,CAAC,QAAQ,EAAE,EAAE,oBAAoB;QAC1C,IAAI;QACJ,IAAI,SAAS,GAAG,EAAE,aAAa;QAC/B,UAAU;QACV,GAAG,EAAE,uBAAuB;QAC5B,KAAK;QACL,IAAI,WAAW,GAAG,EAAE,iCAAiC;QACrD,MAAM;QACN,GAAG,EAAE,eAAe;QACpB,IAAI,UAAU,GAAG;KACpB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACZ,cAAG,CAAC,IAAI,CAAC,2BAA2B,SAAS,GAAG,CAAC,EAAE,EAAE,OAAO,EAAE,CAAC;IAC/D,IAAI,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,CAAC;QAE5C,+BAA+B;QAC/B,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QA
AQ,CAAC,EAAE,CAAC;YACvC,cAAG,CAAC,KAAK,CACL,kCAAkC,SAAS,MAAM,MAAM,EAAE,CAC5D,CAAC;QACN,CAAC;QAED,8BAA8B;QAC9B,IAAI,YAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5B,cAAG,CAAC,KAAK,CAAC,0BAA0B,SAAS,GAAG,CAAC,CAAC;YAClD,OAAO,UAAU,CAAC;QACtB,CAAC;aAAM,CAAC;YACJ,cAAG,CAAC,IAAI,CAAC,yCAAyC,SAAS,GAAG,CAAC,CAAC;YAChE,OAAO,SAAS,CAAC;QACrB,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,cAAG,CAAC,KAAK,CACL,mCAAmC,SAAS,MAAM,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAC/G,CAAC;QACF,OAAO,SAAS,CAAC;IACrB,CAAC;AACL,CAAC;AAEM,KAAK,UAAU,sBAAsB,CACxC,OAAkE;IAElE,MAAM,EACF,MAAM,EACN,QAAQ,EACR,MAAM,EAAE,YAAY,GACvB,GAAG,MAAM,IAAA,kCAAa,EAA+B,OAAO,CAAC,CAAC;IAE/D,oDAAoD;IACpD,MAAM,MAAM,GAAG;QACX,GAAG,YAAY;QACf,MAAM,EACF,YAAY,CAAC,MAAM,IAAK,YAAoB,CAAC,cAAc,IAAI,IAAI,EAAE,sCAAsC;QAC/G,MAAM,EACF,YAAY,CAAC,MAAM,IAAK,YAAoB,CAAC,aAAa,IAAI,KAAK,EAAE,sCAAsC;KAClH,CAAC;IAEF,cAAG,CAAC,IAAI,CAAC,kCAAkC,QAAQ,EAAE,EAAE;QACnD,YAAY;QACZ,MAAM;KACT,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;QACtE,cAAG,CAAC,KAAK,CAAC,+BAA+B,QAAQ,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAC9D,IAAI,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACpC,MAAM,IAAI,iCAAqB,CAAC,YAAY,QAAQ,YAAY,EAAE;gBAC9D,QAAQ;aACX,CAAC,CAAC;QACP,CAAC;QACD,MAAM,GAAG,CAAC;IACd,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACjB,cAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QAC9B,MAAM,IAAI,sCAA0B,CAAC,QAAQ,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC;QAC/B,cAAG,CAAC,KAAK,CAAC,YAAY,QAAQ,gBAAgB,CAAC,CAAC;QAChD,MAAM,IAAI,iCAAqB,CAAC,YAAY,QAAQ,gBAAgB,EAAE;YAClE,QAAQ;SACX,CAAC,CAAC;IACP,CAAC;IAED,IACI,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI;QACzB,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC,EACjD,CAAC;QACC,cAAG,CAAC,KAAK,CACL,YAAY,QAAQ,oBAAoB,WAAW,CAAC,OAAO,CAAC,IAAI,EAAE,CACrE,CAAC;QACF,MAAM,IAAI,iCAAqB,CAC3B,YAAY,QAAQ,oBAAoB,WAAW,CAAC,OAAO,CAAC,IAAI,EAAE,EAClE,CAAC,QAAQ,CAAC,CACb,CAAC;IACN,CAAC;IAED,oCAAoC;IACpC,IAAI,cAAc,GAAa,EAAE,CAAC;IAElC
,MAAM,SAAS,GAAG,MAAM,IAAA,6BAAkB,EACtC,MAAM,EACN,WAAW,CAAC,OAAO,CAAC,MAAM,CAC7B,CAAC;IACF,MAAM,aAAa,GAAG,YAAE,CAAC,WAAW,CAChC,cAAI,CAAC,IAAI,CAAC,YAAE,CAAC,MAAM,EAAE,EAAE,kBAAkB,CAAC,CAC7C,CAAC;IAEF,IAAI,CAAC;QACD,gDAAgD;QAChD,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,SAAS,CAAC,CAAC;QACnD,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC;QAEnC,+DAA+D;QAC/D,MAAM,uBAAuB,GAAG,CAAC,aAAqB,EAAU,EAAE;YAC9D,IAAI,aAAa,IAAI,EAAE;gBAAE,OAAO,CAAC,CAAC,CAAC,6BAA6B;YAChE,IAAI,aAAa,IAAI,GAAG;gBAAE,OAAO,CAAC,CAAC,CAAC,4BAA4B;YAChE,IAAI,aAAa,IAAI,GAAG;gBAAE,OAAO,CAAC,CAAC,CAAC,6BAA6B;YACjE,IAAI,aAAa,IAAI,IAAI;gBAAE,OAAO,EAAE,CAAC,CAAC,8BAA8B;YACpE,IAAI,aAAa,IAAI,IAAI;gBAAE,OAAO,EAAE,CAAC,CAAC,4BAA4B;YAClE,OAAO,EAAE,CAAC,CAAC,mCAAmC;QAClD,CAAC,CAAC;QAEF,MAAM,cAAc,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAC;QAEzD,qEAAqE;QACrE,MAAM,UAAU,GAAa,EAAE,CAAC;QAChC,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,6BAA6B;QAC/E,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,4BAA4B;QAC5E,MAAM,cAAc,GAAG,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;QAE1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,oBAAoB;YACrE,MAAM,SAAS,GAAG,WAAW,GAAG,cAAc,GAAG,QAAQ,CAAC;YAC1D,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC;QAC5C,CAAC;QAED,cAAG,CAAC,IAAI,CACJ,cAAc,cAAc,mBAAmB,QAAQ,SAAS,EAChE;YACI,QAAQ;YACR,QAAQ;YACR,cAAc;YACd,UAAU,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAChD,aAAa;SAChB,CACJ,CAAC;QAEF,gDAAgD;QAChD,MAAM,mBAAmB,GAAG,MAAM,OAAO,CAAC,GAAG,CACzC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE;YAC/B,OAAO,MAAM,iBAAiB,CAC1B,SAAS,EACT,aAAa,EACb,SAAS,EACT,MAAM,CAAC,MAAM,CAChB,CAAC;QACN,CAAC,CAAC,CACL,CAAC;QAEF,IAAI,mBAAmB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnC,cAAG,CAAC,IAAI,CAAC,0CAA0C,QAAQ,EAAE,EAAE;gBAC3D,QAAQ;gBACR,cAAc;gBACd,aAAa;aAChB,CAAC,CAAC;YACH,MAAM,IAAI,KAAK,CACX,0CAA0C,QAAQ,EAAE,CACvD,CAAC;QACN,CAAC;QAED,cAAc,CAAC,IAAI,CACf,
GAAG,mBAAmB,CAAC,MAAM,CACzB,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,KAAK,SAAS,CACzC,CACJ,CAAC;QACF,cAAG,CAAC,IAAI,CACJ,0BAA0B,mBAAmB,CAAC,MAAM,mBAAmB,QAAQ,EAAE,EACjF;YACI,QAAQ;YACR,cAAc,EAAE,mBAAmB,CAAC,MAAM;YAC1C,cAAc,EAAE,cAAc;SACjC,CACJ,CAAC;IACN,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,cAAG,CAAC,KAAK,CACL,0CAA0C,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CACvG,CAAC;QACF,MAAM,IAAI,KAAK,CAAC,4CAA4C,QAAQ,EAAE,CAAC,CAAC;IAC5E,CAAC;YAAS,CAAC;QACP,gCAAgC;QAChC,IAAI,CAAC;YACD,IAAI,YAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC3B,YAAE,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;YAC7B,CAAC;QACL,CAAC;QAAC,OAAO,YAAY,EAAE,CAAC;YACpB,cAAG,CAAC,IAAI,CAAC,2CAA2C,SAAS,EAAE,CAAC,CAAC;QACrE,CAAC;IACL,CAAC;IAED,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC;QAC7B,cAAG,CAAC,IAAI,CAAC,YAAY,QAAQ,uCAAuC,CAAC,CAAC;IAC1E,CAAC;IACD,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,IAAI,IAAI,WAAW,CAAC,EAAE,CAAC;IAExD,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,MAAM,IAAA,oCAAoB,EACvC,MAAM,EACN,IAAI,EACJ,cAAc,EACd,MAAM,CACT,CAAC;IAEF,OAAO;QACH,OAAO,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QAC/B,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,cAAc,EAAE,cAAc,CAAC,MAAM;QACrC,MAAM,EAAE,SAAS;KACpB,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.setDocumentStatus = setDocumentStatus;
|
|
4
|
+
const ActivityContext_js_1 = require("../dsl/setup/ActivityContext.js");
|
|
5
|
+
/**
 * Activity that sets the status of the object targeted by the workflow payload.
 *
 * The status is typed as a union (not a TypeScript enum) in the original source
 * because TypeScript enums break the workflow code generation.
 *
 * @param payload - activity payload; it is handed to `setupActivity`, which yields
 *   the API client, the activity params (`params.status` is the status to apply)
 *   and the target object id.
 * @returns the `status` field of the object returned by the update call.
 */
async function setDocumentStatus(payload) {
    const context = await (0, ActivityContext_js_1.setupActivity)(payload);
    const { status: requestedStatus } = context.params;
    const updatedObject = await context.client.objects.update(context.objectId, {
        status: requestedStatus,
    });
    return updatedObject.status;
}
|
|
15
|
+
//# sourceMappingURL=setDocumentStatus.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"setDocumentStatus.js","sourceRoot":"","sources":["../../../src/activities/setDocumentStatus.ts"],"names":[],"mappings":";;AAiBA,8CAOC;AAvBD,wEAAgE;AAWhE;;;;GAIG;AACI,KAAK,UAAU,iBAAiB,CAAC,OAA6D;IACjG,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,IAAA,kCAAa,EAA0B,OAAO,CAAC,CAAC;IAE3F,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAE7E,OAAO,GAAG,CAAC,MAAM,CAAC;AAEtB,CAAC"}
|
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.TextractProcessor = void 0;
|
|
7
|
+
const client_s3_1 = require("@aws-sdk/client-s3");
|
|
8
|
+
const client_textract_1 = require("@aws-sdk/client-textract");
|
|
9
|
+
const papaparse_1 = __importDefault(require("papaparse"));
|
|
10
|
+
/**
 * Wraps an AWS Textract table analysis of a document stored in S3 and renders
 * the resulting blocks as a tagged text document (`<page>`, `<text>`, `<table>`
 * and, optionally, `<image>` sections).
 */
class TextractProcessor {
    textractClient;
    s3Client;
    fileKey;
    bucket;
    log;
    detectImages;
    /**
     * Whether or not to include confidence values in CSV output for tables.
     */
    includeConfidenceInTables;
    /**
     * @param options.fileKey - S3 key used by `upload`.
     * @param options.region - AWS region passed to both the Textract and S3 clients.
     * @param options.bucket - S3 bucket used for upload and analysis.
     * @param options.credentials - AWS credentials passed to both clients.
     * @param options.log - logger; only `log.info` is used by this class.
     * @param options.detectImages - emit `<image>` placeholders for large non-text blocks (default false).
     * @param options.includeConfidenceInTables - add a `confidence` attribute to `<table>` tags (default false).
     */
    constructor({ fileKey, region, bucket, credentials, log, detectImages = false, includeConfidenceInTables = false }) {
        this.fileKey = fileKey;
        this.bucket = bucket;
        this.log = log;
        this.detectImages = detectImages;
        this.includeConfidenceInTables = includeConfidenceInTables;
        this.textractClient = new client_textract_1.TextractClient({
            region,
            credentials
        });
        this.s3Client = new client_s3_1.S3Client({
            region,
            credentials
        });
    }
    /**
     * Concatenates the text of a block's CHILD blocks, separated by single spaces.
     * WORD children contribute their text; selected SELECTION_ELEMENT children
     * contribute an `X`. Words that are purely digits with comma separators
     * (e.g. `1,234,567`) are wrapped in double quotes.
     *
     * @param result - block whose children are rendered.
     * @param blocksMap - lookup of block id to block.
     * @returns the trimmed text.
     */
    getText(result, blocksMap) {
        const parts = [];
        for (const relationship of result.Relationships ?? []) {
            if (relationship.Type !== 'CHILD') {
                continue;
            }
            for (const childId of relationship.Ids ?? []) {
                const child = blocksMap[childId];
                if (!child) {
                    // Dangling reference: the child block is not in the map, skip it.
                    continue;
                }
                if (child.BlockType === 'WORD') {
                    const wordText = child.Text || '';
                    // Strip every comma (not just the first) before testing for digits.
                    const isGroupedNumber = wordText.includes(',') &&
                        /^\d+$/.test(wordText.replaceAll(',', ''));
                    parts.push(isGroupedNumber ? `"${wordText}"` : wordText);
                }
                else if (child.BlockType === 'SELECTION_ELEMENT' &&
                    child.SelectionStatus === 'SELECTED') {
                    parts.push('X');
                }
            }
        }
        return parts.join(' ').trim();
    }
    /**
     * Collects the ids referenced as CHILD of any CELL that is itself a CHILD of
     * a TABLE block, i.e. the ids of everything that sits inside a table cell.
     *
     * @param blocksMap - lookup of block id to block.
     * @returns a Set of block ids.
     */
    collectTableWordIds(blocksMap) {
        const wordIds = new Set();
        for (const table of Object.values(blocksMap)) {
            if (table.BlockType !== 'TABLE') {
                continue;
            }
            for (const tableRel of table.Relationships ?? []) {
                if (tableRel.Type !== 'CHILD') {
                    continue;
                }
                for (const cellId of tableRel.Ids ?? []) {
                    const cell = blocksMap[cellId];
                    if (cell?.BlockType !== 'CELL') {
                        continue;
                    }
                    for (const cellRel of cell.Relationships ?? []) {
                        if (cellRel.Type !== 'CHILD') {
                            continue;
                        }
                        for (const id of cellRel.Ids ?? []) {
                            wordIds.add(id);
                        }
                    }
                }
            }
        }
        return wordIds;
    }
    /**
     * Tells whether a LINE block has at least one child that lives in a table cell.
     *
     * @param block - candidate block; anything but a LINE yields false.
     * @param blocksMap - lookup of block id to block.
     * @param tableWordIds - optional precomputed result of `collectTableWordIds(blocksMap)`;
     *   pass it when checking many lines against the same map to avoid rescanning.
     * @returns true when the line overlaps a table cell.
     */
    isBlockInTable(block, blocksMap, tableWordIds) {
        if (block.BlockType !== 'LINE') {
            return false;
        }
        const idsInTables = tableWordIds ?? this.collectTableWordIds(blocksMap);
        for (const relationship of block.Relationships ?? []) {
            if (relationship.Type !== 'CHILD') {
                continue;
            }
            for (const childId of relationship.Ids ?? []) {
                const wordBlock = blocksMap[childId];
                if (wordBlock && idsInTables.has(wordBlock.Id)) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Tells whether the given block is referenced as a child of any TABLE -> CELL block.
     *
     * @param wordBlock - block to look for (a missing block yields false).
     * @param blocksMap - lookup of block id to block.
     */
    isWordInTableCell(wordBlock, blocksMap) {
        if (!wordBlock) {
            return false;
        }
        return this.collectTableWordIds(blocksMap).has(wordBlock.Id);
    }
    /**
     * Builds a dense row/column grid for a TABLE block. Each grid entry holds
     * the cell text and the cell confidence; positions with no CELL block are
     * filled with `{ text: '', confidence: 0 }` and rows with no cells at all
     * become empty arrays, so the grid can be iterated safely.
     *
     * @param tableResult - the TABLE block.
     * @param blocksMap - lookup of block id to block.
     * @returns `{ rows }` where `rows[r][c]` is `{ text, confidence }`.
     */
    getRowsColumnsMap(tableResult, blocksMap) {
        const rows = [];
        for (const relationship of tableResult.Relationships ?? []) {
            if (relationship.Type !== 'CHILD') {
                continue;
            }
            for (const childId of relationship.Ids ?? []) {
                const cell = blocksMap[childId];
                if (cell?.BlockType !== 'CELL') {
                    continue;
                }
                // Indices are 1-based in the block; default to 1 when absent.
                const rowIndex = cell.RowIndex || 1;
                const colIndex = cell.ColumnIndex || 1;
                rows[rowIndex - 1] ??= [];
                const row = rows[rowIndex - 1];
                // Pad any gap so that colIndex - 1 never leaves a hole in the row.
                while (row.length < colIndex - 1) {
                    row.push({ text: '', confidence: 0 });
                }
                row[colIndex - 1] = {
                    text: this.getText(cell, blocksMap),
                    confidence: cell.Confidence || 0
                };
            }
        }
        // Cells may arrive for row N without any cell for an earlier row;
        // replace those holes with empty rows so callers never see undefined.
        for (let i = 0; i < rows.length; i++) {
            rows[i] ??= [];
        }
        return { rows };
    }
    /**
     * Renders a TABLE block as CSV and computes the mean confidence over all
     * grid cells (padding cells count with confidence 0).
     *
     * @param tableResult - the TABLE block.
     * @param blocksMap - lookup of block id to block.
     * @param _tableIndex - unused.
     * @param _pageNumber - unused.
     * @returns `{ csv, tableConfidence }`; `tableConfidence` is 0 for an empty table.
     */
    generateTableCSV(tableResult, blocksMap, _tableIndex, _pageNumber) {
        const { rows } = this.getRowsColumnsMap(tableResult, blocksMap);
        let totalConfidence = 0;
        let cellCount = 0;
        const csvData = rows.map((row) => row.map((cell) => {
            totalConfidence += cell.confidence;
            cellCount++;
            return cell.text.trim();
        }));
        const tableConfidence = cellCount > 0 ? (totalConfidence / cellCount) : 0;
        const csv = papaparse_1.default.unparse(csvData, {
            delimiter: ',',
            quotes: true,
            quoteChar: '"',
            escapeChar: '"',
            header: false,
            newline: '\n',
            skipEmptyLines: false
        });
        return { csv, tableConfidence };
    }
    /**
     * Uploads the given body to `this.bucket` under `this.fileKey`.
     *
     * @param fileBuf - object body sent as-is in the PutObject command.
     */
    async upload(fileBuf) {
        this.log.info('Uploading file to S3', { fileKey: this.fileKey });
        const command = new client_s3_1.PutObjectCommand({
            Bucket: this.bucket,
            Key: this.fileKey,
            Body: fileBuf,
        });
        await this.s3Client.send(command);
    }
    /**
     * Starts a document analysis (TABLES feature) on an object of `this.bucket`.
     *
     * @param s3Key - key of the object to analyze.
     * @returns the `JobId` of the response.
     */
    async startAnalysis(s3Key) {
        const command = new client_textract_1.StartDocumentAnalysisCommand({
            DocumentLocation: {
                S3Object: {
                    Bucket: this.bucket,
                    Name: s3Key
                }
            },
            FeatureTypes: ["TABLES"]
        });
        const response = await this.textractClient.send(command);
        return response.JobId;
    }
    /**
     * Fetches the current status of an analysis job.
     *
     * @param jobId - id returned by `startAnalysis`.
     * @returns the `JobStatus` of the response.
     */
    async checkJobStatus(jobId) {
        const command = new client_textract_1.GetDocumentAnalysisCommand({ JobId: jobId });
        const response = await this.textractClient.send(command);
        return response.JobStatus;
    }
    /**
     * Builds an `[IMAGE_<position>]` placeholder line for a block, based on its
     * bounding box. Blocks without a bounding box, or covering less than 5% of
     * the page area, yield an empty string.
     *
     * @param block - block whose `Geometry.BoundingBox` is inspected.
     * @returns the placeholder (newline-terminated) or `''`.
     */
    getImagePlaceholder(block) {
        const geometry = block.Geometry?.BoundingBox;
        if (!geometry) {
            return '';
        }
        const area = (geometry.Width || 0) * (geometry.Height || 0);
        if (area < 0.05) {
            return ''; // skip small images
        }
        const top = geometry.Top || 0;
        const left = geometry.Left || 0;
        // The vertical part is only emitted for the top/bottom bands; the
        // horizontal part is always emitted.
        let position = '';
        if (top < 0.3) {
            position += 'TOP_';
        }
        else if (top > 0.7) {
            position += 'BOTTOM_';
        }
        if (left < 0.3) {
            position += 'LEFT';
        }
        else if (left > 0.7) {
            position += 'RIGHT';
        }
        else {
            position += 'CENTER';
        }
        return `[IMAGE_${position}]\n`;
    }
    /**
     * Maps the left edge of a block's bounding box to an indentation level:
     * 0 below 0.15, 1 below 0.25, otherwise 2.
     */
    getIndentationLevel(block) {
        const left = block.Geometry?.BoundingBox?.Left || 0;
        if (left < 0.15) {
            return 0;
        }
        if (left < 0.25) {
            return 1;
        }
        return 2;
    }
    /**
     * Heuristic: a block is treated as a header when there is no previous block
     * or when the vertical gap to the bottom of the previous block exceeds 0.03.
     */
    isLikelyHeader(block, prevBlock) {
        if (!prevBlock) {
            return true;
        }
        const prevBox = prevBlock.Geometry?.BoundingBox;
        const prevBottom = (prevBox?.Top || 0) + (prevBox?.Height || 0);
        const gap = (block.Geometry?.BoundingBox?.Top || 0) - prevBottom;
        return gap > 0.03;
    }
    /**
     * Renders a block's text with its indentation and a trailing newline;
     * likely headers additionally get a leading newline.
     */
    formatTextBlock(block, prevBlock) {
        const text = block.Text || '';
        const indent = ' '.repeat(this.getIndentationLevel(block));
        const leading = this.isLikelyHeader(block, prevBlock) ? '\n' : '';
        return `${leading}${indent}${text}\n`;
    }
    /**
     * Two lines are merged into one text block when the vertical gap between the
     * bottom of the previous line and the top of the current one is below 0.02.
     */
    shouldMergeLines(prev, current) {
        const prevBox = prev.Geometry?.BoundingBox;
        const prevBottom = (prevBox?.Top || 0) + (prevBox?.Height || 0);
        const currentTop = current.Geometry?.BoundingBox?.Top || 0;
        return currentTop - prevBottom < 0.02;
    }
    /**
     * Downloads every result page of an analysis job and renders it as text.
     *
     * Blocks are ordered by page, then by vertical position. Per page, LINE
     * blocks outside tables are grouped into `<text>` sections, TABLE blocks
     * become `<table>` sections containing CSV, and (when `detectImages` is set)
     * other sufficiently large blocks become `<image>` sections.
     *
     * @param jobId - id of the analysis job.
     * @returns the rendered document.
     */
    async processResults(jobId) {
        // Follow NextToken until every result page has been fetched.
        const allBlocks = [];
        let nextToken;
        do {
            const command = new client_textract_1.GetDocumentAnalysisCommand({
                JobId: jobId,
                NextToken: nextToken
            });
            const response = await this.textractClient.send(command);
            allBlocks.push(...(response.Blocks ?? []));
            nextToken = response.NextToken;
        } while (nextToken);
        const blocksMap = {};
        for (const block of allBlocks) {
            blocksMap[block.Id] = block;
        }
        // Computed once so each LINE is checked in O(children) instead of
        // rescanning every table for every word.
        const tableWordIds = this.collectTableWordIds(blocksMap);
        const pageContents = [];
        let currentPage = null;
        let currentTextContent = "";
        let prevLineBlock = null;
        // Pushes the text accumulated so far (if any) onto the current page.
        const flushText = () => {
            if (currentPage && currentTextContent.trim().length > 0) {
                currentPage.blocks.push({
                    type: 'text',
                    content: currentTextContent
                });
            }
            currentTextContent = "";
        };
        // Sort by page and vertical position
        allBlocks.sort((a, b) => {
            if (a.Page !== b.Page) {
                return (a.Page || 0) - (b.Page || 0);
            }
            return (a.Geometry?.BoundingBox?.Top || 0) - (b.Geometry?.BoundingBox?.Top || 0);
        });
        for (const block of allBlocks) {
            if (block.BlockType === 'PAGE') {
                flushText();
                if (currentPage) {
                    pageContents.push(currentPage);
                }
                currentPage = {
                    pageNumber: block.Page || 0,
                    blocks: []
                };
                prevLineBlock = null;
                continue;
            }
            // Normalize a missing Page the same way pageNumber was normalized,
            // otherwise blocks without a Page would never match page 0.
            if (!currentPage || (block.Page || 0) !== currentPage.pageNumber) {
                continue;
            }
            if (block.BlockType === 'TABLE') {
                flushText();
                const tableIndex = currentPage.blocks.filter(b => b.type === 'table').length + 1;
                const { csv, tableConfidence } = this.generateTableCSV(block, blocksMap, tableIndex, currentPage.pageNumber);
                currentPage.blocks.push({
                    type: 'table',
                    content: csv,
                    confidence: tableConfidence
                });
                prevLineBlock = null;
            }
            else if (block.BlockType === 'LINE' && !this.isBlockInTable(block, blocksMap, tableWordIds)) {
                const lineText = this.formatTextBlock(block, prevLineBlock).trim();
                if (prevLineBlock && this.shouldMergeLines(prevLineBlock, block)) {
                    // Same paragraph: append without any leading newline.
                    currentTextContent += ` ${lineText}`;
                }
                else {
                    // New paragraph: emit the pending one and start over.
                    flushText();
                    currentTextContent = lineText;
                }
                prevLineBlock = block;
            }
            else if (this.detectImages) {
                const geometry = block.Geometry?.BoundingBox;
                if (geometry && geometry.Width && geometry.Height) {
                    const imagePlaceholder = this.getImagePlaceholder(block);
                    if (imagePlaceholder) {
                        flushText();
                        currentPage.blocks.push({
                            type: 'image',
                            content: imagePlaceholder,
                            left: geometry.Left,
                            top: geometry.Top,
                            width: geometry.Width,
                            height: geometry.Height
                        });
                    }
                }
                // prevLineBlock is intentionally left untouched here.
            }
        }
        // Handle last page
        flushText();
        if (currentPage) {
            pageContents.push(currentPage);
        }
        // A coordinate of 0 is valid and must still be rendered, so test for a
        // finite number rather than truthiness.
        const geometryAttr = (name, value) => Number.isFinite(value) ? ` ${name}="${value.toFixed(4)}"` : '';
        let fullText = '';
        let imgNumber = 1;
        let tableNumber = 1;
        for (const page of pageContents) {
            fullText += `<page number="${page.pageNumber}">\n`;
            for (const block of page.blocks) {
                if (block.type === 'text') {
                    fullText += `<text>\n${block.content}\n</text>\n\n`;
                }
                else if (block.type === 'table') {
                    const confidenceAttr = block.confidence !== undefined && this.includeConfidenceInTables
                        ? ` confidence="${block.confidence.toFixed(2)}"`
                        : '';
                    // NOTE(review): the tag format (unquoted number, extra space) is kept
                    // byte-identical because downstream consumers may depend on it.
                    fullText += `<table number=${tableNumber++} type="csv" ${confidenceAttr}>\n`;
                    fullText += `${block.content}\n`;
                    fullText += `</table>\n\n`;
                }
                else if (block.type === 'image') {
                    const attrs = geometryAttr('left', block.left) +
                        geometryAttr('top', block.top) +
                        geometryAttr('width', block.width) +
                        geometryAttr('height', block.height);
                    fullText += `<image id="${imgNumber++}" ${attrs}>\n${block.content.trim()}\n</image>\n\n`;
                }
            }
            fullText += `</page>\n\n`;
        }
        return fullText;
    }
}
|
|
416
|
+
exports.TextractProcessor = TextractProcessor;
|
|
417
|
+
//# sourceMappingURL=TextractProcessor.js.map
|