@vertesia/workflow 1.1.1-dev.20260505.163000Z → 1.3.0-dev.20260620.061059Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -2
- package/lib/{types/activities → activities}/advanced/createDocumentTypeFromInteractionRun.d.ts +2 -2
- package/lib/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/advanced/createDocumentTypeFromInteractionRun.js +9 -9
- package/lib/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/{types/activities → activities}/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +2 -2
- package/lib/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/advanced/createOrUpdateDocumentFromInteractionRun.js +29 -22
- package/lib/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/{types/activities → activities}/advanced/updateDocumentFromInteractionRun.d.ts +1 -1
- package/lib/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/activities/advanced/updateDocumentFromInteractionRun.js +16 -0
- package/lib/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/{types/activities → activities}/chunkDocument.d.ts +3 -3
- package/lib/activities/chunkDocument.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/chunkDocument.js +38 -33
- package/lib/activities/chunkDocument.js.map +1 -0
- package/lib/{types/activities → activities}/createDocumentFromOther.d.ts +1 -1
- package/lib/activities/createDocumentFromOther.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/createDocumentFromOther.js +15 -13
- package/lib/activities/createDocumentFromOther.js.map +1 -0
- package/lib/{types/activities → activities}/executeInteraction.d.ts +19 -10
- package/lib/activities/executeInteraction.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/executeInteraction.js +100 -79
- package/lib/activities/executeInteraction.js.map +1 -0
- package/lib/{types/activities → activities}/executeRemoteActivity.d.ts +4 -4
- package/lib/activities/executeRemoteActivity.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/executeRemoteActivity.js +48 -23
- package/lib/activities/executeRemoteActivity.js.map +1 -0
- package/lib/{types/activities → activities}/extractDocumentText.d.ts +3 -3
- package/lib/activities/extractDocumentText.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/extractDocumentText.js +65 -52
- package/lib/activities/extractDocumentText.js.map +1 -0
- package/lib/{types/activities → activities}/generateDocumentProperties.d.ts +4 -4
- package/lib/activities/generateDocumentProperties.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/generateDocumentProperties.js +37 -33
- package/lib/activities/generateDocumentProperties.js.map +1 -0
- package/lib/{types/activities → activities}/generateEmbeddings.d.ts +3 -3
- package/lib/activities/generateEmbeddings.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/generateEmbeddings.js +80 -80
- package/lib/activities/generateEmbeddings.js.map +1 -0
- package/lib/{types/activities → activities}/generateOrAssignContentType.d.ts +4 -4
- package/lib/activities/generateOrAssignContentType.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/generateOrAssignContentType.js +51 -49
- package/lib/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/{types/activities → activities}/getObjectFromStore.d.ts +1 -1
- package/lib/activities/getObjectFromStore.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/getObjectFromStore.js +4 -3
- package/lib/activities/getObjectFromStore.js.map +1 -0
- package/lib/{types/activities → activities}/handleError.d.ts +1 -1
- package/lib/activities/handleError.d.ts.map +1 -0
- package/lib/activities/handleError.js +37 -0
- package/lib/activities/handleError.js.map +1 -0
- package/lib/activities/index-dsl.d.ts +31 -0
- package/lib/activities/index-dsl.d.ts.map +1 -0
- package/lib/activities/index-dsl.js +29 -0
- package/lib/activities/index-dsl.js.map +1 -0
- package/lib/{types/activities → activities}/index.d.ts +1 -1
- package/lib/activities/index.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/index.js +1 -1
- package/lib/activities/index.js.map +1 -0
- package/lib/{types/activities → activities}/loadChildWorkflowSpec.d.ts +1 -1
- package/lib/activities/loadChildWorkflowSpec.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/loadChildWorkflowSpec.js +4 -2
- package/lib/activities/loadChildWorkflowSpec.js.map +1 -0
- package/lib/{types/activities → activities}/media/prepareAudio.d.ts +1 -1
- package/lib/activities/media/prepareAudio.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/media/prepareAudio.js +27 -23
- package/lib/activities/media/prepareAudio.js.map +1 -0
- package/lib/{types/activities → activities}/media/prepareVideo.d.ts +2 -2
- package/lib/activities/media/prepareVideo.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/media/prepareVideo.js +72 -54
- package/lib/activities/media/prepareVideo.js.map +1 -0
- package/lib/{types/activities → activities}/media/probeMediaStreams.d.ts +2 -3
- package/lib/activities/media/probeMediaStreams.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/media/probeMediaStreams.js +7 -5
- package/lib/activities/media/probeMediaStreams.js.map +1 -0
- package/lib/{types/activities → activities}/media/processPdfWithTextract.d.ts +3 -3
- package/lib/activities/media/processPdfWithTextract.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/media/processPdfWithTextract.js +43 -26
- package/lib/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/{types/activities → activities}/media/saveGladiaTranscription.d.ts +2 -2
- package/lib/activities/media/saveGladiaTranscription.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/media/saveGladiaTranscription.js +35 -25
- package/lib/activities/media/saveGladiaTranscription.js.map +1 -0
- package/lib/{types/activities → activities}/media/transcribeMediaWithGladia.d.ts +2 -2
- package/lib/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/media/transcribeMediaWithGladia.js +26 -21
- package/lib/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/{types/activities → activities}/mergeChildArtifacts.d.ts +1 -1
- package/lib/activities/mergeChildArtifacts.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/mergeChildArtifacts.js +27 -26
- package/lib/activities/mergeChildArtifacts.js.map +1 -0
- package/lib/{types/activities → activities}/notifyWebhook.d.ts +6 -5
- package/lib/activities/notifyWebhook.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/notifyWebhook.js +20 -17
- package/lib/activities/notifyWebhook.js.map +1 -0
- package/lib/{types/activities → activities}/rateLimiter.d.ts +1 -1
- package/lib/activities/rateLimiter.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/rateLimiter.js +4 -3
- package/lib/activities/rateLimiter.js.map +1 -0
- package/lib/{types/activities → activities}/renditions/generateImageRendition.d.ts +3 -3
- package/lib/activities/renditions/generateImageRendition.d.ts.map +1 -0
- package/lib/activities/renditions/generateImageRendition.js +75 -0
- package/lib/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/{types/activities → activities}/renditions/generateVideoRendition.d.ts +3 -3
- package/lib/activities/renditions/generateVideoRendition.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/renditions/generateVideoRendition.js +51 -54
- package/lib/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/{types/activities → activities}/resolveRemoteActivities.d.ts +3 -4
- package/lib/activities/resolveRemoteActivities.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/resolveRemoteActivities.js +16 -15
- package/lib/activities/resolveRemoteActivities.js.map +1 -0
- package/lib/{types/activities → activities}/setDocumentStatus.d.ts +1 -1
- package/lib/activities/setDocumentStatus.d.ts.map +1 -0
- package/lib/{esm/activities → activities}/setDocumentStatus.js +5 -3
- package/lib/activities/setDocumentStatus.js.map +1 -0
- package/lib/{types/bulk-import.d.ts → bulk-import.d.ts} +10 -1
- package/lib/bulk-import.d.ts.map +1 -0
- package/lib/bulk-import.js.map +1 -0
- package/lib/{types/conversion → conversion}/TextractProcessor.d.ts +7 -4
- package/lib/conversion/TextractProcessor.d.ts.map +1 -0
- package/lib/{esm/conversion → conversion}/TextractProcessor.js +38 -38
- package/lib/conversion/TextractProcessor.js.map +1 -0
- package/lib/conversion/image.d.ts.map +1 -0
- package/lib/{esm/conversion → conversion}/image.js +35 -35
- package/lib/conversion/image.js.map +1 -0
- package/lib/conversion/markitdown.d.ts.map +1 -0
- package/lib/{esm/conversion → conversion}/markitdown.js +11 -11
- package/lib/conversion/markitdown.js.map +1 -0
- package/lib/conversion/mutool.d.ts.map +1 -0
- package/lib/{esm/conversion → conversion}/mutool.js +19 -24
- package/lib/conversion/mutool.js.map +1 -0
- package/lib/conversion/pandoc.d.ts.map +1 -0
- package/lib/{esm/conversion → conversion}/pandoc.js +11 -11
- package/lib/conversion/pandoc.js.map +1 -0
- package/lib/dsl/conditions.d.ts +2 -0
- package/lib/dsl/conditions.d.ts.map +1 -0
- package/lib/dsl/conditions.js +90 -0
- package/lib/dsl/conditions.js.map +1 -0
- package/lib/{types/dsl → dsl}/dsl-workflow.d.ts +2 -2
- package/lib/dsl/dsl-workflow.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/dsl-workflow.js +93 -65
- package/lib/dsl/dsl-workflow.js.map +1 -0
- package/lib/dsl/dslProxyActivities.d.ts +7 -0
- package/lib/dsl/dslProxyActivities.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/dslProxyActivities.js +10 -1
- package/lib/dsl/dslProxyActivities.js.map +1 -0
- package/lib/dsl/projections.d.ts +4 -0
- package/lib/dsl/projections.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/projections.js +22 -8
- package/lib/dsl/projections.js.map +1 -0
- package/lib/{types/dsl → dsl}/setup/ActivityContext.d.ts +4 -4
- package/lib/dsl/setup/ActivityContext.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/setup/ActivityContext.js +24 -27
- package/lib/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/{types/dsl → dsl}/setup/fetch/DataProvider.d.ts +3 -3
- package/lib/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/setup/fetch/DataProvider.js +2 -1
- package/lib/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/dsl/setup/fetch/index.d.ts +6 -0
- package/lib/dsl/setup/fetch/index.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/setup/fetch/index.js +1 -1
- package/lib/dsl/setup/fetch/index.js.map +1 -0
- package/lib/{types/dsl → dsl}/setup/fetch/providers.d.ts +6 -6
- package/lib/dsl/setup/fetch/providers.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/setup/fetch/providers.js +30 -16
- package/lib/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/dsl/test/test-child-workflow.d.ts +4 -0
- package/lib/dsl/test/test-child-workflow.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/test/test-child-workflow.js +1 -1
- package/lib/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/{types/dsl → dsl}/validation.d.ts +1 -1
- package/lib/dsl/validation.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/validation.js +8 -8
- package/lib/dsl/validation.js.map +1 -0
- package/lib/{types/dsl → dsl}/vars.d.ts +16 -16
- package/lib/dsl/vars.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/vars.js +20 -15
- package/lib/dsl/vars.js.map +1 -0
- package/lib/dsl/walk.d.ts +18 -0
- package/lib/dsl/walk.d.ts.map +1 -0
- package/lib/{esm/dsl → dsl}/walk.js +37 -16
- package/lib/dsl/walk.js.map +1 -0
- package/lib/dsl.d.ts +4 -0
- package/lib/dsl.d.ts.map +1 -0
- package/lib/dsl.js +4 -0
- package/lib/dsl.js.map +1 -0
- package/lib/{types/errors.d.ts → errors.d.ts} +2 -2
- package/lib/errors.d.ts.map +1 -0
- package/lib/{esm/errors.js → errors.js} +22 -17
- package/lib/errors.js.map +1 -0
- package/lib/index.d.ts +36 -0
- package/lib/index.d.ts.map +1 -0
- package/lib/index.js +37 -0
- package/lib/index.js.map +1 -0
- package/lib/result-types.d.ts.map +1 -0
- package/lib/result-types.js.map +1 -0
- package/lib/security/ssrf.d.ts.map +1 -0
- package/lib/security/ssrf.js.map +1 -0
- package/lib/system/notifyWebhookWorkflow.d.ts +8 -0
- package/lib/system/notifyWebhookWorkflow.d.ts.map +1 -0
- package/lib/{esm/system → system}/notifyWebhookWorkflow.js +13 -12
- package/lib/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/{types/system → system}/recalculateEmbeddingsWorkflow.d.ts +1 -1
- package/lib/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
- package/lib/{esm/system → system}/recalculateEmbeddingsWorkflow.js +7 -7
- package/lib/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/utils/activity-info.d.ts +12 -0
- package/lib/utils/activity-info.d.ts.map +1 -0
- package/lib/utils/activity-info.js +15 -0
- package/lib/utils/activity-info.js.map +1 -0
- package/lib/{types/utils → utils}/auth.d.ts +1 -1
- package/lib/utils/auth.d.ts.map +1 -0
- package/lib/utils/auth.js.map +1 -0
- package/lib/{types/utils → utils}/blobs.d.ts +1 -1
- package/lib/utils/blobs.d.ts.map +1 -0
- package/lib/{esm/utils → utils}/blobs.js +17 -16
- package/lib/utils/blobs.js.map +1 -0
- package/lib/utils/chunks.d.ts.map +1 -0
- package/lib/{esm/utils → utils}/chunks.js +1 -1
- package/lib/utils/chunks.js.map +1 -0
- package/lib/utils/client.d.ts +8 -0
- package/lib/utils/client.d.ts.map +1 -0
- package/lib/utils/client.js +57 -0
- package/lib/utils/client.js.map +1 -0
- package/lib/{types/utils → utils}/expand-vars.d.ts +1 -1
- package/lib/utils/expand-vars.d.ts.map +1 -0
- package/lib/{esm/utils → utils}/expand-vars.js +4 -1
- package/lib/utils/expand-vars.js.map +1 -0
- package/lib/{types/utils → utils}/renditions.d.ts +2 -2
- package/lib/utils/renditions.d.ts.map +1 -0
- package/lib/{esm/utils → utils}/renditions.js +29 -18
- package/lib/utils/renditions.js.map +1 -0
- package/lib/{types/utils → utils}/storage.d.ts +2 -2
- package/lib/utils/storage.d.ts.map +1 -0
- package/lib/{esm/utils → utils}/storage.js +13 -12
- package/lib/utils/storage.js.map +1 -0
- package/lib/{types/utils → utils}/text-preview-utils.d.ts +2 -2
- package/lib/utils/text-preview-utils.d.ts.map +1 -0
- package/lib/{esm/utils → utils}/text-preview-utils.js +4 -4
- package/lib/utils/text-preview-utils.js.map +1 -0
- package/lib/{types/utils → utils}/tokens.d.ts +1 -1
- package/lib/utils/tokens.d.ts.map +1 -0
- package/lib/{esm/utils → utils}/tokens.js +3 -3
- package/lib/utils/tokens.js.map +1 -0
- package/lib/vars.d.ts +3 -0
- package/lib/vars.d.ts.map +1 -0
- package/lib/vars.js +4 -0
- package/lib/vars.js.map +1 -0
- package/lib/workflows-bundle.js +6201 -3026
- package/lib/workflows.d.ts +7 -0
- package/lib/workflows.d.ts.map +1 -0
- package/lib/workflows.js +7 -0
- package/lib/workflows.js.map +1 -0
- package/package.json +49 -89
- package/src/activities/advanced/createDocumentTypeFromInteractionRun.ts +30 -17
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +51 -37
- package/src/activities/advanced/updateDocumentFromInteractionRun.ts +12 -11
- package/src/activities/chunkDocument.ts +87 -77
- package/src/activities/createDocumentFromOther.ts +19 -25
- package/src/activities/executeInteraction.test.ts +156 -0
- package/src/activities/executeInteraction.ts +150 -103
- package/src/activities/executeRemoteActivity.test.ts +61 -67
- package/src/activities/executeRemoteActivity.ts +56 -29
- package/src/activities/extractDocumentText.ts +75 -64
- package/src/activities/generateDocumentProperties.ts +69 -50
- package/src/activities/generateEmbeddings.test.ts +114 -0
- package/src/activities/generateEmbeddings.ts +113 -141
- package/src/activities/generateOrAssignContentType.ts +219 -231
- package/src/activities/getObjectFromStore.ts +11 -10
- package/src/activities/handleError.ts +23 -11
- package/src/activities/index-dsl.ts +30 -30
- package/src/activities/index.ts +1 -1
- package/src/activities/loadChildWorkflowSpec.ts +8 -3
- package/src/activities/media/prepareAudio.ts +47 -56
- package/src/activities/media/prepareVideo.ts +107 -122
- package/src/activities/media/probeMediaStreams.test.ts +11 -13
- package/src/activities/media/probeMediaStreams.ts +13 -15
- package/src/activities/media/processPdfWithTextract.ts +65 -46
- package/src/activities/media/saveGladiaTranscription.test.ts +122 -121
- package/src/activities/media/saveGladiaTranscription.ts +52 -26
- package/src/activities/media/transcribeMediaWithGladia.test.ts +173 -165
- package/src/activities/media/transcribeMediaWithGladia.ts +44 -28
- package/src/activities/mergeChildArtifacts.ts +36 -38
- package/src/activities/notifyWebhook.test.ts +223 -217
- package/src/activities/notifyWebhook.ts +67 -40
- package/src/activities/rateLimiter.ts +31 -30
- package/src/activities/renditions/generateImageRendition.ts +54 -64
- package/src/activities/renditions/generateVideoRendition.ts +87 -141
- package/src/activities/resolveRemoteActivities.test.ts +96 -88
- package/src/activities/resolveRemoteActivities.ts +32 -23
- package/src/activities/setDocumentStatus.ts +11 -7
- package/src/bulk-import.ts +11 -5
- package/src/conversion/TextractProcessor.ts +69 -71
- package/src/conversion/image.test.ts +31 -31
- package/src/conversion/image.ts +41 -38
- package/src/conversion/markitdown.ts +11 -11
- package/src/conversion/mutool.test.ts +77 -62
- package/src/conversion/mutool.ts +27 -49
- package/src/conversion/pandoc.test.ts +5 -6
- package/src/conversion/pandoc.ts +11 -11
- package/src/dsl/conditions.ts +35 -17
- package/src/dsl/dsl-workflow.test.ts +49 -39
- package/src/dsl/dsl-workflow.ts +200 -112
- package/src/dsl/dslProxyActivities.test.ts +23 -0
- package/src/dsl/dslProxyActivities.ts +46 -26
- package/src/dsl/ms.d.ts +34 -3
- package/src/dsl/projections.test.ts +108 -100
- package/src/dsl/projections.ts +46 -26
- package/src/dsl/setup/ActivityContext.test.ts +22 -23
- package/src/dsl/setup/ActivityContext.ts +41 -84
- package/src/dsl/setup/fetch/DataProvider.ts +11 -8
- package/src/dsl/setup/fetch/index.ts +6 -8
- package/src/dsl/setup/fetch/providers.ts +38 -24
- package/src/dsl/test/test-child-workflow.ts +3 -2
- package/src/dsl/validation.test.ts +232 -213
- package/src/dsl/validation.ts +17 -12
- package/src/dsl/vars.test.ts +143 -140
- package/src/dsl/vars.ts +61 -54
- package/src/dsl/walk.test.ts +61 -56
- package/src/dsl/walk.ts +61 -35
- package/src/dsl/workflow-exec-child.test.ts +77 -61
- package/src/dsl/workflow-fetch.test.ts +40 -38
- package/src/dsl/workflow-import.test.ts +27 -27
- package/src/dsl/workflow.test.ts +34 -33
- package/src/dsl.ts +3 -4
- package/src/errors.ts +31 -23
- package/src/index.ts +28 -33
- package/src/result-types.ts +6 -9
- package/src/system/notifyWebhookWorkflow.ts +23 -23
- package/src/system/recalculateEmbeddingsWorkflow.ts +14 -17
- package/src/utils/activity-info.ts +15 -0
- package/src/utils/auth.ts +1 -1
- package/src/utils/blobs.ts +19 -18
- package/src/utils/chunks.ts +7 -8
- package/src/utils/client.ts +45 -17
- package/src/utils/expand-vars.ts +9 -6
- package/src/utils/renditions.ts +68 -72
- package/src/utils/storage.ts +18 -22
- package/src/utils/text-preview-utils.ts +9 -19
- package/src/utils/tokens.ts +9 -11
- package/src/vars.ts +2 -2
- package/src/workflows.ts +3 -4
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +0 -33
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +0 -1
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +0 -73
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +0 -1
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +0 -19
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +0 -1
- package/lib/cjs/activities/chunkDocument.js +0 -111
- package/lib/cjs/activities/chunkDocument.js.map +0 -1
- package/lib/cjs/activities/createDocumentFromOther.js +0 -64
- package/lib/cjs/activities/createDocumentFromOther.js.map +0 -1
- package/lib/cjs/activities/executeInteraction.js +0 -239
- package/lib/cjs/activities/executeInteraction.js.map +0 -1
- package/lib/cjs/activities/executeRemoteActivity.js +0 -126
- package/lib/cjs/activities/executeRemoteActivity.js.map +0 -1
- package/lib/cjs/activities/extractDocumentText.js +0 -196
- package/lib/cjs/activities/extractDocumentText.js.map +0 -1
- package/lib/cjs/activities/generateDocumentProperties.js +0 -109
- package/lib/cjs/activities/generateDocumentProperties.js.map +0 -1
- package/lib/cjs/activities/generateEmbeddings.js +0 -269
- package/lib/cjs/activities/generateEmbeddings.js.map +0 -1
- package/lib/cjs/activities/generateOrAssignContentType.js +0 -173
- package/lib/cjs/activities/generateOrAssignContentType.js.map +0 -1
- package/lib/cjs/activities/getObjectFromStore.js +0 -31
- package/lib/cjs/activities/getObjectFromStore.js.map +0 -1
- package/lib/cjs/activities/handleError.js +0 -29
- package/lib/cjs/activities/handleError.js.map +0 -1
- package/lib/cjs/activities/index-dsl.js +0 -59
- package/lib/cjs/activities/index-dsl.js.map +0 -1
- package/lib/cjs/activities/index.js +0 -21
- package/lib/cjs/activities/index.js.map +0 -1
- package/lib/cjs/activities/loadChildWorkflowSpec.js +0 -15
- package/lib/cjs/activities/loadChildWorkflowSpec.js.map +0 -1
- package/lib/cjs/activities/media/prepareAudio.js +0 -240
- package/lib/cjs/activities/media/prepareAudio.js.map +0 -1
- package/lib/cjs/activities/media/prepareVideo.js +0 -432
- package/lib/cjs/activities/media/prepareVideo.js.map +0 -1
- package/lib/cjs/activities/media/probeMediaStreams.js +0 -49
- package/lib/cjs/activities/media/probeMediaStreams.js.map +0 -1
- package/lib/cjs/activities/media/processPdfWithTextract.js +0 -103
- package/lib/cjs/activities/media/processPdfWithTextract.js.map +0 -1
- package/lib/cjs/activities/media/saveGladiaTranscription.js +0 -103
- package/lib/cjs/activities/media/saveGladiaTranscription.js.map +0 -1
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +0 -107
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +0 -1
- package/lib/cjs/activities/mergeChildArtifacts.js +0 -150
- package/lib/cjs/activities/mergeChildArtifacts.js.map +0 -1
- package/lib/cjs/activities/notifyWebhook.js +0 -167
- package/lib/cjs/activities/notifyWebhook.js.map +0 -1
- package/lib/cjs/activities/rateLimiter.js +0 -30
- package/lib/cjs/activities/rateLimiter.js.map +0 -1
- package/lib/cjs/activities/renditions/generateImageRendition.js +0 -66
- package/lib/cjs/activities/renditions/generateImageRendition.js.map +0 -1
- package/lib/cjs/activities/renditions/generateVideoRendition.js +0 -200
- package/lib/cjs/activities/renditions/generateVideoRendition.js.map +0 -1
- package/lib/cjs/activities/resolveRemoteActivities.js +0 -133
- package/lib/cjs/activities/resolveRemoteActivities.js.map +0 -1
- package/lib/cjs/activities/setDocumentStatus.js +0 -26
- package/lib/cjs/activities/setDocumentStatus.js.map +0 -1
- package/lib/cjs/bulk-import.js +0 -12
- package/lib/cjs/bulk-import.js.map +0 -1
- package/lib/cjs/conversion/TextractProcessor.js +0 -417
- package/lib/cjs/conversion/TextractProcessor.js.map +0 -1
- package/lib/cjs/conversion/image.js +0 -149
- package/lib/cjs/conversion/image.js.map +0 -1
- package/lib/cjs/conversion/markitdown.js +0 -42
- package/lib/cjs/conversion/markitdown.js.map +0 -1
- package/lib/cjs/conversion/mutool.js +0 -147
- package/lib/cjs/conversion/mutool.js.map +0 -1
- package/lib/cjs/conversion/pandoc.js +0 -39
- package/lib/cjs/conversion/pandoc.js.map +0 -1
- package/lib/cjs/dsl/conditions.js +0 -81
- package/lib/cjs/dsl/conditions.js.map +0 -1
- package/lib/cjs/dsl/dsl-workflow.js +0 -458
- package/lib/cjs/dsl/dsl-workflow.js.map +0 -1
- package/lib/cjs/dsl/dslProxyActivities.js +0 -23
- package/lib/cjs/dsl/dslProxyActivities.js.map +0 -1
- package/lib/cjs/dsl/projections.js +0 -59
- package/lib/cjs/dsl/projections.js.map +0 -1
- package/lib/cjs/dsl/setup/ActivityContext.js +0 -188
- package/lib/cjs/dsl/setup/ActivityContext.js.map +0 -1
- package/lib/cjs/dsl/setup/fetch/DataProvider.js +0 -51
- package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +0 -1
- package/lib/cjs/dsl/setup/fetch/index.js +0 -16
- package/lib/cjs/dsl/setup/fetch/index.js.map +0 -1
- package/lib/cjs/dsl/setup/fetch/providers.js +0 -67
- package/lib/cjs/dsl/setup/fetch/providers.js.map +0 -1
- package/lib/cjs/dsl/test/test-child-workflow.js +0 -10
- package/lib/cjs/dsl/test/test-child-workflow.js.map +0 -1
- package/lib/cjs/dsl/validation.js +0 -122
- package/lib/cjs/dsl/validation.js.map +0 -1
- package/lib/cjs/dsl/vars.js +0 -341
- package/lib/cjs/dsl/vars.js.map +0 -1
- package/lib/cjs/dsl/walk.js +0 -100
- package/lib/cjs/dsl/walk.js.map +0 -1
- package/lib/cjs/dsl.js +0 -20
- package/lib/cjs/dsl.js.map +0 -1
- package/lib/cjs/errors.js +0 -89
- package/lib/cjs/errors.js.map +0 -1
- package/lib/cjs/index.js +0 -57
- package/lib/cjs/index.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/extractToc.js +0 -47
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +0 -72
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/generatePart.js +0 -78
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/generateToc.js +0 -86
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/index.js +0 -12
- package/lib/cjs/iterative-generation/activities/index.js.map +0 -1
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +0 -56
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +0 -1
- package/lib/cjs/iterative-generation/types.js +0 -5
- package/lib/cjs/iterative-generation/types.js.map +0 -1
- package/lib/cjs/iterative-generation/utils.js +0 -121
- package/lib/cjs/iterative-generation/utils.js.map +0 -1
- package/lib/cjs/package.json +0 -3
- package/lib/cjs/result-types.js +0 -10
- package/lib/cjs/result-types.js.map +0 -1
- package/lib/cjs/security/ssrf.js +0 -34
- package/lib/cjs/security/ssrf.js.map +0 -1
- package/lib/cjs/system/notifyWebhookWorkflow.js +0 -53
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +0 -1
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +0 -33
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +0 -1
- package/lib/cjs/utils/auth.js +0 -15
- package/lib/cjs/utils/auth.js.map +0 -1
- package/lib/cjs/utils/blobs.js +0 -64
- package/lib/cjs/utils/blobs.js.map +0 -1
- package/lib/cjs/utils/chunks.js +0 -14
- package/lib/cjs/utils/chunks.js.map +0 -1
- package/lib/cjs/utils/client.js +0 -31
- package/lib/cjs/utils/client.js.map +0 -1
- package/lib/cjs/utils/expand-vars.js +0 -33
- package/lib/cjs/utils/expand-vars.js.map +0 -1
- package/lib/cjs/utils/memory.js +0 -65
- package/lib/cjs/utils/memory.js.map +0 -1
- package/lib/cjs/utils/renditions.js +0 -92
- package/lib/cjs/utils/renditions.js.map +0 -1
- package/lib/cjs/utils/storage.js +0 -54
- package/lib/cjs/utils/storage.js.map +0 -1
- package/lib/cjs/utils/text-preview-utils.js +0 -43
- package/lib/cjs/utils/text-preview-utils.js.map +0 -1
- package/lib/cjs/utils/tokens.js +0 -38
- package/lib/cjs/utils/tokens.js.map +0 -1
- package/lib/cjs/vars.js +0 -20
- package/lib/cjs/vars.js.map +0 -1
- package/lib/cjs/workflows.js +0 -15
- package/lib/cjs/workflows.js.map +0 -1
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +0 -1
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +0 -1
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +0 -16
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +0 -1
- package/lib/esm/activities/chunkDocument.js.map +0 -1
- package/lib/esm/activities/createDocumentFromOther.js.map +0 -1
- package/lib/esm/activities/executeInteraction.js.map +0 -1
- package/lib/esm/activities/executeRemoteActivity.js.map +0 -1
- package/lib/esm/activities/extractDocumentText.js.map +0 -1
- package/lib/esm/activities/generateDocumentProperties.js.map +0 -1
- package/lib/esm/activities/generateEmbeddings.js.map +0 -1
- package/lib/esm/activities/generateOrAssignContentType.js.map +0 -1
- package/lib/esm/activities/getObjectFromStore.js.map +0 -1
- package/lib/esm/activities/handleError.js +0 -26
- package/lib/esm/activities/handleError.js.map +0 -1
- package/lib/esm/activities/index-dsl.js +0 -29
- package/lib/esm/activities/index-dsl.js.map +0 -1
- package/lib/esm/activities/index.js.map +0 -1
- package/lib/esm/activities/loadChildWorkflowSpec.js.map +0 -1
- package/lib/esm/activities/media/prepareAudio.js.map +0 -1
- package/lib/esm/activities/media/prepareVideo.js.map +0 -1
- package/lib/esm/activities/media/probeMediaStreams.js.map +0 -1
- package/lib/esm/activities/media/processPdfWithTextract.js.map +0 -1
- package/lib/esm/activities/media/saveGladiaTranscription.js.map +0 -1
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +0 -1
- package/lib/esm/activities/mergeChildArtifacts.js.map +0 -1
- package/lib/esm/activities/notifyWebhook.js.map +0 -1
- package/lib/esm/activities/rateLimiter.js.map +0 -1
- package/lib/esm/activities/renditions/generateImageRendition.js +0 -63
- package/lib/esm/activities/renditions/generateImageRendition.js.map +0 -1
- package/lib/esm/activities/renditions/generateVideoRendition.js.map +0 -1
- package/lib/esm/activities/resolveRemoteActivities.js.map +0 -1
- package/lib/esm/activities/setDocumentStatus.js.map +0 -1
- package/lib/esm/bulk-import.js.map +0 -1
- package/lib/esm/conversion/TextractProcessor.js.map +0 -1
- package/lib/esm/conversion/image.js.map +0 -1
- package/lib/esm/conversion/markitdown.js.map +0 -1
- package/lib/esm/conversion/mutool.js.map +0 -1
- package/lib/esm/conversion/pandoc.js.map +0 -1
- package/lib/esm/dsl/conditions.js +0 -75
- package/lib/esm/dsl/conditions.js.map +0 -1
- package/lib/esm/dsl/dsl-workflow.js.map +0 -1
- package/lib/esm/dsl/dslProxyActivities.js.map +0 -1
- package/lib/esm/dsl/projections.js.map +0 -1
- package/lib/esm/dsl/setup/ActivityContext.js.map +0 -1
- package/lib/esm/dsl/setup/fetch/DataProvider.js.map +0 -1
- package/lib/esm/dsl/setup/fetch/index.js.map +0 -1
- package/lib/esm/dsl/setup/fetch/providers.js.map +0 -1
- package/lib/esm/dsl/test/test-child-workflow.js.map +0 -1
- package/lib/esm/dsl/validation.js.map +0 -1
- package/lib/esm/dsl/vars.js.map +0 -1
- package/lib/esm/dsl/walk.js.map +0 -1
- package/lib/esm/dsl.js +0 -4
- package/lib/esm/dsl.js.map +0 -1
- package/lib/esm/errors.js.map +0 -1
- package/lib/esm/index.js +0 -39
- package/lib/esm/index.js.map +0 -1
- package/lib/esm/iterative-generation/activities/extractToc.js +0 -44
- package/lib/esm/iterative-generation/activities/extractToc.js.map +0 -1
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +0 -69
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +0 -1
- package/lib/esm/iterative-generation/activities/generatePart.js +0 -75
- package/lib/esm/iterative-generation/activities/generatePart.js.map +0 -1
- package/lib/esm/iterative-generation/activities/generateToc.js +0 -83
- package/lib/esm/iterative-generation/activities/generateToc.js.map +0 -1
- package/lib/esm/iterative-generation/activities/index.js +0 -5
- package/lib/esm/iterative-generation/activities/index.js.map +0 -1
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +0 -53
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +0 -1
- package/lib/esm/iterative-generation/types.js +0 -2
- package/lib/esm/iterative-generation/types.js.map +0 -1
- package/lib/esm/iterative-generation/utils.js +0 -112
- package/lib/esm/iterative-generation/utils.js.map +0 -1
- package/lib/esm/result-types.js.map +0 -1
- package/lib/esm/security/ssrf.js.map +0 -1
- package/lib/esm/system/notifyWebhookWorkflow.js.map +0 -1
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +0 -1
- package/lib/esm/utils/auth.js.map +0 -1
- package/lib/esm/utils/blobs.js.map +0 -1
- package/lib/esm/utils/chunks.js.map +0 -1
- package/lib/esm/utils/client.js +0 -27
- package/lib/esm/utils/client.js.map +0 -1
- package/lib/esm/utils/expand-vars.js.map +0 -1
- package/lib/esm/utils/memory.js +0 -55
- package/lib/esm/utils/memory.js.map +0 -1
- package/lib/esm/utils/renditions.js.map +0 -1
- package/lib/esm/utils/storage.js.map +0 -1
- package/lib/esm/utils/text-preview-utils.js.map +0 -1
- package/lib/esm/utils/tokens.js.map +0 -1
- package/lib/esm/vars.js +0 -4
- package/lib/esm/vars.js.map +0 -1
- package/lib/esm/workflows.js +0 -8
- package/lib/esm/workflows.js.map +0 -1
- package/lib/tsconfig.tsbuildinfo +0 -1
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +0 -1
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +0 -1
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +0 -1
- package/lib/types/activities/chunkDocument.d.ts.map +0 -1
- package/lib/types/activities/createDocumentFromOther.d.ts.map +0 -1
- package/lib/types/activities/executeInteraction.d.ts.map +0 -1
- package/lib/types/activities/executeRemoteActivity.d.ts.map +0 -1
- package/lib/types/activities/extractDocumentText.d.ts.map +0 -1
- package/lib/types/activities/generateDocumentProperties.d.ts.map +0 -1
- package/lib/types/activities/generateEmbeddings.d.ts.map +0 -1
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +0 -1
- package/lib/types/activities/getObjectFromStore.d.ts.map +0 -1
- package/lib/types/activities/handleError.d.ts.map +0 -1
- package/lib/types/activities/index-dsl.d.ts +0 -31
- package/lib/types/activities/index-dsl.d.ts.map +0 -1
- package/lib/types/activities/index.d.ts.map +0 -1
- package/lib/types/activities/loadChildWorkflowSpec.d.ts.map +0 -1
- package/lib/types/activities/media/prepareAudio.d.ts.map +0 -1
- package/lib/types/activities/media/prepareVideo.d.ts.map +0 -1
- package/lib/types/activities/media/probeMediaStreams.d.ts.map +0 -1
- package/lib/types/activities/media/processPdfWithTextract.d.ts.map +0 -1
- package/lib/types/activities/media/saveGladiaTranscription.d.ts.map +0 -1
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +0 -1
- package/lib/types/activities/mergeChildArtifacts.d.ts.map +0 -1
- package/lib/types/activities/notifyWebhook.d.ts.map +0 -1
- package/lib/types/activities/rateLimiter.d.ts.map +0 -1
- package/lib/types/activities/renditions/generateImageRendition.d.ts.map +0 -1
- package/lib/types/activities/renditions/generateVideoRendition.d.ts.map +0 -1
- package/lib/types/activities/resolveRemoteActivities.d.ts.map +0 -1
- package/lib/types/activities/setDocumentStatus.d.ts.map +0 -1
- package/lib/types/bulk-import.d.ts.map +0 -1
- package/lib/types/conversion/TextractProcessor.d.ts.map +0 -1
- package/lib/types/conversion/image.d.ts.map +0 -1
- package/lib/types/conversion/markitdown.d.ts.map +0 -1
- package/lib/types/conversion/mutool.d.ts.map +0 -1
- package/lib/types/conversion/pandoc.d.ts.map +0 -1
- package/lib/types/dsl/conditions.d.ts +0 -2
- package/lib/types/dsl/conditions.d.ts.map +0 -1
- package/lib/types/dsl/dsl-workflow.d.ts.map +0 -1
- package/lib/types/dsl/dslProxyActivities.d.ts +0 -10
- package/lib/types/dsl/dslProxyActivities.d.ts.map +0 -1
- package/lib/types/dsl/projections.d.ts +0 -4
- package/lib/types/dsl/projections.d.ts.map +0 -1
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +0 -1
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +0 -1
- package/lib/types/dsl/setup/fetch/index.d.ts +0 -6
- package/lib/types/dsl/setup/fetch/index.d.ts.map +0 -1
- package/lib/types/dsl/setup/fetch/providers.d.ts.map +0 -1
- package/lib/types/dsl/test/test-child-workflow.d.ts +0 -4
- package/lib/types/dsl/test/test-child-workflow.d.ts.map +0 -1
- package/lib/types/dsl/validation.d.ts.map +0 -1
- package/lib/types/dsl/vars.d.ts.map +0 -1
- package/lib/types/dsl/walk.d.ts +0 -18
- package/lib/types/dsl/walk.d.ts.map +0 -1
- package/lib/types/dsl.d.ts +0 -4
- package/lib/types/dsl.d.ts.map +0 -1
- package/lib/types/errors.d.ts.map +0 -1
- package/lib/types/index.d.ts +0 -38
- package/lib/types/index.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/extractToc.d.ts +0 -10
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +0 -3
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/generatePart.d.ts +0 -3
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/generateToc.d.ts +0 -4
- package/lib/types/iterative-generation/activities/generateToc.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/index.d.ts +0 -5
- package/lib/types/iterative-generation/activities/index.d.ts.map +0 -1
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +0 -3
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +0 -1
- package/lib/types/iterative-generation/types.d.ts +0 -79
- package/lib/types/iterative-generation/types.d.ts.map +0 -1
- package/lib/types/iterative-generation/utils.d.ts +0 -26
- package/lib/types/iterative-generation/utils.d.ts.map +0 -1
- package/lib/types/result-types.d.ts.map +0 -1
- package/lib/types/security/ssrf.d.ts.map +0 -1
- package/lib/types/system/notifyWebhookWorkflow.d.ts +0 -8
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +0 -1
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +0 -1
- package/lib/types/utils/auth.d.ts.map +0 -1
- package/lib/types/utils/blobs.d.ts.map +0 -1
- package/lib/types/utils/chunks.d.ts.map +0 -1
- package/lib/types/utils/client.d.ts +0 -8
- package/lib/types/utils/client.d.ts.map +0 -1
- package/lib/types/utils/expand-vars.d.ts.map +0 -1
- package/lib/types/utils/memory.d.ts +0 -8
- package/lib/types/utils/memory.d.ts.map +0 -1
- package/lib/types/utils/renditions.d.ts.map +0 -1
- package/lib/types/utils/storage.d.ts.map +0 -1
- package/lib/types/utils/text-preview-utils.d.ts.map +0 -1
- package/lib/types/utils/tokens.d.ts.map +0 -1
- package/lib/types/vars.d.ts +0 -3
- package/lib/types/vars.d.ts.map +0 -1
- package/lib/types/workflows.d.ts +0 -8
- package/lib/types/workflows.d.ts.map +0 -1
- package/src/iterative-generation/activities/extractToc.ts +0 -63
- package/src/iterative-generation/activities/finalizeOutput.ts +0 -100
- package/src/iterative-generation/activities/generatePart.ts +0 -123
- package/src/iterative-generation/activities/generateToc.ts +0 -116
- package/src/iterative-generation/activities/index.ts +0 -4
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +0 -68
- package/src/iterative-generation/types.ts +0 -99
- package/src/iterative-generation/utils.ts +0 -126
- package/src/utils/memory.ts +0 -61
- /package/lib/{esm/bulk-import.js → bulk-import.js} +0 -0
- /package/lib/{types/conversion → conversion}/image.d.ts +0 -0
- /package/lib/{types/conversion → conversion}/markitdown.d.ts +0 -0
- /package/lib/{types/conversion → conversion}/mutool.d.ts +0 -0
- /package/lib/{types/conversion → conversion}/pandoc.d.ts +0 -0
- /package/lib/{types/result-types.d.ts → result-types.d.ts} +0 -0
- /package/lib/{esm/result-types.js → result-types.js} +0 -0
- /package/lib/{types/security → security}/ssrf.d.ts +0 -0
- /package/lib/{esm/security → security}/ssrf.js +0 -0
- /package/lib/{esm/utils → utils}/auth.js +0 -0
- /package/lib/{types/utils → utils}/chunks.d.ts +0 -0
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
import { log } from
|
|
2
|
-
import {
|
|
1
|
+
import { log } from '@temporalio/activity';
|
|
2
|
+
import type { VertesiaClient } from '@vertesia/client';
|
|
3
|
+
import type {
|
|
3
4
|
AppInstallationWithManifest,
|
|
4
5
|
AppPackage,
|
|
5
6
|
DSLActivityExecutionPayload,
|
|
6
7
|
RemoteActivityDefinition,
|
|
7
|
-
} from
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import { URLValidationError, safeFetch } from "../security/ssrf.js";
|
|
8
|
+
} from '@vertesia/common';
|
|
9
|
+
import { setupActivity } from '../dsl/setup/ActivityContext.js';
|
|
10
|
+
import { safeFetch, URLValidationError } from '../security/ssrf.js';
|
|
11
11
|
|
|
12
12
|
/** Prefix identifying a remote activity name in DSL workflow steps */
|
|
13
|
-
const REMOTE_ACTIVITY_PREFIX =
|
|
13
|
+
const REMOTE_ACTIVITY_PREFIX = 'app:';
|
|
14
14
|
|
|
15
15
|
/**
|
|
16
16
|
* Information about a resolved remote activity.
|
|
@@ -25,7 +25,7 @@ export interface RemoteActivityInfo {
|
|
|
25
25
|
/** The app name */
|
|
26
26
|
app_name: string;
|
|
27
27
|
/** The app installation settings */
|
|
28
|
-
app_settings?: Record<string,
|
|
28
|
+
app_settings?: Record<string, unknown>;
|
|
29
29
|
/** The activity definition from the tool server */
|
|
30
30
|
definition: RemoteActivityDefinition;
|
|
31
31
|
}
|
|
@@ -36,8 +36,7 @@ export interface RemoteActivityInfo {
|
|
|
36
36
|
*/
|
|
37
37
|
export type RemoteActivityMap = Record<string, RemoteActivityInfo>;
|
|
38
38
|
|
|
39
|
-
|
|
40
|
-
export interface ResolveRemoteActivitiesParams {}
|
|
39
|
+
export type ResolveRemoteActivitiesParams = Record<string, never>;
|
|
41
40
|
|
|
42
41
|
/**
|
|
43
42
|
* Resolves remote activities from all installed apps that have the `tools` capability.
|
|
@@ -54,10 +53,10 @@ export async function resolveRemoteActivities(
|
|
|
54
53
|
|
|
55
54
|
let installations: AppInstallationWithManifest[];
|
|
56
55
|
try {
|
|
57
|
-
installations = await client.apps.getInstalledApps(
|
|
56
|
+
installations = await client.apps.getInstalledApps('tools');
|
|
58
57
|
} catch (err: unknown) {
|
|
59
58
|
const message = err instanceof Error ? err.message : String(err);
|
|
60
|
-
log.warn(
|
|
59
|
+
log.warn('Failed to fetch installed apps for remote activities', { error: message });
|
|
61
60
|
return map;
|
|
62
61
|
}
|
|
63
62
|
|
|
@@ -76,7 +75,7 @@ export async function resolveRemoteActivities(
|
|
|
76
75
|
for (const activity of pkg.activities) {
|
|
77
76
|
const collection = activity.collection;
|
|
78
77
|
if (!collection) {
|
|
79
|
-
log.warn(
|
|
78
|
+
log.warn('Remote activity missing collection, skipping', {
|
|
80
79
|
app: manifest.name,
|
|
81
80
|
activity: activity.name,
|
|
82
81
|
});
|
|
@@ -87,7 +86,7 @@ export async function resolveRemoteActivities(
|
|
|
87
86
|
const qualifiedName = `${REMOTE_ACTIVITY_PREFIX}${manifest.name}:${collection}:${activity.name}`;
|
|
88
87
|
|
|
89
88
|
if (map[qualifiedName]) {
|
|
90
|
-
log.warn(
|
|
89
|
+
log.warn('Duplicate remote activity name, skipping', {
|
|
91
90
|
qualifiedName,
|
|
92
91
|
existingApp: map[qualifiedName].app_name,
|
|
93
92
|
newApp: manifest.name,
|
|
@@ -108,13 +107,13 @@ export async function resolveRemoteActivities(
|
|
|
108
107
|
};
|
|
109
108
|
}
|
|
110
109
|
|
|
111
|
-
log.info(
|
|
110
|
+
log.info('Resolved remote activities from app', {
|
|
112
111
|
app: manifest.name,
|
|
113
112
|
count: pkg.activities.length,
|
|
114
113
|
});
|
|
115
114
|
} catch (err: unknown) {
|
|
116
115
|
const message = err instanceof Error ? err.message : String(err);
|
|
117
|
-
log.warn(
|
|
116
|
+
log.warn('Failed to fetch activities from app, skipping', {
|
|
118
117
|
app: manifest.name,
|
|
119
118
|
endpoint: manifest.endpoint,
|
|
120
119
|
error: message,
|
|
@@ -128,7 +127,11 @@ export async function resolveRemoteActivities(
|
|
|
128
127
|
/**
|
|
129
128
|
* Fetches the activities scope from a tool server package endpoint.
|
|
130
129
|
*/
|
|
131
|
-
async function fetchActivitiesPackage(
|
|
130
|
+
async function fetchActivitiesPackage(
|
|
131
|
+
endpoint: string,
|
|
132
|
+
authToken: string,
|
|
133
|
+
client: VertesiaClient,
|
|
134
|
+
): Promise<AppPackage> {
|
|
132
135
|
const url = new URL(endpoint);
|
|
133
136
|
url.searchParams.set('scope', 'activities');
|
|
134
137
|
|
|
@@ -137,8 +140,8 @@ async function fetchActivitiesPackage(endpoint: string, authToken: string, clien
|
|
|
137
140
|
const response = await safeFetch(url.toString(), {
|
|
138
141
|
method: 'GET',
|
|
139
142
|
headers: {
|
|
140
|
-
|
|
141
|
-
|
|
143
|
+
Accept: 'application/json',
|
|
144
|
+
Authorization: `Bearer ${authToken}`,
|
|
142
145
|
},
|
|
143
146
|
signal: AbortSignal.timeout(5000),
|
|
144
147
|
});
|
|
@@ -156,13 +159,19 @@ async function fetchActivitiesPackage(endpoint: string, authToken: string, clien
|
|
|
156
159
|
* Otherwise, use the collection-specific activities endpoint: `/api/activities/{collection}`.
|
|
157
160
|
* Validates the resolved URL to prevent second-hop SSRF from tool server responses.
|
|
158
161
|
*/
|
|
159
|
-
async function resolveActivityUrl(
|
|
162
|
+
async function resolveActivityUrl(
|
|
163
|
+
endpoint: string,
|
|
164
|
+
activity: RemoteActivityDefinition,
|
|
165
|
+
collection: string,
|
|
166
|
+
client: VertesiaClient,
|
|
167
|
+
): Promise<string> {
|
|
160
168
|
let resolved: string;
|
|
161
169
|
if (activity.url) {
|
|
162
170
|
// Absolute URLs are used as-is; relative URLs are resolved against the endpoint base
|
|
163
|
-
resolved =
|
|
164
|
-
|
|
165
|
-
|
|
171
|
+
resolved =
|
|
172
|
+
activity.url.startsWith('http://') || activity.url.startsWith('https://')
|
|
173
|
+
? activity.url
|
|
174
|
+
: new URL(activity.url, endpoint).toString();
|
|
166
175
|
} else {
|
|
167
176
|
// Default: POST to the collection-specific activities endpoint
|
|
168
177
|
const base = new URL(endpoint);
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { log } from
|
|
2
|
-
import { ContentObjectStatus, DSLActivityExecutionPayload, DSLActivitySpec } from
|
|
3
|
-
import { setupActivity } from
|
|
1
|
+
import { log } from '@temporalio/activity';
|
|
2
|
+
import type { ContentObjectStatus, DSLActivityExecutionPayload, DSLActivitySpec } from '@vertesia/common';
|
|
3
|
+
import { setupActivity } from '../dsl/setup/ActivityContext.js';
|
|
4
4
|
|
|
5
5
|
export interface SetDocumentStatusParams {
|
|
6
6
|
status: ContentObjectStatus;
|
|
@@ -22,12 +22,16 @@ export async function setDocumentStatus(payload: DSLActivityExecutionPayload<Set
|
|
|
22
22
|
try {
|
|
23
23
|
const res = await client.objects.update(objectId, { status: params.status });
|
|
24
24
|
return res.status;
|
|
25
|
-
} catch (err:
|
|
25
|
+
} catch (err: unknown) {
|
|
26
26
|
// If document was deleted, nothing to update - log warning and continue
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
const status = err && typeof err === 'object' && 'status' in err ? err.status : undefined;
|
|
28
|
+
const name = err instanceof Error ? err.name : undefined;
|
|
29
|
+
if (status === 404 || name === 'ZenoClientNotFoundError') {
|
|
30
|
+
log.warn(
|
|
31
|
+
`Document ${objectId} not found - may have been deleted. Skipping status update to '${params.status}'`,
|
|
32
|
+
);
|
|
29
33
|
return undefined; // Signal that document wasn't found
|
|
30
34
|
}
|
|
31
35
|
throw err;
|
|
32
36
|
}
|
|
33
|
-
}
|
|
37
|
+
}
|
package/src/bulk-import.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type {
|
|
2
|
+
ContentObjectProcessingPriority,
|
|
2
3
|
CreateCollectionPayload,
|
|
3
4
|
CreateContentObjectPayload,
|
|
4
5
|
} from '@vertesia/common';
|
|
@@ -47,11 +48,7 @@ export interface StorageObjectSourceItem extends BaseItem {
|
|
|
47
48
|
mimeType: string;
|
|
48
49
|
}
|
|
49
50
|
|
|
50
|
-
export type SourceItem =
|
|
51
|
-
| ContentObjectSourceItem
|
|
52
|
-
| CollectionSourceItem
|
|
53
|
-
| MetadataSourceItem
|
|
54
|
-
| StorageObjectSourceItem;
|
|
51
|
+
export type SourceItem = ContentObjectSourceItem | CollectionSourceItem | MetadataSourceItem | StorageObjectSourceItem;
|
|
55
52
|
|
|
56
53
|
export interface SourceItemBatch {
|
|
57
54
|
index: number;
|
|
@@ -96,11 +93,20 @@ export interface BulkImportParams {
|
|
|
96
93
|
partitionSize?: number;
|
|
97
94
|
dryRun?: boolean;
|
|
98
95
|
updateByContentSource?: boolean;
|
|
96
|
+
skipWorkflows?: boolean;
|
|
97
|
+
/**
|
|
98
|
+
* Processing priority for the document-processing workflows triggered by created objects.
|
|
99
|
+
* Defaults to `low` so bulk imports run on the low-priority ("bulk") task queue and don't
|
|
100
|
+
* compete with interactive traffic.
|
|
101
|
+
*/
|
|
102
|
+
processingPriority?: ContentObjectProcessingPriority;
|
|
99
103
|
}
|
|
100
104
|
|
|
101
105
|
export interface PartitionError {
|
|
102
106
|
partitionIndex: number;
|
|
103
107
|
errorCount: number;
|
|
108
|
+
/** Workflow-level error message if the partition itself failed (vs. per-batch failures). */
|
|
109
|
+
message?: string;
|
|
104
110
|
}
|
|
105
111
|
|
|
106
112
|
export interface BulkImportResult {
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
import { PutObjectCommand, S3Client } from
|
|
2
|
-
import type { Block } from
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
StartDocumentAnalysisCommand,
|
|
6
|
-
TextractClient
|
|
7
|
-
} from "@aws-sdk/client-textract";
|
|
8
|
-
import type { AwsCredentialIdentityProvider } from "@smithy/types";
|
|
1
|
+
import { PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
|
|
2
|
+
import type { Block } from '@aws-sdk/client-textract';
|
|
3
|
+
import { GetDocumentAnalysisCommand, StartDocumentAnalysisCommand, TextractClient } from '@aws-sdk/client-textract';
|
|
4
|
+
import type { AwsCredentialIdentityProvider } from '@smithy/types';
|
|
9
5
|
import Papa from 'papaparse';
|
|
10
6
|
|
|
11
7
|
interface BlocksMap {
|
|
@@ -33,7 +29,7 @@ interface TextractProcessorOptions {
|
|
|
33
29
|
region: string;
|
|
34
30
|
bucket: string;
|
|
35
31
|
credentials?: AwsCredentialIdentityProvider;
|
|
36
|
-
log?:
|
|
32
|
+
log?: TextractLogger;
|
|
37
33
|
detectImages?: boolean;
|
|
38
34
|
/**
|
|
39
35
|
* NEW: If true, includes cell-confidence information in the table CSV
|
|
@@ -41,12 +37,16 @@ interface TextractProcessorOptions {
|
|
|
41
37
|
includeConfidenceInTables?: boolean;
|
|
42
38
|
}
|
|
43
39
|
|
|
40
|
+
interface TextractLogger {
|
|
41
|
+
info(message: string, metadata?: Record<string, unknown>): void;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
44
|
export class TextractProcessor {
|
|
45
45
|
private textractClient: TextractClient;
|
|
46
46
|
private s3Client: S3Client;
|
|
47
47
|
private fileKey: string;
|
|
48
48
|
private bucket: string;
|
|
49
|
-
private log
|
|
49
|
+
private log?: TextractLogger;
|
|
50
50
|
private detectImages: boolean;
|
|
51
51
|
/**
|
|
52
52
|
* Whether or not to include confidence values in CSV output for tables.
|
|
@@ -60,7 +60,7 @@ export class TextractProcessor {
|
|
|
60
60
|
credentials,
|
|
61
61
|
log,
|
|
62
62
|
detectImages = false,
|
|
63
|
-
includeConfidenceInTables = false
|
|
63
|
+
includeConfidenceInTables = false, // NEW default = false
|
|
64
64
|
}: TextractProcessorOptions) {
|
|
65
65
|
this.fileKey = fileKey;
|
|
66
66
|
this.bucket = bucket;
|
|
@@ -70,11 +70,11 @@ export class TextractProcessor {
|
|
|
70
70
|
|
|
71
71
|
this.textractClient = new TextractClient({
|
|
72
72
|
region,
|
|
73
|
-
credentials
|
|
73
|
+
credentials,
|
|
74
74
|
});
|
|
75
75
|
this.s3Client = new S3Client({
|
|
76
76
|
region,
|
|
77
|
-
credentials
|
|
77
|
+
credentials,
|
|
78
78
|
});
|
|
79
79
|
}
|
|
80
80
|
|
|
@@ -88,17 +88,13 @@ export class TextractProcessor {
|
|
|
88
88
|
if (word.BlockType === 'WORD') {
|
|
89
89
|
const wordText = word.Text || '';
|
|
90
90
|
// Example logic to quote numeric text with commas
|
|
91
|
-
if (wordText.includes(',') &&
|
|
92
|
-
wordText.replace(',', '').match(/^\d+$/)) {
|
|
91
|
+
if (wordText.includes(',') && wordText.replace(',', '').match(/^\d+$/)) {
|
|
93
92
|
text += `"${wordText}" `;
|
|
94
93
|
} else {
|
|
95
94
|
text += `${wordText} `;
|
|
96
95
|
}
|
|
97
96
|
}
|
|
98
|
-
if (
|
|
99
|
-
word.BlockType === 'SELECTION_ELEMENT' &&
|
|
100
|
-
word.SelectionStatus === 'SELECTED'
|
|
101
|
-
) {
|
|
97
|
+
if (word.BlockType === 'SELECTION_ELEMENT' && word.SelectionStatus === 'SELECTED') {
|
|
102
98
|
text += 'X ';
|
|
103
99
|
}
|
|
104
100
|
}
|
|
@@ -140,6 +136,7 @@ export class TextractProcessor {
|
|
|
140
136
|
for (const cellRel of cell.Relationships) {
|
|
141
137
|
if (
|
|
142
138
|
cellRel.Type === 'CHILD' &&
|
|
139
|
+
// biome-ignore lint/style/noNonNullAssertion: intentional non-null assertion; TS can't prove narrowing here
|
|
143
140
|
cellRel.Ids?.includes(wordBlock.Id!)
|
|
144
141
|
) {
|
|
145
142
|
return true;
|
|
@@ -159,15 +156,15 @@ export class TextractProcessor {
|
|
|
159
156
|
*/
|
|
160
157
|
private getRowsColumnsMap(
|
|
161
158
|
tableResult: Block,
|
|
162
|
-
blocksMap: BlocksMap
|
|
159
|
+
blocksMap: BlocksMap,
|
|
163
160
|
): {
|
|
164
161
|
rows: Array<Array<{ text: string; confidence: number }>>;
|
|
165
162
|
} {
|
|
166
163
|
const rows: Array<Array<{ text: string; confidence: number }>> = [];
|
|
167
164
|
|
|
168
|
-
tableResult.Relationships?.forEach(relationship => {
|
|
165
|
+
tableResult.Relationships?.forEach((relationship) => {
|
|
169
166
|
if (relationship.Type === 'CHILD') {
|
|
170
|
-
relationship.Ids?.forEach(childId => {
|
|
167
|
+
relationship.Ids?.forEach((childId) => {
|
|
171
168
|
const cell = blocksMap[childId];
|
|
172
169
|
if (cell.BlockType === 'CELL') {
|
|
173
170
|
const rowIndex = cell.RowIndex || 1;
|
|
@@ -200,7 +197,7 @@ export class TextractProcessor {
|
|
|
200
197
|
tableResult: Block,
|
|
201
198
|
blocksMap: BlocksMap,
|
|
202
199
|
_tableIndex: number,
|
|
203
|
-
_pageNumber: number
|
|
200
|
+
_pageNumber: number,
|
|
204
201
|
): { csv: string; tableConfidence: number } {
|
|
205
202
|
const { rows } = this.getRowsColumnsMap(tableResult, blocksMap);
|
|
206
203
|
|
|
@@ -222,7 +219,7 @@ export class TextractProcessor {
|
|
|
222
219
|
}
|
|
223
220
|
|
|
224
221
|
// Compute average confidence (or any other method you prefer)
|
|
225
|
-
const tableConfidence = cellCount > 0 ?
|
|
222
|
+
const tableConfidence = cellCount > 0 ? totalConfidence / cellCount : 0;
|
|
226
223
|
|
|
227
224
|
// Convert to CSV
|
|
228
225
|
const csv = Papa.unparse(csvData, {
|
|
@@ -232,14 +229,14 @@ export class TextractProcessor {
|
|
|
232
229
|
escapeChar: '"',
|
|
233
230
|
header: false,
|
|
234
231
|
newline: '\n',
|
|
235
|
-
skipEmptyLines: false
|
|
232
|
+
skipEmptyLines: false,
|
|
236
233
|
});
|
|
237
234
|
|
|
238
235
|
return { csv, tableConfidence };
|
|
239
236
|
}
|
|
240
237
|
|
|
241
238
|
async upload(fileBuf: Buffer): Promise<void> {
|
|
242
|
-
this.log
|
|
239
|
+
this.log?.info('Uploading file to S3', { fileKey: this.fileKey });
|
|
243
240
|
const command = new PutObjectCommand({
|
|
244
241
|
Bucket: this.bucket,
|
|
245
242
|
Key: this.fileKey,
|
|
@@ -253,18 +250,20 @@ export class TextractProcessor {
|
|
|
253
250
|
DocumentLocation: {
|
|
254
251
|
S3Object: {
|
|
255
252
|
Bucket: this.bucket,
|
|
256
|
-
Name: s3Key
|
|
257
|
-
}
|
|
253
|
+
Name: s3Key,
|
|
254
|
+
},
|
|
258
255
|
},
|
|
259
|
-
FeatureTypes: [
|
|
256
|
+
FeatureTypes: ['TABLES'],
|
|
260
257
|
});
|
|
261
258
|
const response = await this.textractClient.send(command);
|
|
259
|
+
// biome-ignore lint/style/noNonNullAssertion: intentional non-null assertion; TS can't prove narrowing here
|
|
262
260
|
return response.JobId!;
|
|
263
261
|
}
|
|
264
262
|
|
|
265
263
|
async checkJobStatus(jobId: string): Promise<string> {
|
|
266
264
|
const command = new GetDocumentAnalysisCommand({ JobId: jobId });
|
|
267
265
|
const response = await this.textractClient.send(command);
|
|
266
|
+
// biome-ignore lint/style/noNonNullAssertion: intentional non-null assertion; TS can't prove narrowing here
|
|
268
267
|
return response.JobStatus!;
|
|
269
268
|
}
|
|
270
269
|
|
|
@@ -297,9 +296,9 @@ export class TextractProcessor {
|
|
|
297
296
|
|
|
298
297
|
private isLikelyHeader(block: Block, prevBlock: Block | null): boolean {
|
|
299
298
|
if (!prevBlock) return true;
|
|
300
|
-
const gap =
|
|
301
|
-
(
|
|
302
|
-
|
|
299
|
+
const gap =
|
|
300
|
+
(block.Geometry?.BoundingBox?.Top || 0) -
|
|
301
|
+
((prevBlock.Geometry?.BoundingBox?.Top || 0) + (prevBlock.Geometry?.BoundingBox?.Height || 0));
|
|
303
302
|
return gap > 0.03;
|
|
304
303
|
}
|
|
305
304
|
|
|
@@ -315,8 +314,7 @@ export class TextractProcessor {
|
|
|
315
314
|
}
|
|
316
315
|
|
|
317
316
|
private shouldMergeLines(prev: Block, current: Block): boolean {
|
|
318
|
-
const prevBottom = (prev.Geometry?.BoundingBox?.Top || 0)
|
|
319
|
-
+ (prev.Geometry?.BoundingBox?.Height || 0);
|
|
317
|
+
const prevBottom = (prev.Geometry?.BoundingBox?.Top || 0) + (prev.Geometry?.BoundingBox?.Height || 0);
|
|
320
318
|
const currentTop = current.Geometry?.BoundingBox?.Top || 0;
|
|
321
319
|
const gap = currentTop - prevBottom;
|
|
322
320
|
|
|
@@ -330,44 +328,45 @@ export class TextractProcessor {
|
|
|
330
328
|
async processResults(jobId: string): Promise<string> {
|
|
331
329
|
let nextToken: string | undefined;
|
|
332
330
|
let allBlocks: Block[] = [];
|
|
333
|
-
|
|
331
|
+
|
|
334
332
|
do {
|
|
335
333
|
const command = new GetDocumentAnalysisCommand({
|
|
336
334
|
JobId: jobId,
|
|
337
|
-
NextToken: nextToken
|
|
335
|
+
NextToken: nextToken,
|
|
338
336
|
});
|
|
339
337
|
const response = await this.textractClient.send(command);
|
|
340
338
|
allBlocks = allBlocks.concat(response.Blocks || []);
|
|
341
339
|
nextToken = response.NextToken;
|
|
342
340
|
} while (nextToken);
|
|
343
|
-
|
|
341
|
+
|
|
344
342
|
// Create blocks map
|
|
345
343
|
const blocksMap: BlocksMap = {};
|
|
346
344
|
for (const block of allBlocks) {
|
|
345
|
+
// biome-ignore lint/style/noNonNullAssertion: intentional non-null assertion; TS can't prove narrowing here
|
|
347
346
|
blocksMap[block.Id!] = block;
|
|
348
347
|
}
|
|
349
|
-
|
|
348
|
+
|
|
350
349
|
// We'll store each page's content in sequence
|
|
351
350
|
const pageContents: PageContent[] = [];
|
|
352
351
|
let currentPage: PageContent | null = null;
|
|
353
|
-
|
|
352
|
+
|
|
354
353
|
// We'll keep track of a "current text block" that we're building
|
|
355
|
-
let currentTextContent =
|
|
354
|
+
let currentTextContent = '';
|
|
356
355
|
let prevLineBlock: Block | null = null;
|
|
357
|
-
|
|
356
|
+
|
|
358
357
|
// Sort by page and vertical position
|
|
359
358
|
allBlocks.sort((a, b) => {
|
|
360
359
|
if (a.Page !== b.Page) return (a.Page || 0) - (b.Page || 0);
|
|
361
360
|
return (a.Geometry?.BoundingBox?.Top || 0) - (b.Geometry?.BoundingBox?.Top || 0);
|
|
362
361
|
});
|
|
363
|
-
|
|
362
|
+
|
|
364
363
|
for (const block of allBlocks) {
|
|
365
364
|
if (block.BlockType === 'PAGE') {
|
|
366
365
|
// If we were building a text block, push it before starting a new page
|
|
367
366
|
if (currentTextContent.trim().length > 0 && currentPage) {
|
|
368
367
|
currentPage.blocks.push({
|
|
369
368
|
type: 'text',
|
|
370
|
-
content: currentTextContent
|
|
369
|
+
content: currentTextContent,
|
|
371
370
|
});
|
|
372
371
|
}
|
|
373
372
|
if (currentPage) {
|
|
@@ -375,32 +374,31 @@ export class TextractProcessor {
|
|
|
375
374
|
}
|
|
376
375
|
currentPage = {
|
|
377
376
|
pageNumber: block.Page || 0,
|
|
378
|
-
blocks: []
|
|
377
|
+
blocks: [],
|
|
379
378
|
};
|
|
380
|
-
currentTextContent =
|
|
379
|
+
currentTextContent = '';
|
|
381
380
|
prevLineBlock = null;
|
|
382
|
-
}
|
|
383
|
-
else if (currentPage && block.Page === currentPage.pageNumber) {
|
|
381
|
+
} else if (currentPage && block.Page === currentPage.pageNumber) {
|
|
384
382
|
// TABLE handling
|
|
385
383
|
if (block.BlockType === 'TABLE') {
|
|
386
384
|
// If there's a pending text block, push it first
|
|
387
385
|
if (currentTextContent.trim().length > 0) {
|
|
388
386
|
currentPage.blocks.push({
|
|
389
387
|
type: 'text',
|
|
390
|
-
content: currentTextContent
|
|
388
|
+
content: currentTextContent,
|
|
391
389
|
});
|
|
392
|
-
currentTextContent =
|
|
390
|
+
currentTextContent = '';
|
|
393
391
|
}
|
|
394
392
|
const { csv, tableConfidence } = this.generateTableCSV(
|
|
395
393
|
block,
|
|
396
394
|
blocksMap,
|
|
397
|
-
currentPage.blocks.filter(b => b.type === 'table').length + 1,
|
|
398
|
-
currentPage.pageNumber
|
|
395
|
+
currentPage.blocks.filter((b) => b.type === 'table').length + 1,
|
|
396
|
+
currentPage.pageNumber,
|
|
399
397
|
);
|
|
400
398
|
currentPage.blocks.push({
|
|
401
399
|
type: 'table',
|
|
402
400
|
content: csv,
|
|
403
|
-
confidence: tableConfidence
|
|
401
|
+
confidence: tableConfidence,
|
|
404
402
|
});
|
|
405
403
|
prevLineBlock = null;
|
|
406
404
|
}
|
|
@@ -411,18 +409,18 @@ export class TextractProcessor {
|
|
|
411
409
|
// just append the text. We'll call formatTextBlock to get
|
|
412
410
|
// indentation/header logic, but we won't add a leading newline.
|
|
413
411
|
const formatted = this.formatTextBlock(block, prevLineBlock);
|
|
414
|
-
|
|
412
|
+
|
|
415
413
|
// formatTextBlock might include a leading newline if isLikelyHeader = true
|
|
416
414
|
// so you can strip it out if you want them truly "merged" into one paragraph:
|
|
417
|
-
const mergedText = formatted.replace(/^\s*\n/,
|
|
418
|
-
|
|
419
|
-
currentTextContent +=
|
|
415
|
+
const mergedText = formatted.replace(/^\s*\n/, ' ');
|
|
416
|
+
|
|
417
|
+
currentTextContent += ` ${mergedText.trim()}`;
|
|
420
418
|
} else {
|
|
421
419
|
// If there's an existing text block, push it
|
|
422
420
|
if (currentTextContent.trim().length > 0) {
|
|
423
421
|
currentPage.blocks.push({
|
|
424
422
|
type: 'text',
|
|
425
|
-
content: currentTextContent
|
|
423
|
+
content: currentTextContent,
|
|
426
424
|
});
|
|
427
425
|
}
|
|
428
426
|
// Start a new text block
|
|
@@ -433,25 +431,25 @@ export class TextractProcessor {
|
|
|
433
431
|
// IMAGES (if detectImages)
|
|
434
432
|
else if (this.detectImages) {
|
|
435
433
|
const geometry = block.Geometry?.BoundingBox;
|
|
436
|
-
if (geometry
|
|
434
|
+
if (geometry?.Width && geometry.Height) {
|
|
437
435
|
const imagePlaceholder = this.getImagePlaceholder(block);
|
|
438
436
|
if (imagePlaceholder) {
|
|
439
437
|
// If there's a pending text block, push it first
|
|
440
438
|
if (currentTextContent.trim().length > 0) {
|
|
441
439
|
currentPage.blocks.push({
|
|
442
440
|
type: 'text',
|
|
443
|
-
content: currentTextContent
|
|
441
|
+
content: currentTextContent,
|
|
444
442
|
});
|
|
445
|
-
currentTextContent =
|
|
443
|
+
currentTextContent = '';
|
|
446
444
|
}
|
|
447
|
-
|
|
445
|
+
|
|
448
446
|
currentPage.blocks.push({
|
|
449
447
|
type: 'image',
|
|
450
448
|
content: imagePlaceholder,
|
|
451
449
|
left: geometry.Left,
|
|
452
450
|
top: geometry.Top,
|
|
453
451
|
width: geometry.Width,
|
|
454
|
-
height: geometry.Height
|
|
452
|
+
height: geometry.Height,
|
|
455
453
|
});
|
|
456
454
|
}
|
|
457
455
|
}
|
|
@@ -459,18 +457,18 @@ export class TextractProcessor {
|
|
|
459
457
|
}
|
|
460
458
|
}
|
|
461
459
|
}
|
|
462
|
-
|
|
460
|
+
|
|
463
461
|
// Handle last page
|
|
464
462
|
if (currentPage) {
|
|
465
463
|
if (currentTextContent.trim().length > 0) {
|
|
466
464
|
currentPage.blocks.push({
|
|
467
465
|
type: 'text',
|
|
468
|
-
content: currentTextContent
|
|
466
|
+
content: currentTextContent,
|
|
469
467
|
});
|
|
470
468
|
}
|
|
471
469
|
pageContents.push(currentPage);
|
|
472
470
|
}
|
|
473
|
-
|
|
471
|
+
|
|
474
472
|
// Build final output
|
|
475
473
|
let fullText = '';
|
|
476
474
|
let imgNumber = 1;
|
|
@@ -481,9 +479,10 @@ export class TextractProcessor {
|
|
|
481
479
|
if (block.type === 'text') {
|
|
482
480
|
fullText += `<text>\n${block.content}\n</text>\n\n`;
|
|
483
481
|
} else if (block.type === 'table') {
|
|
484
|
-
const confidenceAttr =
|
|
485
|
-
|
|
486
|
-
|
|
482
|
+
const confidenceAttr =
|
|
483
|
+
block.confidence !== undefined && this.includeConfidenceInTables
|
|
484
|
+
? ` confidence="${block.confidence.toFixed(2)}"`
|
|
485
|
+
: '';
|
|
487
486
|
fullText += `<table number=${tableNumber++} type="csv" ${confidenceAttr}>\n`;
|
|
488
487
|
fullText += `${block.content}\n`;
|
|
489
488
|
fullText += `</table>\n\n`;
|
|
@@ -493,14 +492,13 @@ export class TextractProcessor {
|
|
|
493
492
|
const topAttr = block.top ? ` top="${block.top.toFixed(4)}"` : '';
|
|
494
493
|
const widthAttr = block.width ? ` width="${block.width.toFixed(4)}"` : '';
|
|
495
494
|
const heightAttr = block.height ? ` height="${block.height.toFixed(4)}"` : '';
|
|
496
|
-
|
|
495
|
+
|
|
497
496
|
fullText += `<image id="${imgNumber++}" ${leftAttr}${topAttr}${widthAttr}${heightAttr}>\n${block.content.trim()}\n</image>\n\n`;
|
|
498
497
|
}
|
|
499
498
|
}
|
|
500
499
|
fullText += `</page>\n\n`;
|
|
501
500
|
}
|
|
502
|
-
|
|
501
|
+
|
|
503
502
|
return fullText;
|
|
504
503
|
}
|
|
505
|
-
|
|
506
504
|
}
|