@vertesia/workflow 0.24.0-dev.202601221707
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +13 -0
- package/README.md +65 -0
- package/bin/bundle-workflows.mjs +39 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +33 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +73 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +19 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/chunkDocument.js +85 -0
- package/lib/cjs/activities/chunkDocument.js.map +1 -0
- package/lib/cjs/activities/copyParentArtifacts.js +127 -0
- package/lib/cjs/activities/copyParentArtifacts.js.map +1 -0
- package/lib/cjs/activities/createDocumentFromOther.js +64 -0
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
- package/lib/cjs/activities/executeInteraction.js +194 -0
- package/lib/cjs/activities/executeInteraction.js.map +1 -0
- package/lib/cjs/activities/extractDocumentText.js +156 -0
- package/lib/cjs/activities/extractDocumentText.js.map +1 -0
- package/lib/cjs/activities/generateDocumentProperties.js +83 -0
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
- package/lib/cjs/activities/generateEmbeddings.js +257 -0
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
- package/lib/cjs/activities/generateOrAssignContentType.js +125 -0
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/cjs/activities/getObjectFromStore.js +20 -0
- package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
- package/lib/cjs/activities/handleError.js +22 -0
- package/lib/cjs/activities/handleError.js.map +1 -0
- package/lib/cjs/activities/index-dsl.js +51 -0
- package/lib/cjs/activities/index-dsl.js.map +1 -0
- package/lib/cjs/activities/index.js +21 -0
- package/lib/cjs/activities/index.js.map +1 -0
- package/lib/cjs/activities/media/prepareAudio.js +239 -0
- package/lib/cjs/activities/media/prepareAudio.js.map +1 -0
- package/lib/cjs/activities/media/prepareVideo.js +429 -0
- package/lib/cjs/activities/media/prepareVideo.js.map +1 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js +103 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/cjs/activities/media/saveGladiaTranscription.js +81 -0
- package/lib/cjs/activities/media/saveGladiaTranscription.js.map +1 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +82 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/cjs/activities/notifyWebhook.js +158 -0
- package/lib/cjs/activities/notifyWebhook.js.map +1 -0
- package/lib/cjs/activities/rateLimiter.js +30 -0
- package/lib/cjs/activities/rateLimiter.js.map +1 -0
- package/lib/cjs/activities/renditions/generateImageRendition.js +66 -0
- package/lib/cjs/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js +200 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/cjs/activities/setDocumentStatus.js +15 -0
- package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
- package/lib/cjs/conversion/TextractProcessor.js +417 -0
- package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
- package/lib/cjs/conversion/image.js +149 -0
- package/lib/cjs/conversion/image.js.map +1 -0
- package/lib/cjs/conversion/markitdown.js +42 -0
- package/lib/cjs/conversion/markitdown.js.map +1 -0
- package/lib/cjs/conversion/mutool.js +147 -0
- package/lib/cjs/conversion/mutool.js.map +1 -0
- package/lib/cjs/conversion/pandoc.js +39 -0
- package/lib/cjs/conversion/pandoc.js.map +1 -0
- package/lib/cjs/dsl/conditions.js +81 -0
- package/lib/cjs/dsl/conditions.js.map +1 -0
- package/lib/cjs/dsl/dsl-workflow.js +343 -0
- package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
- package/lib/cjs/dsl/dslProxyActivities.js +23 -0
- package/lib/cjs/dsl/dslProxyActivities.js.map +1 -0
- package/lib/cjs/dsl/projections.js +59 -0
- package/lib/cjs/dsl/projections.js.map +1 -0
- package/lib/cjs/dsl/setup/ActivityContext.js +122 -0
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/index.js +16 -0
- package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
- package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
- package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/cjs/dsl/validation.js +122 -0
- package/lib/cjs/dsl/validation.js.map +1 -0
- package/lib/cjs/dsl/vars.js +341 -0
- package/lib/cjs/dsl/vars.js.map +1 -0
- package/lib/cjs/dsl/walk.js +100 -0
- package/lib/cjs/dsl/walk.js.map +1 -0
- package/lib/cjs/dsl.js +20 -0
- package/lib/cjs/dsl.js.map +1 -0
- package/lib/cjs/errors.js +79 -0
- package/lib/cjs/errors.js.map +1 -0
- package/lib/cjs/index.js +56 -0
- package/lib/cjs/index.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +72 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js +78 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js +86 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/index.js +12 -0
- package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +56 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/cjs/iterative-generation/types.js +5 -0
- package/lib/cjs/iterative-generation/types.js.map +1 -0
- package/lib/cjs/iterative-generation/utils.js +121 -0
- package/lib/cjs/iterative-generation/utils.js.map +1 -0
- package/lib/cjs/package.json +3 -0
- package/lib/cjs/result-types.js +10 -0
- package/lib/cjs/result-types.js.map +1 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js +53 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +33 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/cjs/utils/auth.js +15 -0
- package/lib/cjs/utils/auth.js.map +1 -0
- package/lib/cjs/utils/blobs.js +64 -0
- package/lib/cjs/utils/blobs.js.map +1 -0
- package/lib/cjs/utils/chunks.js +14 -0
- package/lib/cjs/utils/chunks.js.map +1 -0
- package/lib/cjs/utils/client.js +31 -0
- package/lib/cjs/utils/client.js.map +1 -0
- package/lib/cjs/utils/expand-vars.js +33 -0
- package/lib/cjs/utils/expand-vars.js.map +1 -0
- package/lib/cjs/utils/memory.js +65 -0
- package/lib/cjs/utils/memory.js.map +1 -0
- package/lib/cjs/utils/renditions.js +88 -0
- package/lib/cjs/utils/renditions.js.map +1 -0
- package/lib/cjs/utils/storage.js +54 -0
- package/lib/cjs/utils/storage.js.map +1 -0
- package/lib/cjs/utils/tokens.js +38 -0
- package/lib/cjs/utils/tokens.js.map +1 -0
- package/lib/cjs/vars.js +20 -0
- package/lib/cjs/vars.js.map +1 -0
- package/lib/cjs/workflows.js +15 -0
- package/lib/cjs/workflows.js.map +1 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +30 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +70 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +16 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/chunkDocument.js +82 -0
- package/lib/esm/activities/chunkDocument.js.map +1 -0
- package/lib/esm/activities/copyParentArtifacts.js +124 -0
- package/lib/esm/activities/copyParentArtifacts.js.map +1 -0
- package/lib/esm/activities/createDocumentFromOther.js +58 -0
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
- package/lib/esm/activities/executeInteraction.js +190 -0
- package/lib/esm/activities/executeInteraction.js.map +1 -0
- package/lib/esm/activities/extractDocumentText.js +153 -0
- package/lib/esm/activities/extractDocumentText.js.map +1 -0
- package/lib/esm/activities/generateDocumentProperties.js +80 -0
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
- package/lib/esm/activities/generateEmbeddings.js +254 -0
- package/lib/esm/activities/generateEmbeddings.js.map +1 -0
- package/lib/esm/activities/generateOrAssignContentType.js +122 -0
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/esm/activities/getObjectFromStore.js +17 -0
- package/lib/esm/activities/getObjectFromStore.js.map +1 -0
- package/lib/esm/activities/handleError.js +19 -0
- package/lib/esm/activities/handleError.js.map +1 -0
- package/lib/esm/activities/index-dsl.js +25 -0
- package/lib/esm/activities/index-dsl.js.map +1 -0
- package/lib/esm/activities/index.js +5 -0
- package/lib/esm/activities/index.js.map +1 -0
- package/lib/esm/activities/media/prepareAudio.js +200 -0
- package/lib/esm/activities/media/prepareAudio.js.map +1 -0
- package/lib/esm/activities/media/prepareVideo.js +390 -0
- package/lib/esm/activities/media/prepareVideo.js.map +1 -0
- package/lib/esm/activities/media/processPdfWithTextract.js +99 -0
- package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/esm/activities/media/saveGladiaTranscription.js +78 -0
- package/lib/esm/activities/media/saveGladiaTranscription.js.map +1 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +79 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/esm/activities/notifyWebhook.js +155 -0
- package/lib/esm/activities/notifyWebhook.js.map +1 -0
- package/lib/esm/activities/rateLimiter.js +27 -0
- package/lib/esm/activities/rateLimiter.js.map +1 -0
- package/lib/esm/activities/renditions/generateImageRendition.js +63 -0
- package/lib/esm/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js +194 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/esm/activities/setDocumentStatus.js +12 -0
- package/lib/esm/activities/setDocumentStatus.js.map +1 -0
- package/lib/esm/conversion/TextractProcessor.js +410 -0
- package/lib/esm/conversion/TextractProcessor.js.map +1 -0
- package/lib/esm/conversion/image.js +143 -0
- package/lib/esm/conversion/image.js.map +1 -0
- package/lib/esm/conversion/markitdown.js +36 -0
- package/lib/esm/conversion/markitdown.js.map +1 -0
- package/lib/esm/conversion/mutool.js +139 -0
- package/lib/esm/conversion/mutool.js.map +1 -0
- package/lib/esm/conversion/pandoc.js +36 -0
- package/lib/esm/conversion/pandoc.js.map +1 -0
- package/lib/esm/dsl/conditions.js +75 -0
- package/lib/esm/dsl/conditions.js.map +1 -0
- package/lib/esm/dsl/dsl-workflow.js +336 -0
- package/lib/esm/dsl/dsl-workflow.js.map +1 -0
- package/lib/esm/dsl/dslProxyActivities.js +20 -0
- package/lib/esm/dsl/dslProxyActivities.js.map +1 -0
- package/lib/esm/dsl/projections.js +55 -0
- package/lib/esm/dsl/projections.js.map +1 -0
- package/lib/esm/dsl/setup/ActivityContext.js +117 -0
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/index.js +12 -0
- package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/providers.js +61 -0
- package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/esm/dsl/test/test-child-workflow.js +5 -0
- package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/esm/dsl/validation.js +118 -0
- package/lib/esm/dsl/validation.js.map +1 -0
- package/lib/esm/dsl/vars.js +335 -0
- package/lib/esm/dsl/vars.js.map +1 -0
- package/lib/esm/dsl/walk.js +96 -0
- package/lib/esm/dsl/walk.js.map +1 -0
- package/lib/esm/dsl.js +4 -0
- package/lib/esm/dsl.js.map +1 -0
- package/lib/esm/errors.js +69 -0
- package/lib/esm/errors.js.map +1 -0
- package/lib/esm/index.js +38 -0
- package/lib/esm/index.js.map +1 -0
- package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
- package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +69 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generatePart.js +75 -0
- package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generateToc.js +83 -0
- package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/index.js +5 -0
- package/lib/esm/iterative-generation/activities/index.js.map +1 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +53 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/esm/iterative-generation/types.js +2 -0
- package/lib/esm/iterative-generation/types.js.map +1 -0
- package/lib/esm/iterative-generation/utils.js +112 -0
- package/lib/esm/iterative-generation/utils.js.map +1 -0
- package/lib/esm/result-types.js +7 -0
- package/lib/esm/result-types.js.map +1 -0
- package/lib/esm/system/notifyWebhookWorkflow.js +50 -0
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +30 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/esm/utils/auth.js +8 -0
- package/lib/esm/utils/auth.js.map +1 -0
- package/lib/esm/utils/blobs.js +54 -0
- package/lib/esm/utils/blobs.js.map +1 -0
- package/lib/esm/utils/chunks.js +9 -0
- package/lib/esm/utils/chunks.js.map +1 -0
- package/lib/esm/utils/client.js +27 -0
- package/lib/esm/utils/client.js.map +1 -0
- package/lib/esm/utils/expand-vars.js +30 -0
- package/lib/esm/utils/expand-vars.js.map +1 -0
- package/lib/esm/utils/memory.js +55 -0
- package/lib/esm/utils/memory.js.map +1 -0
- package/lib/esm/utils/renditions.js +80 -0
- package/lib/esm/utils/renditions.js.map +1 -0
- package/lib/esm/utils/storage.js +45 -0
- package/lib/esm/utils/storage.js.map +1 -0
- package/lib/esm/utils/tokens.js +34 -0
- package/lib/esm/utils/tokens.js.map +1 -0
- package/lib/esm/vars.js +4 -0
- package/lib/esm/vars.js.map +1 -0
- package/lib/esm/workflows.js +8 -0
- package/lib/esm/workflows.js.map +1 -0
- package/lib/tsconfig.tsbuildinfo +1 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +39 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/chunkDocument.d.ts +33 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -0
- package/lib/types/activities/copyParentArtifacts.d.ts +19 -0
- package/lib/types/activities/copyParentArtifacts.d.ts.map +1 -0
- package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
- package/lib/types/activities/executeInteraction.d.ts +61 -0
- package/lib/types/activities/executeInteraction.d.ts.map +1 -0
- package/lib/types/activities/extractDocumentText.d.ts +10 -0
- package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
- package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
- package/lib/types/activities/generateEmbeddings.d.ts +53 -0
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
- package/lib/types/activities/getObjectFromStore.d.ts +14 -0
- package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
- package/lib/types/activities/handleError.d.ts +6 -0
- package/lib/types/activities/handleError.d.ts.map +1 -0
- package/lib/types/activities/index-dsl.d.ts +25 -0
- package/lib/types/activities/index-dsl.d.ts.map +1 -0
- package/lib/types/activities/index.d.ts +5 -0
- package/lib/types/activities/index.d.ts.map +1 -0
- package/lib/types/activities/media/prepareAudio.d.ts +25 -0
- package/lib/types/activities/media/prepareAudio.d.ts.map +1 -0
- package/lib/types/activities/media/prepareVideo.d.ts +30 -0
- package/lib/types/activities/media/prepareVideo.d.ts.map +1 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
- package/lib/types/activities/media/saveGladiaTranscription.d.ts +14 -0
- package/lib/types/activities/media/saveGladiaTranscription.d.ts.map +1 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +19 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
- package/lib/types/activities/notifyWebhook.d.ts +27 -0
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
- package/lib/types/activities/rateLimiter.d.ts +11 -0
- package/lib/types/activities/rateLimiter.d.ts.map +1 -0
- package/lib/types/activities/renditions/generateImageRendition.d.ts +14 -0
- package/lib/types/activities/renditions/generateImageRendition.d.ts.map +1 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts +15 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts.map +1 -0
- package/lib/types/activities/setDocumentStatus.d.ts +15 -0
- package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
- package/lib/types/conversion/TextractProcessor.d.ts +45 -0
- package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
- package/lib/types/conversion/image.d.ts +13 -0
- package/lib/types/conversion/image.d.ts.map +1 -0
- package/lib/types/conversion/markitdown.d.ts +2 -0
- package/lib/types/conversion/markitdown.d.ts.map +1 -0
- package/lib/types/conversion/mutool.d.ts +19 -0
- package/lib/types/conversion/mutool.d.ts.map +1 -0
- package/lib/types/conversion/pandoc.d.ts +2 -0
- package/lib/types/conversion/pandoc.d.ts.map +1 -0
- package/lib/types/dsl/conditions.d.ts +2 -0
- package/lib/types/dsl/conditions.d.ts.map +1 -0
- package/lib/types/dsl/dsl-workflow.d.ts +5 -0
- package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
- package/lib/types/dsl/dslProxyActivities.d.ts +10 -0
- package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -0
- package/lib/types/dsl/projections.d.ts +4 -0
- package/lib/types/dsl/projections.d.ts.map +1 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts +17 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
- package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
- package/lib/types/dsl/validation.d.ts +4 -0
- package/lib/types/dsl/validation.d.ts.map +1 -0
- package/lib/types/dsl/vars.d.ts +48 -0
- package/lib/types/dsl/vars.d.ts.map +1 -0
- package/lib/types/dsl/walk.d.ts +18 -0
- package/lib/types/dsl/walk.d.ts.map +1 -0
- package/lib/types/dsl.d.ts +4 -0
- package/lib/types/dsl.d.ts.map +1 -0
- package/lib/types/errors.d.ts +37 -0
- package/lib/types/errors.d.ts.map +1 -0
- package/lib/types/index.d.ts +37 -0
- package/lib/types/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/index.d.ts +5 -0
- package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
- package/lib/types/iterative-generation/types.d.ts +79 -0
- package/lib/types/iterative-generation/types.d.ts.map +1 -0
- package/lib/types/iterative-generation/utils.d.ts +26 -0
- package/lib/types/iterative-generation/utils.d.ts.map +1 -0
- package/lib/types/result-types.d.ts +22 -0
- package/lib/types/result-types.d.ts.map +1 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts +8 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +25 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
- package/lib/types/utils/auth.d.ts +4 -0
- package/lib/types/utils/auth.d.ts.map +1 -0
- package/lib/types/utils/blobs.d.ts +7 -0
- package/lib/types/utils/blobs.d.ts.map +1 -0
- package/lib/types/utils/chunks.d.ts +9 -0
- package/lib/types/utils/chunks.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts +8 -0
- package/lib/types/utils/client.d.ts.map +1 -0
- package/lib/types/utils/expand-vars.d.ts +8 -0
- package/lib/types/utils/expand-vars.d.ts.map +1 -0
- package/lib/types/utils/memory.d.ts +8 -0
- package/lib/types/utils/memory.d.ts.map +1 -0
- package/lib/types/utils/renditions.d.ts +23 -0
- package/lib/types/utils/renditions.d.ts.map +1 -0
- package/lib/types/utils/storage.d.ts +16 -0
- package/lib/types/utils/storage.d.ts.map +1 -0
- package/lib/types/utils/tokens.d.ts +11 -0
- package/lib/types/utils/tokens.d.ts.map +1 -0
- package/lib/types/vars.d.ts +3 -0
- package/lib/types/vars.d.ts.map +1 -0
- package/lib/types/workflows.d.ts +8 -0
- package/lib/types/workflows.d.ts.map +1 -0
- package/lib/workflows-bundle.js +17213 -0
- package/package.json +146 -0
- package/src/activities/advanced/createDocumentTypeFromInteractionRun.ts +55 -0
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +119 -0
- package/src/activities/advanced/updateDocumentFromInteractionRun.ts +35 -0
- package/src/activities/chunkDocument.ts +146 -0
- package/src/activities/copyParentArtifacts.ts +162 -0
- package/src/activities/createDocumentFromOther.ts +92 -0
- package/src/activities/executeInteraction.ts +300 -0
- package/src/activities/extractDocumentText.ts +205 -0
- package/src/activities/generateDocumentProperties.ts +120 -0
- package/src/activities/generateEmbeddings.ts +387 -0
- package/src/activities/generateOrAssignContentType.ts +218 -0
- package/src/activities/getObjectFromStore.ts +31 -0
- package/src/activities/handleError.ts +25 -0
- package/src/activities/index-dsl.ts +25 -0
- package/src/activities/index.ts +4 -0
- package/src/activities/media/prepareAudio.ts +334 -0
- package/src/activities/media/prepareVideo.ts +622 -0
- package/src/activities/media/processPdfWithTextract.ts +141 -0
- package/src/activities/media/saveGladiaTranscription.ts +128 -0
- package/src/activities/media/transcribeMediaWithGladia.ts +117 -0
- package/src/activities/notifyWebhook.test.ts +134 -0
- package/src/activities/notifyWebhook.ts +199 -0
- package/src/activities/rateLimiter.ts +41 -0
- package/src/activities/renditions/generateImageRendition.ts +111 -0
- package/src/activities/renditions/generateVideoRendition.ts +293 -0
- package/src/activities/setDocumentStatus.ts +25 -0
- package/src/conversion/TextractProcessor.ts +506 -0
- package/src/conversion/image.test.ts +118 -0
- package/src/conversion/image.ts +168 -0
- package/src/conversion/markitdown.ts +41 -0
- package/src/conversion/mutool.test.ts +74 -0
- package/src/conversion/mutool.ts +180 -0
- package/src/conversion/pandoc.test.ts +24 -0
- package/src/conversion/pandoc.ts +40 -0
- package/src/dsl/conditions.ts +76 -0
- package/src/dsl/dsl-workflow.test.ts +58 -0
- package/src/dsl/dsl-workflow.ts +397 -0
- package/src/dsl/dslProxyActivities.ts +38 -0
- package/src/dsl/ms.d.ts +11 -0
- package/src/dsl/projections.test.ts +159 -0
- package/src/dsl/projections.ts +72 -0
- package/src/dsl/setup/ActivityContext.ts +178 -0
- package/src/dsl/setup/fetch/DataProvider.ts +45 -0
- package/src/dsl/setup/fetch/index.ts +19 -0
- package/src/dsl/setup/fetch/providers.ts +67 -0
- package/src/dsl/test/test-child-workflow.ts +6 -0
- package/src/dsl/validation.test.ts +257 -0
- package/src/dsl/validation.ts +125 -0
- package/src/dsl/vars.test.ts +245 -0
- package/src/dsl/vars.ts +340 -0
- package/src/dsl/walk.test.ts +81 -0
- package/src/dsl/walk.ts +103 -0
- package/src/dsl/workflow-exec-child.test.ts +273 -0
- package/src/dsl/workflow-fetch.test.ts +138 -0
- package/src/dsl/workflow-import.test.ts +89 -0
- package/src/dsl/workflow.test.ts +122 -0
- package/src/dsl.ts +3 -0
- package/src/errors.ts +101 -0
- package/src/index.ts +41 -0
- package/src/iterative-generation/activities/extractToc.ts +63 -0
- package/src/iterative-generation/activities/finalizeOutput.ts +100 -0
- package/src/iterative-generation/activities/generatePart.ts +123 -0
- package/src/iterative-generation/activities/generateToc.ts +116 -0
- package/src/iterative-generation/activities/index.ts +4 -0
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +68 -0
- package/src/iterative-generation/types.ts +99 -0
- package/src/iterative-generation/utils.ts +126 -0
- package/src/result-types.ts +25 -0
- package/src/system/notifyWebhookWorkflow.ts +70 -0
- package/src/system/recalculateEmbeddingsWorkflow.ts +41 -0
- package/src/utils/auth.ts +10 -0
- package/src/utils/blobs.ts +59 -0
- package/src/utils/chunks.ts +17 -0
- package/src/utils/client.ts +46 -0
- package/src/utils/expand-vars.ts +31 -0
- package/src/utils/memory.ts +61 -0
- package/src/utils/renditions.ts +127 -0
- package/src/utils/storage.ts +60 -0
- package/src/utils/tokens.ts +44 -0
- package/src/vars.ts +3 -0
- package/src/workflows.ts +7 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import { log } from "@temporalio/activity";
|
|
2
|
+
import { execFile as execFileCallback } from "child_process";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
import { file } from "tmp-promise";
|
|
5
|
+
import { promisify } from "util";
|
|
6
|
+
// Promise-returning wrapper around child_process.execFile (via util.promisify) so it can be awaited.
const execFile = promisify(execFileCallback);
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Resizes an image to a maximum height or width using ImageMagick
|
|
10
|
+
* with progressive loading when supported and colorspace correction
|
|
11
|
+
* @param inputPath Input file path
|
|
12
|
+
* @param max_hw Maximum height or width
|
|
13
|
+
* @param format Output format
|
|
14
|
+
* @param progressive Enable progressive loading for supported formats (defaults to true)
|
|
15
|
+
* @param colorspaceCorrection Enable colorspace correction (defaults to true); not recommended with Q8 ImageMagick builds.
|
|
16
|
+
* @param colorspace Colorspace to use for processing ('RGB', 'LAB', 'LUV', 'sigmoidal') (defaults to 'RGB')
|
|
17
|
+
* @returns Path to the resized image
|
|
18
|
+
*/
|
|
19
|
+
export async function imageResizer(
|
|
20
|
+
inputPath: string,
|
|
21
|
+
max_hw: number,
|
|
22
|
+
format: string,
|
|
23
|
+
progressive: boolean = true,
|
|
24
|
+
colorspaceCorrection: boolean = true,
|
|
25
|
+
colorspace: 'RGB' | 'LAB' | 'LUV' | 'sigmoidal' = 'RGB'
|
|
26
|
+
): Promise<string> {
|
|
27
|
+
log.info(`[image-resizer] Resizing image: ${inputPath} to max_hw: ${max_hw}, format: ${format}, progressive: ${progressive}, colorspaceCorrection: ${colorspaceCorrection ? colorspace : 'disabled'}`);
|
|
28
|
+
|
|
29
|
+
const allowedFormats = ["jpg", "jpeg", "png", "webp"];
|
|
30
|
+
|
|
31
|
+
if (!format || format.trim() === "") {
|
|
32
|
+
throw new Error(`Invalid format: ${format}.Supported : ${allowedFormats.join(", ")}`);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
//check that max_hw is valid
|
|
36
|
+
if (!Number.isInteger(max_hw) || max_hw <= 0) {
|
|
37
|
+
throw new Error(`Invalid max_hw value: ${max_hw}`);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
//check that inputPath exists
|
|
41
|
+
if (!fs.existsSync(inputPath)) {
|
|
42
|
+
throw new Error(`Input file does not exist: ${inputPath}`);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Create a temporary file
|
|
46
|
+
const { path: outputPath, cleanup } = await file({ postfix: `.${format}` });
|
|
47
|
+
try {
|
|
48
|
+
// Check if input file exists
|
|
49
|
+
if (!fs.existsSync(inputPath)) {
|
|
50
|
+
throw new Error(`Input file does not exist: ${inputPath}`);
|
|
51
|
+
}
|
|
52
|
+
// Validate max_hw
|
|
53
|
+
if (!Number.isInteger(max_hw) || max_hw <= 0) {
|
|
54
|
+
throw new Error(`Invalid max_hw value: ${max_hw}`);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Progressive loading options
|
|
58
|
+
let conversionOption = "";
|
|
59
|
+
|
|
60
|
+
// Only add progressive option for formats that support it
|
|
61
|
+
if (progressive) {
|
|
62
|
+
// JPEG and some other formats support progressive loading
|
|
63
|
+
const lowerFormat = format.toLowerCase();
|
|
64
|
+
if (lowerFormat === "jpg" || lowerFormat === "jpeg") {
|
|
65
|
+
conversionOption = "-interlace JPEG";
|
|
66
|
+
log.info(`Enabling interlaced ${lowerFormat.toUpperCase()} format`);
|
|
67
|
+
} else if (lowerFormat === "png") {
|
|
68
|
+
conversionOption = "-interlace PNG";
|
|
69
|
+
log.info(`Enabling interlaced ${lowerFormat.toUpperCase()} format`);
|
|
70
|
+
} else if (lowerFormat === "gif") {
|
|
71
|
+
conversionOption = "-interlace GIF";
|
|
72
|
+
log.info(`Enabling interlaced ${lowerFormat.toUpperCase()} format`);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
log.info(`Resizing image using ImageMagick: ${inputPath} -> ${outputPath}`);
|
|
77
|
+
|
|
78
|
+
const command = `convert`
|
|
79
|
+
let args = [inputPath];
|
|
80
|
+
|
|
81
|
+
// Add JPEG shrink-on-load optimization
|
|
82
|
+
args.push("-define", `jpeg:size=${max_hw * 3}x${max_hw * 3}`);
|
|
83
|
+
|
|
84
|
+
// Remove metadata
|
|
85
|
+
args.push("-strip");
|
|
86
|
+
|
|
87
|
+
// https://usage.imagemagick.org/filter/nicolas/#downsample
|
|
88
|
+
// Add colorspace correction if enabled
|
|
89
|
+
if (colorspaceCorrection) {
|
|
90
|
+
switch (colorspace) {
|
|
91
|
+
case 'RGB':
|
|
92
|
+
// Linear light, recommended default
|
|
93
|
+
// Convert from sRGB to linear RGB for processing
|
|
94
|
+
args.push("-colorspace", "RGB");
|
|
95
|
+
log.info("Using linear RGB colorspace for resize processing");
|
|
96
|
+
break;
|
|
97
|
+
case 'LAB':
|
|
98
|
+
// Perceptual linear light
|
|
99
|
+
// Use LAB colorspace which separates intensity from color
|
|
100
|
+
// Better for avoiding color clipping and distortion
|
|
101
|
+
args.push("-colorspace", "LAB");
|
|
102
|
+
log.info("Using LAB colorspace for resize processing");
|
|
103
|
+
break;
|
|
104
|
+
case 'LUV':
|
|
105
|
+
// Perceptual linear light
|
|
106
|
+
// Alternative to LAB with perceptually uniform color deltas
|
|
107
|
+
args.push("-colorspace", "LUV");
|
|
108
|
+
log.info("Using LUV colorspace for resize processing");
|
|
109
|
+
break;
|
|
110
|
+
case 'sigmoidal':
|
|
111
|
+
// Sigmoidal colorspace modification to reduce ringing artifacts
|
|
112
|
+
args.push("-colorspace", "RGB");
|
|
113
|
+
args.push("+sigmoidal-contrast", "6.5,50%");
|
|
114
|
+
log.info("Using sigmoidal contrast modification for resize processing");
|
|
115
|
+
break;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// Resize operation
|
|
120
|
+
args.push("-resize", `${max_hw}x${max_hw}>`);
|
|
121
|
+
|
|
122
|
+
// Restore colorspace after processing
|
|
123
|
+
if (colorspaceCorrection) {
|
|
124
|
+
switch (colorspace) {
|
|
125
|
+
case 'RGB':
|
|
126
|
+
case 'LAB':
|
|
127
|
+
case 'LUV':
|
|
128
|
+
// Convert back to sRGB for output
|
|
129
|
+
args.push("-colorspace", "sRGB");
|
|
130
|
+
break;
|
|
131
|
+
case 'sigmoidal':
|
|
132
|
+
// Restore from sigmoidal modification and convert to sRGB
|
|
133
|
+
args.push("-sigmoidal-contrast", "6.5,50%");
|
|
134
|
+
args.push("-colorspace", "sRGB");
|
|
135
|
+
break;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// Add progressive/interlace options
|
|
140
|
+
if (conversionOption) {
|
|
141
|
+
args.push(...conversionOption.split(" "));
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Output path
|
|
145
|
+
args.push(outputPath);
|
|
146
|
+
|
|
147
|
+
log.info(`ImageMagick command: ${command} ${args.join(" ")}`);
|
|
148
|
+
|
|
149
|
+
const { stderr } = await execFile(command, args);
|
|
150
|
+
|
|
151
|
+
if (stderr) {
|
|
152
|
+
log.warn(`ImageMagick warning: ${stderr}`);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Verify output exists and has content
|
|
156
|
+
if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
|
|
157
|
+
throw new Error(`ImageMagick conversion failed: output file not created or empty`);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
return outputPath;
|
|
161
|
+
} catch (error) {
|
|
162
|
+
// Clean up the temporary file
|
|
163
|
+
await cleanup();
|
|
164
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
165
|
+
log.error(`Image conversion failed: ${errorMessage}`);
|
|
166
|
+
throw new Error(`Image conversion failed: ${errorMessage}`);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { log } from "@temporalio/activity";
|
|
2
|
+
import { spawn } from "child_process";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
import tmp from "tmp";
|
|
5
|
+
|
|
6
|
+
/**
 * Convert a document to markdown using the `markitdown` command line tool.
 *
 * The buffer is written to a temporary input file, the tool is run with `-o`
 * pointing at a temporary output file, and the content of that output file is
 * returned. Both temporary files are removed once the conversion settles.
 *
 * @param buffer the raw content of the document to convert
 * @param ext optional postfix used for the temporary input file name
 * @returns a promise resolving to the generated markdown
 * @throws if the tool cannot be spawned, exits with a non-zero code, is
 * terminated by a signal, or if the output file cannot be read
 */
export function markdownWithMarkitdown(buffer: Buffer, ext?: string): Promise<string> {
    const tool = "markitdown";
    const inputFile = tmp.fileSync({ postfix: ext });
    const targetFileName = tmp.tmpNameSync({ postfix: ".md" });

    // Best-effort removal of both temporary files; failures are ignored on purpose
    const removeTempFiles = () => {
        try {
            inputFile.removeCallback();
        } catch {
            // the input file is already gone
        }
        fs.unlink(targetFileName, () => { });
    };

    try {
        fs.writeSync(inputFile.fd, buffer);
    } catch (err) {
        removeTempFiles();
        throw err;
    }

    return new Promise((resolve, reject) => {
        log.info(`Converting document to markdown with ${tool}`, { inputFile: inputFile.name, targetFileName });

        // Several events can report the same failure; only the first one settles the promise
        let settled = false;
        const fail = (err: Error) => {
            if (settled) {
                return;
            }
            settled = true;
            removeTempFiles();
            reject(err);
        };

        const command = spawn(tool, [inputFile.name, "-o", targetFileName]);

        command.on("error", fail);

        // "close" fires after "exit", so a single handler covers both
        command.on("close", (code, signal) => {
            if (settled) {
                return;
            }
            if (code !== 0) {
                // code is null when the process was killed by a signal
                fail(new Error(signal
                    ? `${tool} was terminated by signal ${signal}`
                    : `${tool} exited with code ${code}`));
                return;
            }
            fs.readFile(targetFileName, "utf8", (err, data) => {
                if (err) {
                    fail(err);
                    return;
                }
                settled = true;
                removeTempFiles();
                resolve(data);
            });
        });
    });
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { MockActivityEnvironment, TestWorkflowEnvironment } from '@temporalio/testing';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { beforeAll, expect, test } from 'vitest';
|
|
5
|
+
import { mutoolPdfToText, pdfExtractPages, pdfToImages } from './mutool.js';
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
let testEnv: TestWorkflowEnvironment;
let activityContext: MockActivityEnvironment;

// Create the local Temporal test environment and the mock activity
// environment once for the whole file. The function returned from the hook is
// run by vitest after all tests, so the local environment is torn down
// instead of being left running.
beforeAll(async () => {
    testEnv = await TestWorkflowEnvironment.createLocal();
    activityContext = new MockActivityEnvironment();

    return async () => {
        await testEnv?.teardown();
    };
});
|
|
15
|
+
|
|
16
|
+
// Per-test timeout in milliseconds
const TIMEOUT = 10000;

// Text extraction from an in-memory PDF
test('[mutool] should convert pdf to text', async () => {
    const fixture = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');
    const content = Buffer.from(fs.readFileSync(fixture));
    console.log("Running mutoolPdfToText")
    const text = await activityContext.run(mutoolPdfToText, content);
    expect(text).toContain('VF primarily uses foreign currency exchange');
}, TIMEOUT);

// Rendering every page of the PDF
test('[mutool] should convert pdf to images', async () => {
    const fixture = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');

    console.log("Running pdfToImages")
    const images = await activityContext.run(pdfToImages, fixture);
    console.log(images);

    expect(images).toBeInstanceOf(Array);
    expect((images as string[]).length).toBe(119);
}, TIMEOUT);

// Rendering only a selection of pages
test('[mutool] should convert pdf to images with pages', async () => {
    const fixture = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');
    const selection = [7, 8, 9];

    console.log("Running pdfToImages with pages")
    const images = await activityContext.run(pdfToImages, fixture, selection);
    console.log(images);

    expect(images).toBeInstanceOf(Array);
    expect((images as string[]).length).toBe(3);
}, TIMEOUT);

// Building a new PDF out of three pages
test('[mutool] should extract 3 pages from PDF into new PDF', async () => {
    const fixture = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');
    const selection = [7, 8, 9];

    console.log("Running pdfGetPages")
    const extracted = await activityContext.run(pdfExtractPages, fixture, selection);
    console.log(extracted);

    expect(extracted).toContain(".pdf");
}, TIMEOUT);

// Building a new PDF out of a single page
test('[mutool] should extract 1 pages from PDF into new PDF', async () => {
    const fixture = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');
    const selection = [12];

    console.log("Running pdfGetPages")
    const extracted = await activityContext.run(pdfExtractPages, fixture, selection);
    console.log(extracted);

    expect(extracted).toContain(".pdf");
}, TIMEOUT);
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import { log } from '@temporalio/activity';
|
|
2
|
+
import { spawn } from 'child_process';
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
import tmp from 'tmp';
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
/**
 * Convert a pdf file to text using `mutool convert`.
 *
 * The buffer is written to a temporary `.pdf` file, converted into a
 * temporary `.txt` file, and the content of that file is returned. Both
 * temporary files are removed once the conversion settles.
 *
 * TODO: pass file reference instead of Buffer
 *
 * @param buffer the content of the pdf file
 * @returns a promise resolving to the content of the generated text file
 * @throws if mutool cannot be spawned, exits with a non-zero code, is
 * terminated by a signal, or if the output file cannot be read
 */
export function mutoolPdfToText(buffer: Buffer): Promise<string> {
    const inputFile = tmp.fileSync({ postfix: '.pdf' });
    const targetFileName = tmp.tmpNameSync({ postfix: '.txt' });

    // Best-effort removal of both temporary files; failures are ignored on purpose
    const removeTempFiles = () => {
        try {
            inputFile.removeCallback();
        } catch {
            // the input file is already gone
        }
        fs.unlink(targetFileName, () => { });
    };

    try {
        fs.writeSync(inputFile.fd, buffer);
    } catch (err) {
        removeTempFiles();
        throw err;
    }

    return new Promise((resolve, reject) => {
        log.info("Converting pdf to text", { inputFile: inputFile.name, targetFileName });

        // Several events can report the same failure; only the first one settles the promise
        let settled = false;
        const fail = (err: Error) => {
            if (settled) {
                return;
            }
            settled = true;
            removeTempFiles();
            reject(err);
        };

        const command = spawn("mutool", ["convert", "-o", targetFileName, inputFile.name]);

        command.on('error', fail);

        // "close" fires after "exit", so a single handler covers both
        command.on('close', (code, signal) => {
            if (settled) {
                return;
            }
            if (code !== 0) {
                // code is null when the process was killed by a signal
                fail(new Error(signal
                    ? `mutool was terminated by signal ${signal}`
                    : `mutool exited with code ${code}`));
                return;
            }
            fs.readFile(targetFileName, 'utf8', (err, data) => {
                if (err) {
                    fail(err);
                    return;
                }
                settled = true;
                removeTempFiles();
                resolve(data);
            });
        });
    });
}
|
|
54
|
+
|
|
55
|
+
/**
 * Convert a pdf file to images (one image per page), as PNG format,
 * using `mutool draw`.
 *
 * Images are written as `page-<n>.png` in a fresh temporary directory that is
 * left in place on success so the returned paths stay readable. On failure
 * the directory is removed.
 *
 * @param file the pdf content as a Buffer, or the path of an existing pdf file
 * @param pages optional list of pages, passed to mutool as a comma separated list
 * @returns the paths of the generated PNG files, ordered by the number found
 * in their file name
 * @throws if mutool cannot be spawned, exits with a non-zero code, or is
 * terminated by a signal
 */
export async function pdfToImages(file: Buffer | string, pages?: number[]): Promise<string[]> {
    const workDir = tmp.dirSync();
    log.info(`Converting pdf to images`, { workDir: workDir.name, input_type: typeof file, pages });

    if (file instanceof Buffer) {
        fs.writeFileSync(`${workDir.name}/input.pdf`, file);
    }
    const filename = typeof file === 'string' ? file : `${workDir.name}/input.pdf`;

    const args = [
        "draw",
        "-o", `${workDir.name}/page-%d.png`,
        filename,
    ];

    if (pages) {
        args.push(pages.join(','));
    }

    // Best-effort removal of the working directory, used only on failure
    const discardWorkDir = () => {
        try {
            fs.rmSync(workDir.name, { recursive: true, force: true });
        } catch {
            // nothing more can be done here
        }
    };

    // Number embedded in a `page-<n>.png` name; unexpected names sort last
    const pageNumberOf = (name: string) => {
        const match = /^page-(\d+)\.png$/.exec(name);
        return match ? Number.parseInt(match[1], 10) : Number.MAX_SAFE_INTEGER;
    };

    return new Promise((resolve, reject) => {
        // Several events can report the same failure; only the first one settles the promise
        let settled = false;
        const fail = (err: Error) => {
            if (settled) {
                return;
            }
            settled = true;
            discardWorkDir();
            reject(err);
        };

        const command = spawn("mutool", args);
        log.info(`Executing mutool command`, { workDir: workDir.name, filename, command: command.spawnargs });

        let errors = '';
        command.stderr.on('data', (data) => {
            errors += data;
        });

        command.on('error', fail);

        // Handle the result on "close" rather than "exit": the stdio streams
        // are closed by then, so `errors` holds the complete stderr output
        command.on('close', (code, signal) => {
            if (settled) {
                return;
            }
            if (code !== 0) {
                // code is null when the process was killed by a signal
                const reason = signal
                    ? `mutool was terminated by signal ${signal}`
                    : `mutool exited with code ${code}`;
                log.error(reason, { errors });
                fail(new Error(reason));
                return;
            }
            try {
                const files = fs.readdirSync(workDir.name);
                // readdirSync gives no ordering guarantee, so order the pages numerically
                const images = files
                    .filter(f => f.endsWith('.png'))
                    .sort((a, b) => pageNumberOf(a) - pageNumberOf(b))
                    .map(f => `${workDir.name}/${f}`);
                log.info(`Converted pdf to ${images.length} images`, files);
                settled = true;
                resolve(images);
            } catch (err) {
                fail(err instanceof Error ? err : new Error(String(err)));
            }
        });
    });
}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
/**
 * Get some pages from a PDF to create a new one, using `mutool merge`.
 *
 * The new file is written as `output.pdf` in a fresh temporary directory that
 * is left in place on success so the returned path stays readable. On failure
 * the directory is removed.
 *
 * @param file the pdf content as a Buffer, or the path of an existing pdf file
 * @param pages the pages to keep, passed to mutool as a comma separated list
 * @returns the path of the generated pdf file
 * @throws if mutool cannot be spawned, exits with a non-zero code, or is
 * terminated by a signal
 */
export async function pdfExtractPages(file: Buffer | string, pages: number[]): Promise<string> {
    const workDir = tmp.dirSync();
    log.info(`Getting pages from pdf`, { workDir: workDir.name, input_type: typeof file, pages });

    if (file instanceof Buffer) {
        fs.writeFileSync(`${workDir.name}/input.pdf`, file);
    }
    const filename = typeof file === 'string' ? file : `${workDir.name}/input.pdf`;
    const outputFile = `${workDir.name}/output.pdf`;

    const args = [
        "merge",
        "-o", outputFile,
        "-O", "garbage=compact,sanitize",
        filename,
        pages.join(','),
    ];

    // Best-effort removal of the working directory, used only on failure
    const discardWorkDir = () => {
        try {
            fs.rmSync(workDir.name, { recursive: true, force: true });
        } catch {
            // nothing more can be done here
        }
    };

    return new Promise((resolve, reject) => {
        // Several events can report the same failure; only the first one settles the promise
        let settled = false;
        const fail = (err: Error) => {
            if (settled) {
                return;
            }
            settled = true;
            discardWorkDir();
            reject(err);
        };

        const command = spawn("mutool", args);
        log.info(`Executing mutool command`, { workDir: workDir.name, filename, command: command.spawnargs });

        let errors = '';
        command.stderr.on('data', (data) => {
            errors += data;
        });

        command.on('error', fail);

        // Handle the result on "close" rather than "exit": the stdio streams
        // are closed by then, so `errors` holds the complete stderr output
        command.on('close', (code, signal) => {
            if (settled) {
                return;
            }
            if (code !== 0) {
                // code is null when the process was killed by a signal
                const reason = signal
                    ? `mutool was terminated by signal ${signal}`
                    : `mutool exited with code ${code}`;
                log.error(reason, { errors });
                fail(new Error(reason));
                return;
            }
            log.info(`Extracted pages from pdf`, { pages, file: outputFile });
            settled = true;
            resolve(outputFile);
        });
    });
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { MockActivityEnvironment, TestWorkflowEnvironment } from '@temporalio/testing';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { beforeAll, expect, test } from 'vitest';
|
|
5
|
+
import { markdownWithPandoc } from '../conversion/pandoc';
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
let testEnv: TestWorkflowEnvironment;
let activityContext: MockActivityEnvironment;

// Create the local Temporal test environment and the mock activity
// environment once for the whole file. The function returned from the hook is
// run by vitest after all tests, so the local environment is torn down
// instead of being left running.
beforeAll(async () => {
    testEnv = await TestWorkflowEnvironment.createLocal();
    activityContext = new MockActivityEnvironment();

    return async () => {
        await testEnv?.teardown();
    };
});
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
// Only DOCX is covered here; add cases for other file types (ODT, ...) if needed
test('should convert docx to markdown', async () => {
    const fixture = path.join(__dirname, '../../fixtures', 'us-ciia.docx');
    console.log("Converting file from", fixture);
    const content = Buffer.from(fs.readFileSync(fixture));
    const markdown = await activityContext.run(markdownWithPandoc, content, 'docx');
    expect(markdown).to.include('confidential');
});
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { log } from "@temporalio/activity";
|
|
2
|
+
import { spawn } from "child_process";
|
|
3
|
+
import { PassThrough } from "stream";
|
|
4
|
+
|
|
5
|
+
/**
 * Convert a document to markdown by piping it through `pandoc`.
 *
 * The buffer is written to pandoc's stdin and the markdown is collected from
 * its stdout.
 *
 * @param buffer the raw content of the document to convert
 * @param fromFormat the input format, passed to pandoc as the `-f` argument
 * @returns a promise resolving to the generated markdown
 * @throws if pandoc cannot be spawned, exits with a non-zero code, is
 * terminated by a signal, or if the input could not be written to it
 */
export function markdownWithPandoc(buffer: Buffer, fromFormat: string): Promise<string> {
    return new Promise((resolve, reject) => {
        log.info(`Converting ${fromFormat} to markdown`);

        // Several events can report the same failure; only the first one settles the promise
        let settled = false;
        const fail = (err: Error) => {
            if (settled) {
                return;
            }
            settled = true;
            reject(err);
        };

        const command = spawn("pandoc", ["-t", "markdown", "-f", fromFormat], {
            stdio: "pipe",
        });

        // Keep raw chunks and decode once at the end: decoding chunk by chunk
        // would corrupt a multi-byte character split across two chunks
        const chunks: Buffer[] = [];
        command.stdout.on("data", (chunk: Buffer) => {
            chunks.push(chunk);
        });

        // Without a listener, a write failure (e.g. pandoc exiting before it
        // has read its whole input) would surface as an uncaught exception.
        // Remember it and let the "close" handler decide what to report.
        let stdinError: Error | undefined;
        command.stdin.on("error", (err: Error) => {
            stdinError = err;
        });

        const input = new PassThrough();
        input.end(buffer);
        input.pipe(command.stdin);

        command.on("error", fail);

        // "close" fires after "exit" and once stdout has been fully read
        command.on("close", (code, signal) => {
            if (settled) {
                return;
            }
            if (code !== 0) {
                // code is null when the process was killed by a signal
                fail(new Error(signal
                    ? `pandoc was terminated by signal ${signal}`
                    : `pandoc exited with code ${code}`));
                return;
            }
            if (stdinError) {
                fail(stdinError);
                return;
            }
            settled = true;
            resolve(Buffer.concat(chunks).toString("utf8"));
        });
    });
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import equal from 'fast-deep-equal';
|
|
2
|
+
|
|
3
|
+
/** Matches when "`value` is not `undefined`" equals `arg`. */
function $exists(value: any, arg: boolean) {
    return (value !== undefined) === arg;
}

/** Matches when "`value` is `null` or `undefined`" equals `arg`. */
function $null(value: any, arg: boolean) {
    return (value == null) === arg;
}

/** Deep equality when `arg` is an array or an object, strict equality otherwise. */
function $eq(value: any, arg: any) {
    // typeof reports 'object' for arrays (and null) too, so one check covers them all
    return typeof arg === 'object' ? equal(value, arg) : value === arg;
}

/** Negation of `$eq`. */
function $ne(value: any, arg: any) {
    return !$eq(value, arg);
}

/** Matches when at least one of the condition sets in `arg` matches `value`. */
function $or(value: any, arg: any[]) {
    return arg.some(c => matchCondition(value, c));
}

/** Matches when `arg` includes `value`. */
function $in(value: any, arg: any[]) {
    return arg.includes(value);
}

/** Negation of `$in`. */
function $nin(value: any, arg: any[]) {
    return !$in(value, arg);
}

/** Matches when the regular expression built from `arg` tests positive on `value`. */
function $regexp(value: string, arg: string) {
    return new RegExp(arg).test(value);
}

/** Matches when `value` ends with `arg`. */
function $endsWith(value: string, arg: string) {
    return value.endsWith(arg);
}

/** Matches when `value` starts with `arg`. */
function $startsWith(value: string, arg: string) {
    return value.startsWith(arg);
}

/** Matches when `value` contains `arg`. */
function $contains(value: string, arg: string) {
    return value.includes(arg);
}

/** Matches when `value < arg`. */
function $lt(value: number, arg: number) {
    return value < arg;
}

/** Matches when `value > arg`. */
function $gt(value: number, arg: number) {
    return value > arg;
}

/** Matches when `value <= arg`. */
function $lte(value: number, arg: number) {
    return value <= arg;
}

/** Matches when `value >= arg`. */
function $gte(value: number, arg: number) {
    return value >= arg;
}

/** Supported operators, indexed by the key used in a condition object. */
const conditionFns: Record<string, (value: any, arg: any) => boolean> = {
    $exists, $null,
    $eq, $ne,
    $in, $nin,
    $regexp, $startsWith, $endsWith, $contains,
    $lt, $gt, $lte, $gte,
    $or,
}

/**
 * Check a value against a set of conditions.
 *
 * Each key of `conditions` names an operator and its value is the argument
 * of that operator. All the conditions must match (logical AND); evaluation
 * stops at the first one that does not.
 *
 * @param value the value to test
 * @param conditions the operators to apply, e.g. `{ $gt: 1, $lte: 5 }`
 * @returns true if every condition matches, false otherwise
 * @throws Error if a key is not a supported operator
 */
export function matchCondition(value: any, conditions: Record<string, any>) {
    for (const key of Object.keys(conditions)) {
        // Only own entries are operators: a plain property lookup would also
        // find inherited members such as `constructor` or `toString` and run
        // them as if they were conditions
        if (!Object.prototype.hasOwnProperty.call(conditionFns, key)) {
            throw new Error(`Unknown condition: ${key}`);
        }
        if (!conditionFns[key](value, conditions[key])) {
            return false;
        }
    }
    return true;
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { describe, test, expect } from "vitest";
|
|
2
|
+
import { computeActivityOptions } from "./dsl-workflow.ts";
|
|
3
|
+
|
|
4
|
+
describe('Workflow DSL', () => {
    // Builds a fresh copy of the default activity options on every call, so
    // the value passed in and the expected value never share object references
    const makeDefaults = () => ({
        startToCloseTimeout: 1000,
        scheduleToCloseTimeout: 2000,
        scheduleToStartTimeout: 3000,
        retry: {
            initialInterval: 4000,
            maximumInterval: 5000,
            maximumAttempts: 6,
            backoffCoefficient: 7,
            nonRetryableErrorTypes: ['error'],
        },
    });

    // With no custom option the defaults are expected back unchanged
    test('compute activity options without custom options', () => {
        const computed = computeActivityOptions({}, makeDefaults());
        expect(computed).toEqual(makeDefaults());
    })

    // A custom option is expected to replace the matching default only
    test('compute activity options with some custom options', () => {
        const custom = { startToCloseTimeout: 100 };
        const computed = computeActivityOptions(custom, makeDefaults());
        expect(computed).toEqual({
            ...makeDefaults(),
            startToCloseTimeout: `100ms`, // custom value
        });
    })
});
|