@vertesia/workflow 0.42.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +13 -0
- package/README.md +24 -0
- package/bin/bundle-workflows.mjs +26 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +32 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +66 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +18 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/chunkDocument.js +79 -0
- package/lib/cjs/activities/chunkDocument.js.map +1 -0
- package/lib/cjs/activities/createDocumentFromOther.js +64 -0
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
- package/lib/cjs/activities/executeInteraction.js +135 -0
- package/lib/cjs/activities/executeInteraction.js.map +1 -0
- package/lib/cjs/activities/extractDocumentText.js +140 -0
- package/lib/cjs/activities/extractDocumentText.js.map +1 -0
- package/lib/cjs/activities/generateDocumentProperties.js +59 -0
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
- package/lib/cjs/activities/generateEmbeddings.js +292 -0
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
- package/lib/cjs/activities/generateImageRendition.js +104 -0
- package/lib/cjs/activities/generateImageRendition.js.map +1 -0
- package/lib/cjs/activities/generateOrAssignContentType.js +103 -0
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/cjs/activities/getObjectFromStore.js +20 -0
- package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
- package/lib/cjs/activities/index.js +54 -0
- package/lib/cjs/activities/index.js.map +1 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js +102 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +51 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/cjs/activities/notifyWebhook.js +34 -0
- package/lib/cjs/activities/notifyWebhook.js.map +1 -0
- package/lib/cjs/activities/setDocumentStatus.js +15 -0
- package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
- package/lib/cjs/conversion/TextractProcessor.js +416 -0
- package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
- package/lib/cjs/conversion/image.js +22 -0
- package/lib/cjs/conversion/image.js.map +1 -0
- package/lib/cjs/conversion/mutool.js +147 -0
- package/lib/cjs/conversion/mutool.js.map +1 -0
- package/lib/cjs/conversion/pandoc.js +39 -0
- package/lib/cjs/conversion/pandoc.js.map +1 -0
- package/lib/cjs/conversion/pdf.js +13 -0
- package/lib/cjs/conversion/pdf.js.map +1 -0
- package/lib/cjs/dsl/conditions.js +81 -0
- package/lib/cjs/dsl/conditions.js.map +1 -0
- package/lib/cjs/dsl/dsl-workflow.js +223 -0
- package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
- package/lib/cjs/dsl/projections.js +59 -0
- package/lib/cjs/dsl/projections.js.map +1 -0
- package/lib/cjs/dsl/setup/ActivityContext.js +96 -0
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/index.js +16 -0
- package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
- package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
- package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/cjs/dsl/validation.js +122 -0
- package/lib/cjs/dsl/validation.js.map +1 -0
- package/lib/cjs/dsl/vars.js +341 -0
- package/lib/cjs/dsl/vars.js.map +1 -0
- package/lib/cjs/dsl/walk.js +100 -0
- package/lib/cjs/dsl/walk.js.map +1 -0
- package/lib/cjs/errors.js +36 -0
- package/lib/cjs/errors.js.map +1 -0
- package/lib/cjs/index.js +43 -0
- package/lib/cjs/index.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +69 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js +73 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js +91 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/index.js +12 -0
- package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +55 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/cjs/iterative-generation/types.js +5 -0
- package/lib/cjs/iterative-generation/types.js.map +1 -0
- package/lib/cjs/iterative-generation/utils.js +118 -0
- package/lib/cjs/iterative-generation/utils.js.map +1 -0
- package/lib/cjs/package.json +3 -0
- package/lib/cjs/result-types.js +10 -0
- package/lib/cjs/result-types.js.map +1 -0
- package/lib/cjs/system/generateObjectText.js +89 -0
- package/lib/cjs/system/generateObjectText.js.map +1 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js +52 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +37 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/cjs/utils/auth.js +15 -0
- package/lib/cjs/utils/auth.js.map +1 -0
- package/lib/cjs/utils/blobs.js +63 -0
- package/lib/cjs/utils/blobs.js.map +1 -0
- package/lib/cjs/utils/client.js +25 -0
- package/lib/cjs/utils/client.js.map +1 -0
- package/lib/cjs/utils/expand-vars.js +33 -0
- package/lib/cjs/utils/expand-vars.js.map +1 -0
- package/lib/cjs/utils/memory.js +72 -0
- package/lib/cjs/utils/memory.js.map +1 -0
- package/lib/cjs/utils/tokens.js +38 -0
- package/lib/cjs/utils/tokens.js.map +1 -0
- package/lib/cjs/vars.js +20 -0
- package/lib/cjs/vars.js.map +1 -0
- package/lib/cjs/workflows.js +17 -0
- package/lib/cjs/workflows.js.map +1 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +29 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +63 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +15 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/chunkDocument.js +76 -0
- package/lib/esm/activities/chunkDocument.js.map +1 -0
- package/lib/esm/activities/createDocumentFromOther.js +58 -0
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
- package/lib/esm/activities/executeInteraction.js +131 -0
- package/lib/esm/activities/executeInteraction.js.map +1 -0
- package/lib/esm/activities/extractDocumentText.js +137 -0
- package/lib/esm/activities/extractDocumentText.js.map +1 -0
- package/lib/esm/activities/generateDocumentProperties.js +56 -0
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
- package/lib/esm/activities/generateEmbeddings.js +256 -0
- package/lib/esm/activities/generateEmbeddings.js.map +1 -0
- package/lib/esm/activities/generateImageRendition.js +98 -0
- package/lib/esm/activities/generateImageRendition.js.map +1 -0
- package/lib/esm/activities/generateOrAssignContentType.js +100 -0
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/esm/activities/getObjectFromStore.js +17 -0
- package/lib/esm/activities/getObjectFromStore.js.map +1 -0
- package/lib/esm/activities/index.js +21 -0
- package/lib/esm/activities/index.js.map +1 -0
- package/lib/esm/activities/media/processPdfWithTextract.js +98 -0
- package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +48 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/esm/activities/notifyWebhook.js +31 -0
- package/lib/esm/activities/notifyWebhook.js.map +1 -0
- package/lib/esm/activities/setDocumentStatus.js +12 -0
- package/lib/esm/activities/setDocumentStatus.js.map +1 -0
- package/lib/esm/conversion/TextractProcessor.js +409 -0
- package/lib/esm/conversion/TextractProcessor.js.map +1 -0
- package/lib/esm/conversion/image.js +16 -0
- package/lib/esm/conversion/image.js.map +1 -0
- package/lib/esm/conversion/mutool.js +139 -0
- package/lib/esm/conversion/mutool.js.map +1 -0
- package/lib/esm/conversion/pandoc.js +36 -0
- package/lib/esm/conversion/pandoc.js.map +1 -0
- package/lib/esm/conversion/pdf.js +7 -0
- package/lib/esm/conversion/pdf.js.map +1 -0
- package/lib/esm/dsl/conditions.js +75 -0
- package/lib/esm/dsl/conditions.js.map +1 -0
- package/lib/esm/dsl/dsl-workflow.js +216 -0
- package/lib/esm/dsl/dsl-workflow.js.map +1 -0
- package/lib/esm/dsl/projections.js +55 -0
- package/lib/esm/dsl/projections.js.map +1 -0
- package/lib/esm/dsl/setup/ActivityContext.js +91 -0
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/index.js +12 -0
- package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/providers.js +61 -0
- package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/esm/dsl/test/test-child-workflow.js +5 -0
- package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/esm/dsl/validation.js +118 -0
- package/lib/esm/dsl/validation.js.map +1 -0
- package/lib/esm/dsl/vars.js +335 -0
- package/lib/esm/dsl/vars.js.map +1 -0
- package/lib/esm/dsl/walk.js +96 -0
- package/lib/esm/dsl/walk.js.map +1 -0
- package/lib/esm/errors.js +30 -0
- package/lib/esm/errors.js.map +1 -0
- package/lib/esm/index.js +25 -0
- package/lib/esm/index.js.map +1 -0
- package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
- package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +66 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generatePart.js +70 -0
- package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generateToc.js +88 -0
- package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/index.js +5 -0
- package/lib/esm/iterative-generation/activities/index.js.map +1 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +52 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/esm/iterative-generation/types.js +2 -0
- package/lib/esm/iterative-generation/types.js.map +1 -0
- package/lib/esm/iterative-generation/utils.js +109 -0
- package/lib/esm/iterative-generation/utils.js.map +1 -0
- package/lib/esm/result-types.js +7 -0
- package/lib/esm/result-types.js.map +1 -0
- package/lib/esm/system/generateObjectText.js +86 -0
- package/lib/esm/system/generateObjectText.js.map +1 -0
- package/lib/esm/system/notifyWebhookWorkflow.js +49 -0
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +34 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/esm/utils/auth.js +8 -0
- package/lib/esm/utils/auth.js.map +1 -0
- package/lib/esm/utils/blobs.js +52 -0
- package/lib/esm/utils/blobs.js.map +1 -0
- package/lib/esm/utils/client.js +22 -0
- package/lib/esm/utils/client.js.map +1 -0
- package/lib/esm/utils/expand-vars.js +30 -0
- package/lib/esm/utils/expand-vars.js.map +1 -0
- package/lib/esm/utils/memory.js +60 -0
- package/lib/esm/utils/memory.js.map +1 -0
- package/lib/esm/utils/tokens.js +34 -0
- package/lib/esm/utils/tokens.js.map +1 -0
- package/lib/esm/vars.js +4 -0
- package/lib/esm/vars.js.map +1 -0
- package/lib/esm/workflows.js +9 -0
- package/lib/esm/workflows.js.map +1 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +29 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/chunkDocument.d.ts +18 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -0
- package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
- package/lib/types/activities/executeInteraction.d.ts +40 -0
- package/lib/types/activities/executeInteraction.d.ts.map +1 -0
- package/lib/types/activities/extractDocumentText.d.ts +9 -0
- package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
- package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
- package/lib/types/activities/generateEmbeddings.d.ts +49 -0
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
- package/lib/types/activities/generateImageRendition.d.ts +17 -0
- package/lib/types/activities/generateImageRendition.d.ts.map +1 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
- package/lib/types/activities/getObjectFromStore.d.ts +14 -0
- package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
- package/lib/types/activities/index.d.ts +21 -0
- package/lib/types/activities/index.d.ts.map +1 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +14 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
- package/lib/types/activities/notifyWebhook.d.ts +17 -0
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
- package/lib/types/activities/setDocumentStatus.d.ts +15 -0
- package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
- package/lib/types/conversion/TextractProcessor.d.ts +45 -0
- package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
- package/lib/types/conversion/image.d.ts +9 -0
- package/lib/types/conversion/image.d.ts.map +1 -0
- package/lib/types/conversion/mutool.d.ts +19 -0
- package/lib/types/conversion/mutool.d.ts.map +1 -0
- package/lib/types/conversion/pandoc.d.ts +2 -0
- package/lib/types/conversion/pandoc.d.ts.map +1 -0
- package/lib/types/conversion/pdf.d.ts +2 -0
- package/lib/types/conversion/pdf.d.ts.map +1 -0
- package/lib/types/dsl/conditions.d.ts +2 -0
- package/lib/types/dsl/conditions.d.ts.map +1 -0
- package/lib/types/dsl/dsl-workflow.d.ts +5 -0
- package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
- package/lib/types/dsl/projections.d.ts +4 -0
- package/lib/types/dsl/projections.d.ts.map +1 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts +14 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
- package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
- package/lib/types/dsl/validation.d.ts +4 -0
- package/lib/types/dsl/validation.d.ts.map +1 -0
- package/lib/types/dsl/vars.d.ts +48 -0
- package/lib/types/dsl/vars.d.ts.map +1 -0
- package/lib/types/dsl/walk.d.ts +18 -0
- package/lib/types/dsl/walk.d.ts.map +1 -0
- package/lib/types/errors.d.ts +16 -0
- package/lib/types/errors.d.ts.map +1 -0
- package/lib/types/index.d.ts +24 -0
- package/lib/types/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/index.d.ts +5 -0
- package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
- package/lib/types/iterative-generation/types.d.ts +79 -0
- package/lib/types/iterative-generation/types.d.ts.map +1 -0
- package/lib/types/iterative-generation/utils.d.ts +27 -0
- package/lib/types/iterative-generation/utils.d.ts.map +1 -0
- package/lib/types/result-types.d.ts +22 -0
- package/lib/types/result-types.d.ts.map +1 -0
- package/lib/types/system/generateObjectText.d.ts +4 -0
- package/lib/types/system/generateObjectText.d.ts.map +1 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts +6 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +40 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
- package/lib/types/utils/auth.d.ts +4 -0
- package/lib/types/utils/auth.d.ts.map +1 -0
- package/lib/types/utils/blobs.d.ts +8 -0
- package/lib/types/utils/blobs.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts +7 -0
- package/lib/types/utils/client.d.ts.map +1 -0
- package/lib/types/utils/expand-vars.d.ts +8 -0
- package/lib/types/utils/expand-vars.d.ts.map +1 -0
- package/lib/types/utils/memory.d.ts +12 -0
- package/lib/types/utils/memory.d.ts.map +1 -0
- package/lib/types/utils/tokens.d.ts +11 -0
- package/lib/types/utils/tokens.d.ts.map +1 -0
- package/lib/types/vars.d.ts +3 -0
- package/lib/types/vars.d.ts.map +1 -0
- package/lib/types/workflows.d.ts +9 -0
- package/lib/types/workflows.d.ts.map +1 -0
- package/lib/workflows-bundle.js +18394 -0
- package/package.json +109 -0
- package/src/activities/advanced/createDocumentTypeFromInteractionRun.ts +54 -0
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +97 -0
- package/src/activities/advanced/updateDocumentFromInteractionRun.ts +34 -0
- package/src/activities/chunkDocument.ts +124 -0
- package/src/activities/createDocumentFromOther.ts +92 -0
- package/src/activities/executeInteraction.ts +191 -0
- package/src/activities/extractDocumentText.ts +174 -0
- package/src/activities/generateDocumentProperties.ts +93 -0
- package/src/activities/generateEmbeddings.ts +345 -0
- package/src/activities/generateImageRendition.ts +134 -0
- package/src/activities/generateOrAssignContentType.ts +152 -0
- package/src/activities/getObjectFromStore.ts +31 -0
- package/src/activities/index.ts +21 -0
- package/src/activities/media/processPdfWithTextract.ts +141 -0
- package/src/activities/media/transcribeMediaWithGladia.ts +83 -0
- package/src/activities/notifyWebhook.test.ts +32 -0
- package/src/activities/notifyWebhook.ts +51 -0
- package/src/activities/setDocumentStatus.ts +25 -0
- package/src/conversion/TextractProcessor.ts +505 -0
- package/src/conversion/image.test.ts +26 -0
- package/src/conversion/image.ts +22 -0
- package/src/conversion/mutool.test.ts +74 -0
- package/src/conversion/mutool.ts +180 -0
- package/src/conversion/pandoc.test.ts +22 -0
- package/src/conversion/pandoc.ts +44 -0
- package/src/conversion/pdf.test.ts +35 -0
- package/src/conversion/pdf.ts +8 -0
- package/src/dsl/conditions.ts +76 -0
- package/src/dsl/dsl-workflow.test.ts +58 -0
- package/src/dsl/dsl-workflow.ts +235 -0
- package/src/dsl/ms.d.ts +11 -0
- package/src/dsl/projections.test.ts +159 -0
- package/src/dsl/projections.ts +72 -0
- package/src/dsl/setup/ActivityContext.ts +106 -0
- package/src/dsl/setup/fetch/DataProvider.ts +45 -0
- package/src/dsl/setup/fetch/index.ts +19 -0
- package/src/dsl/setup/fetch/providers.ts +67 -0
- package/src/dsl/test/test-child-workflow.ts +6 -0
- package/src/dsl/validation.test.ts +257 -0
- package/src/dsl/validation.ts +125 -0
- package/src/dsl/vars.test.ts +245 -0
- package/src/dsl/vars.ts +340 -0
- package/src/dsl/walk.test.ts +81 -0
- package/src/dsl/walk.ts +103 -0
- package/src/dsl/workflow-exec-child.test.ts +182 -0
- package/src/dsl/workflow-fetch.test.ts +135 -0
- package/src/dsl/workflow-import.test.ts +89 -0
- package/src/dsl/workflow.test.ts +110 -0
- package/src/errors.ts +24 -0
- package/src/index.ts +27 -0
- package/src/iterative-generation/activities/extractToc.ts +49 -0
- package/src/iterative-generation/activities/finalizeOutput.ts +77 -0
- package/src/iterative-generation/activities/generatePart.ts +82 -0
- package/src/iterative-generation/activities/generateToc.ts +98 -0
- package/src/iterative-generation/activities/index.ts +4 -0
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +67 -0
- package/src/iterative-generation/types.ts +99 -0
- package/src/iterative-generation/utils.ts +123 -0
- package/src/result-types.ts +25 -0
- package/src/system/generateObjectText.ts +109 -0
- package/src/system/notifyWebhookWorkflow.ts +64 -0
- package/src/system/recalculateEmbeddingsWorkflow.ts +46 -0
- package/src/utils/auth.ts +10 -0
- package/src/utils/blobs.ts +58 -0
- package/src/utils/client.ts +31 -0
- package/src/utils/expand-vars.ts +31 -0
- package/src/utils/memory.ts +66 -0
- package/src/utils/tokens.ts +44 -0
- package/src/vars.ts +3 -0
- package/src/workflows.ts +9 -0
@@ -0,0 +1,141 @@
|
|
1
|
+
/**
|
2
|
+
* Use textract to convert a pdf into a data structure of the following format:
|
3
|
+
* <document>
|
4
|
+
* <page number="n">
|
5
|
+
* <text/>
|
6
|
+
* <table/>
|
7
|
+
* <text/>
|
8
|
+
* <figure/>
|
9
|
+
* ...
|
10
|
+
* </page>
|
11
|
+
*/
|
12
|
+
|
13
|
+
import { fromWebToken } from "@aws-sdk/credential-providers";
|
14
|
+
import { AwsConfiguration, CreateContentObjectPayload, DSLActivityExecutionPayload, DSLActivitySpec, SupportedIntegrations } from "@vertesia/common";
|
15
|
+
import type { AwsCredentialIdentityProvider } from "@smithy/types";
|
16
|
+
import { log } from "@temporalio/activity";
|
17
|
+
import { TextractProcessor } from "../../conversion/TextractProcessor.js";
|
18
|
+
import { setupActivity } from "../../dsl/setup/ActivityContext.js";
|
19
|
+
import { NoDocumentFound } from "../../errors.js";
|
20
|
+
import { TextExtractionResult, TextExtractionStatus } from "../../result-types.js";
|
21
|
+
import { fetchBlobAsBuffer, md5 } from "../../utils/blobs.js";
|
22
|
+
import { countTokens } from "../../utils/tokens.js";
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
/**
 * Parameters accepted by the `convertPdfToStructuredText` activity.
 */
export interface ConvertPdfToStructuredTextParams {
    /**
     * When truthy, the extraction runs even if the object already has text.
     * Otherwise an object that already has text is skipped.
     */
    force?: boolean;
}
|
29
|
+
|
30
|
+
/**
 * DSL activity spec for the PDF-to-structured-text activity, parameterized by
 * {@link ConvertPdfToStructuredTextParams}.
 */
export interface ConvertPdfToStructuredText extends DSLActivitySpec<ConvertPdfToStructuredTextParams> {
    /** Literal activity name used in the spec. */
    name: 'ConvertPdfToStructuredText';
}
|
33
|
+
|
34
|
+
/**
 * Result returned by `convertPdfToStructuredText`: the common text extraction
 * result plus an optional message.
 */
export interface StructuredTextResult extends TextExtractionResult {
    /** Optional human-readable detail about the outcome (e.g. why the extraction was skipped). */
    message?: string;
}
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
/**
 * Extracts text from the PDF attached to a content object with AWS Textract
 * (through `TextractProcessor`) and stores the result on the object.
 *
 * Flow:
 *  1. Retrieve the object (including its text) and skip when text is already
 *     present and `params.force` is not set.
 *  2. Check that the object has a content source that can be resolved.
 *  3. Build AWS credentials from the project's AWS integration.
 *  4. Upload the content when its source is not an `s3://` URI, start the
 *     Textract analysis and poll until it leaves the `IN_PROGRESS` state.
 *  5. On success, save the extracted text, its etag and the token counts.
 *
 * @param payload - DSL activity execution payload, resolved via `setupActivity`.
 * @returns A `skipped` result when text already exists and `force` is off,
 *          otherwise a `success` result once the text has been saved.
 * @throws NoDocumentFound when the object has no content source, when the source
 *         cannot be resolved, or when the AWS integration is not usable.
 * @throws Error when the Textract job ends with any status other than `SUCCEEDED`.
 */
export async function convertPdfToStructuredText(payload: DSLActivityExecutionPayload): Promise<StructuredTextResult> {
    // Delay between two Textract job status checks.
    const pollIntervalMs = 5000;

    const { params, client, objectId } = await setupActivity<ConvertPdfToStructuredTextParams>(payload);

    const object = await client.objects.retrieve(objectId, "+text");

    if (object.text && !params.force) {
        return { hasText: true, objectId, status: TextExtractionStatus.skipped, message: "text already present and force not enabled" };
    }

    if (!object.content?.source) {
        throw new NoDocumentFound(`No source found for object ${objectId}`);
    }

    // The resolved URL is only used as a check that the source can be fetched.
    const { source: pdfUrl } = await client.store.objects.getContentSource(objectId);

    if (!pdfUrl) {
        throw new NoDocumentFound(`Error fetching source ${object.content.source}`);
    }

    const awsConfig = (await client.projects.integrations.retrieve(client.project!, SupportedIntegrations.aws)) as AwsConfiguration;
    const credentials = await getS3AWSCredentials(awsConfig, payload.auth_token, client.project!);

    // NOTE(review): region and bucket are hard-coded here — confirm whether they
    // should come from the project AWS integration instead.
    const processor = new TextractProcessor({
        fileKey: objectId,
        region: "us-west-2",
        bucket: "cp-textract-tests",
        credentials,
        log: log,
        detectImages: true,
        includeConfidenceInTables: true,
    });

    try {
        // Only non-S3 sources are uploaded.
        // NOTE(review): for `s3://` sources nothing is uploaded — presumably the
        // processor can already reach the file; confirm.
        if (!object.content.source.startsWith("s3://")) {
            const buf = await fetchBlobAsBuffer(client, object.content.source);
            await processor.upload(buf);
        }

        const jobId = await processor.startAnalysis(objectId);

        // Poll until the job leaves the IN_PROGRESS state.
        let jobStatus = await processor.checkJobStatus(jobId);
        while (jobStatus === "IN_PROGRESS") {
            await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
            jobStatus = await processor.checkJobStatus(jobId);
        }

        if (jobStatus !== "SUCCEEDED") {
            throw new Error(`Job failed with status: ${jobStatus}`);
        }

        log.info(`Job ${jobId} succeeded, saving results`, { jobId });
        const ftext = await processor.processResults(jobId);
        const tokensData = countTokens(ftext);
        // Reuse the content etag when there is one, otherwise hash the extracted text.
        const etag = object.content.etag ?? md5(ftext);
        const updateData: CreateContentObjectPayload = {
            text: ftext,
            text_etag: etag,
            tokens: {
                ...tokensData,
                etag: etag,
            },
        };

        await client.objects.update(objectId, updateData);
        // Use the activity logger (not console) so the message goes through the same logger as the rest of the activity.
        log.info("Full text updated", { objectId, jobId });

        return { hasText: true, objectId, status: TextExtractionStatus.success, message: "Text extracted successfully" };
    } catch (error) {
        log.error("Error processing document", { objectId, error });
        throw error;
    }
}
|
121
|
+
|
122
|
+
/**
 * Builds an AWS credential provider for the project by exchanging the given
 * auth token for the role configured in the project's AWS integration
 * (`fromWebToken`).
 *
 * @param awsConfig - AWS integration configuration of the project.
 * @param composableAuthToken - Token passed to AWS as the web identity token.
 * @param projectId - Project id, used in logs and in the role session name.
 * @returns The credential provider created by `fromWebToken`.
 * @throws NoDocumentFound when the AWS integration is missing or disabled, or
 *         when it has no `s3_role_arn`.
 */
export async function getS3AWSCredentials(awsConfig: AwsConfiguration, composableAuthToken: string, projectId: string): Promise<AwsCredentialIdentityProvider> {
    if (!awsConfig?.enabled) {
        throw new NoDocumentFound("AWS integration is not enabled for this project");
    }
    if (!awsConfig.s3_role_arn) {
        throw new NoDocumentFound("S3 Role ARN is not defined in AWS project integration");
    }

    // The auth token is a credential: it must never be written to the logs.
    log.info("Getting AWS credentials for Textract", { projectId, roleArn: awsConfig.s3_role_arn });

    return fromWebToken({
        webIdentityToken: composableAuthToken,
        roleArn: awsConfig.s3_role_arn,
        roleSessionName: `cp-project-textract-${projectId}`,
    });
}
|
@@ -0,0 +1,83 @@
|
|
1
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec, GladiaConfiguration, SupportedIntegrations } from "@vertesia/common";
|
2
|
+
import { activityInfo, CompleteAsyncError, log } from "@temporalio/activity";
|
3
|
+
import { FetchClient } from "api-fetch-client";
|
4
|
+
import { setupActivity } from "../../dsl/setup/ActivityContext.js";
|
5
|
+
import { NoDocumentFound } from "../../errors.js";
|
6
|
+
import { TextExtractionResult, TextExtractionStatus } from "../../index.js";
|
7
|
+
|
8
|
+
|
9
|
+
/**
 * Parameters accepted by the `transcribeMedia` activity.
 */
export interface TranscriptMediaParams {
    /** Optional environment id. NOTE(review): not read by `transcribeMedia` — confirm whether it is still needed. */
    environmentId?: string;
    /**
     * When truthy, the transcription is requested even if the object already has text.
     * Otherwise an object that already has text is skipped.
     */
    force?: boolean;
}
|
13
|
+
|
14
|
+
/**
 * DSL activity spec for the media transcription activity, parameterized by
 * {@link TranscriptMediaParams}.
 */
export interface TranscriptMedia extends DSLActivitySpec<TranscriptMediaParams> {
    /** Literal activity name used in the spec. */
    name: 'TranscribeMedia';
}
|
17
|
+
|
18
|
+
/**
 * Result returned by `transcribeMedia`: the common text extraction result plus
 * an optional message.
 */
export interface TranscriptMediaResult extends TextExtractionResult {
    /** Optional human-readable detail about the outcome (e.g. why the transcription was skipped). */
    message?: string;
}
|
21
|
+
|
22
|
+
/** Default Gladia API base URL, used when the project's Gladia integration does not define its own `url`. */
const GLADIA_URL = "https://api.gladia.io/v2";
|
23
|
+
|
24
|
+
/**
 * Requests a transcription of the media attached to a content object from Gladia.
 *
 * The request carries a callback URL that embeds the activity task token. After
 * the request has been sent the activity throws `CompleteAsyncError`, so it does
 * not complete by returning a value.
 * NOTE(review): presumably the Gladia webhook completes the activity with the
 * task token — confirm.
 *
 * @param payload - DSL activity execution payload, resolved via `setupActivity`.
 * @returns A `skipped` result when the object already has text and `force` is
 *          off. In every other case the function throws (see below).
 * @throws NoDocumentFound when the Gladia integration is missing or disabled,
 *         when the object has no content source, or when the source cannot be resolved.
 * @throws CompleteAsyncError once the transcription request has been sent.
 */
export async function transcribeMedia(payload: DSLActivityExecutionPayload): Promise<TranscriptMediaResult> {
    const { params, client, objectId } = await setupActivity<TranscriptMediaParams>(payload);

    const gladiaConfig = await client.projects.integrations.retrieve(payload.project_id, SupportedIntegrations.gladia) as GladiaConfiguration | undefined;
    if (!gladiaConfig?.enabled) {
        throw new NoDocumentFound("Gladia integration not enabled");
    }

    const object = await client.objects.retrieve(objectId, "+text");
    const gladiaClient = new FetchClient(gladiaConfig.url ?? GLADIA_URL);
    gladiaClient.withHeaders({ "x-gladia-key": gladiaConfig.api_key });

    if (object.text && !params.force) {
        return { hasText: true, objectId, status: TextExtractionStatus.skipped, message: "text already present and force not enabled" };
    }

    if (!object.content?.source) {
        throw new NoDocumentFound(`No source found for object ${objectId}`);
    }

    const { source: mediaUrl } = await client.store.objects.getContentSource(objectId);

    if (!mediaUrl) {
        throw new NoDocumentFound(`Error fetching source ${object.content.source}`);
    }

    // The task token is embedded in the callback URL passed to Gladia.
    const taskToken = Buffer.from(activityInfo().taskToken).toString('base64url');
    const callbackUrl = generateCallbackUrlForGladia(client.store.baseUrl, payload.auth_token, taskToken, objectId);

    // The callback URL embeds the auth token and the task token: keep it out of the logs.
    log.info(`Transcribing media ${mediaUrl} with Gladia`, { objectId });

    const res = await gladiaClient.post("/transcription", {
        payload: {
            audio_url: mediaUrl,
            callback_url: callbackUrl,
            diarization_enhanced: true,
            enable_code_switching: true,
            subtitles: true,
            subtitles_config: {
                formats: ["vtt"],
            },
        },
    }) as GladiaTranscriptRequestResponse;

    log.info(`Transcription request sent to Gladia`, { objectId, res });

    // Tell Temporal that this activity will be completed asynchronously.
    throw new CompleteAsyncError();
}
|
74
|
+
|
75
|
+
|
76
|
+
/**
 * Builds the webhook URL passed to Gladia as the transcription callback.
 *
 * The object id (path segment) and both tokens (query values) are URL-encoded so
 * that characters such as `&`, `=`, `+`, `/` or spaces cannot corrupt the URL.
 * Values made only of URL-safe characters produce the same URL as before.
 *
 * @param baseUrl - Base URL the webhook path is appended to.
 * @param authToken - Auth token sent back as the `auth_token` query parameter.
 * @param taskToken - Encoded task token sent back as the `task_token` query parameter.
 * @param objectId - Id of the content object being transcribed.
 * @returns The callback URL.
 */
function generateCallbackUrlForGladia(baseUrl: string, authToken: string, taskToken: string, objectId: string): string {
    const path = `/api/v1/webhooks/gladia/${encodeURIComponent(objectId)}`;
    const query = `auth_token=${encodeURIComponent(authToken)}&task_token=${encodeURIComponent(taskToken)}`;
    return `${baseUrl}${path}?${query}`;
}
|
79
|
+
|
80
|
+
/**
 * Shape the response of the Gladia `POST /transcription` request is cast to.
 */
interface GladiaTranscriptRequestResponse {
    /** Identifier returned by Gladia (presumably the transcription job id — confirm against the Gladia API). */
    id: string;
    /** URL returned by Gladia (presumably where the result can be fetched — confirm against the Gladia API). */
    result_url: string;
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
import {
|
2
|
+
MockActivityEnvironment,
|
3
|
+
TestWorkflowEnvironment,
|
4
|
+
} from "@temporalio/testing";
|
5
|
+
import { beforeAll, describe, expect, test } from "vitest";
|
6
|
+
import { notifyWebhook, NotifyWebhook } from "./notifyWebhook.js";
|
7
|
+
|
8
|
+
// Shared fixtures for the whole file, initialised once in `beforeAll`.
let testEnv: TestWorkflowEnvironment;
let activityContext: MockActivityEnvironment;

beforeAll(async () => {
    testEnv = await TestWorkflowEnvironment.createLocal();
    activityContext = new MockActivityEnvironment();

    // Vitest treats a function returned from `beforeAll` as the matching cleanup
    // (equivalent to `afterAll`). Tear the local test environment down here so the
    // server it started does not outlive the test run.
    return async () => {
        await testEnv.teardown();
    };
});
|
15
|
+
|
16
|
+
// https://github.com/becomposable/studio/issues/432 Skip tests
|
17
|
+
// Cannot read properties of undefined (reading 'params')
|
18
|
+
describe("Webhook should be notified", () => {
    // Skipped: see the issue referenced above this suite.
    test.skip("test POST", async () => {
        // Activity spec describing a POST notification with a small JSON payload.
        const spec = {
            name: "notifyWebhook",
            params: {
                target_url: "https://en5zdcyvn4dc3.x.pipedream.net",
                method: "POST",
                payload: { message: "Hello World" },
            },
        } satisfies NotifyWebhook;

        const outcome = await activityContext.run(notifyWebhook, spec);

        expect(outcome).toBeDefined();
    });
});
|
@@ -0,0 +1,51 @@
|
|
1
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
|
+
import { log } from "@temporalio/activity";
|
3
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
|
+
import { WorkflowParamNotFound } from "../errors.js";
|
5
|
+
|
6
|
+
/**
 * Parameters accepted by the `notifyWebhook` activity.
 */
interface NotifyWebhookParams {
    /** URL to send the notification to. */
    target_url: string;
    /** HTTP method to use. */
    method: 'GET' | 'POST';
    /** Payload to send (if POST then as JSON body, if GET then as query string). */
    payload: Record<string, any>;
    /** Additional headers to send. */
    headers?: Record<string, string>;
}
|
12
|
+
|
13
|
+
|
14
|
+
/**
 * DSL activity spec for the `notifyWebhook` activity, parameterised with
 * `NotifyWebhookParams`.
 */
export interface NotifyWebhook extends DSLActivitySpec<NotifyWebhookParams> {
    /** Fixed activity name. */
    name: 'notifyWebhook';
}
|
17
|
+
|
18
|
+
|
19
|
+
/**
 * Activity that notifies an external webhook over HTTP.
 *
 * - `POST`: sends a JSON body made of the `payload` entries merged with the
 *   activity params (params win on key collisions). This is unchanged.
 * - any other method (`GET`): sends the `payload` entries as query-string
 *   parameters appended to `target_url`, as documented on
 *   `NotifyWebhookParams.payload`. Previously the payload was silently dropped.
 *
 * @param payload - DSL activity execution payload; the params are resolved through `setupActivity`.
 * @returns The HTTP status, the status text and the URL of the response.
 * @throws WorkflowParamNotFound when `target_url` is missing.
 * @throws Error when the request fails or the response status is not OK.
 */
export async function notifyWebhook(payload: DSLActivityExecutionPayload) {
    const { params } = await setupActivity<NotifyWebhookParams>(payload);
    const { target_url, method, payload: requestPayload, headers } = params;

    if (!target_url) throw new WorkflowParamNotFound('target_url');

    const isPost = method === 'POST';
    const body = isPost
        ? JSON.stringify({
            ...requestPayload,
            ...params,
        })
        : undefined;
    // Without a body the payload has to travel in the URL.
    const requestUrl = isPost ? target_url : appendWebhookQuery(target_url, requestPayload);

    log.info(`Notifying webhook at ${target_url}`);

    let res: Response;
    try {
        res = await fetch(requestUrl, {
            method,
            body,
            headers: {
                'Content-Type': 'application/json',
                ...headers,
            },
        });
    } catch (err) {
        log.warn(`Failed to notify webhook ${target_url}: ${err}`);
        throw new Error(`Failed to notify webhook ${target_url}: ${err}`);
    }

    if (!res.ok) {
        log.warn(`Failed to notify webhook ${target_url} - ${res.status}: ${res.statusText}`, { res });
        throw new Error(`Failed to notify webhook ${target_url}: ${res.statusText}`);
    }

    return { status: res.status, message: res.statusText, url: res.url };
}

/**
 * Appends the entries of `query` to `url` as query-string parameters.
 *
 * `null`/`undefined` values are skipped, objects and arrays are JSON-encoded and
 * everything else is stringified. An existing query string and fragment are
 * preserved; the URL is returned untouched when there is nothing to append.
 */
function appendWebhookQuery(url: string, query: Record<string, any> | undefined): string {
    const search = new URLSearchParams();
    for (const [key, value] of Object.entries(query ?? {})) {
        if (value === undefined || value === null) continue;
        search.append(key, typeof value === 'object' ? JSON.stringify(value) : String(value));
    }

    const encoded = search.toString();
    if (!encoded) return url;

    // Keep a `#fragment` at the very end of the URL.
    const hashIndex = url.indexOf('#');
    const base = hashIndex === -1 ? url : url.slice(0, hashIndex);
    const fragment = hashIndex === -1 ? '' : url.slice(hashIndex);

    let separator = '?';
    if (base.includes('?')) {
        separator = base.endsWith('?') || base.endsWith('&') ? '' : '&';
    }
    return `${base}${separator}${encoded}${fragment}`;
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
import { ContentObjectStatus, DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
3
|
+
|
4
|
+
/**
 * Parameters accepted by the `setDocumentStatus` activity.
 */
export interface SetDocumentStatusParams {
    /** Status to set on the targeted object. */
    status: ContentObjectStatus;
}
|
7
|
+
|
8
|
+
/**
 * DSL activity spec for the `setDocumentStatus` activity, parameterised with
 * `SetDocumentStatusParams`.
 */
export interface SetDocumentStatus extends DSLActivitySpec<SetDocumentStatusParams> {
    /** Fixed activity name. */
    name: 'setDocumentStatus';
    /** Typed as `never`, so a projection cannot be specified for this activity. */
    projection?: never;
}
|
12
|
+
|
13
|
+
/**
 * Activity that updates the status of the object targeted by the payload.
 *
 * NOTE(review): the previous comment stated that a union type is used for the
 * status because TypeScript enums break the workflow code generation — confirm
 * this still holds for `ContentObjectStatus`.
 *
 * @param payload - DSL activity execution payload; `setupActivity` resolves it into the client, the params and the object id.
 * @returns The `status` field of the object returned by the update call.
 */
export async function setDocumentStatus(payload: DSLActivityExecutionPayload) {
    const context = await setupActivity<SetDocumentStatusParams>(payload);
    const { client, objectId } = context;
    const requestedStatus = context.params.status;

    const updated = await client.objects.update(objectId, { status: requestedStatus });
    return updated.status;
}
|