@vertesia/workflow 0.24.0-dev.202601221707
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +13 -0
- package/README.md +65 -0
- package/bin/bundle-workflows.mjs +39 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +33 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +73 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +19 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/chunkDocument.js +85 -0
- package/lib/cjs/activities/chunkDocument.js.map +1 -0
- package/lib/cjs/activities/copyParentArtifacts.js +127 -0
- package/lib/cjs/activities/copyParentArtifacts.js.map +1 -0
- package/lib/cjs/activities/createDocumentFromOther.js +64 -0
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
- package/lib/cjs/activities/executeInteraction.js +194 -0
- package/lib/cjs/activities/executeInteraction.js.map +1 -0
- package/lib/cjs/activities/extractDocumentText.js +156 -0
- package/lib/cjs/activities/extractDocumentText.js.map +1 -0
- package/lib/cjs/activities/generateDocumentProperties.js +83 -0
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
- package/lib/cjs/activities/generateEmbeddings.js +257 -0
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
- package/lib/cjs/activities/generateOrAssignContentType.js +125 -0
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/cjs/activities/getObjectFromStore.js +20 -0
- package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
- package/lib/cjs/activities/handleError.js +22 -0
- package/lib/cjs/activities/handleError.js.map +1 -0
- package/lib/cjs/activities/index-dsl.js +51 -0
- package/lib/cjs/activities/index-dsl.js.map +1 -0
- package/lib/cjs/activities/index.js +21 -0
- package/lib/cjs/activities/index.js.map +1 -0
- package/lib/cjs/activities/media/prepareAudio.js +239 -0
- package/lib/cjs/activities/media/prepareAudio.js.map +1 -0
- package/lib/cjs/activities/media/prepareVideo.js +429 -0
- package/lib/cjs/activities/media/prepareVideo.js.map +1 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js +103 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/cjs/activities/media/saveGladiaTranscription.js +81 -0
- package/lib/cjs/activities/media/saveGladiaTranscription.js.map +1 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +82 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/cjs/activities/notifyWebhook.js +158 -0
- package/lib/cjs/activities/notifyWebhook.js.map +1 -0
- package/lib/cjs/activities/rateLimiter.js +30 -0
- package/lib/cjs/activities/rateLimiter.js.map +1 -0
- package/lib/cjs/activities/renditions/generateImageRendition.js +66 -0
- package/lib/cjs/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js +200 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/cjs/activities/setDocumentStatus.js +15 -0
- package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
- package/lib/cjs/conversion/TextractProcessor.js +417 -0
- package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
- package/lib/cjs/conversion/image.js +149 -0
- package/lib/cjs/conversion/image.js.map +1 -0
- package/lib/cjs/conversion/markitdown.js +42 -0
- package/lib/cjs/conversion/markitdown.js.map +1 -0
- package/lib/cjs/conversion/mutool.js +147 -0
- package/lib/cjs/conversion/mutool.js.map +1 -0
- package/lib/cjs/conversion/pandoc.js +39 -0
- package/lib/cjs/conversion/pandoc.js.map +1 -0
- package/lib/cjs/dsl/conditions.js +81 -0
- package/lib/cjs/dsl/conditions.js.map +1 -0
- package/lib/cjs/dsl/dsl-workflow.js +343 -0
- package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
- package/lib/cjs/dsl/dslProxyActivities.js +23 -0
- package/lib/cjs/dsl/dslProxyActivities.js.map +1 -0
- package/lib/cjs/dsl/projections.js +59 -0
- package/lib/cjs/dsl/projections.js.map +1 -0
- package/lib/cjs/dsl/setup/ActivityContext.js +122 -0
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/index.js +16 -0
- package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
- package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
- package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/cjs/dsl/validation.js +122 -0
- package/lib/cjs/dsl/validation.js.map +1 -0
- package/lib/cjs/dsl/vars.js +341 -0
- package/lib/cjs/dsl/vars.js.map +1 -0
- package/lib/cjs/dsl/walk.js +100 -0
- package/lib/cjs/dsl/walk.js.map +1 -0
- package/lib/cjs/dsl.js +20 -0
- package/lib/cjs/dsl.js.map +1 -0
- package/lib/cjs/errors.js +79 -0
- package/lib/cjs/errors.js.map +1 -0
- package/lib/cjs/index.js +56 -0
- package/lib/cjs/index.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +72 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js +78 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js +86 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/index.js +12 -0
- package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +56 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/cjs/iterative-generation/types.js +5 -0
- package/lib/cjs/iterative-generation/types.js.map +1 -0
- package/lib/cjs/iterative-generation/utils.js +121 -0
- package/lib/cjs/iterative-generation/utils.js.map +1 -0
- package/lib/cjs/package.json +3 -0
- package/lib/cjs/result-types.js +10 -0
- package/lib/cjs/result-types.js.map +1 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js +53 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +33 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/cjs/utils/auth.js +15 -0
- package/lib/cjs/utils/auth.js.map +1 -0
- package/lib/cjs/utils/blobs.js +64 -0
- package/lib/cjs/utils/blobs.js.map +1 -0
- package/lib/cjs/utils/chunks.js +14 -0
- package/lib/cjs/utils/chunks.js.map +1 -0
- package/lib/cjs/utils/client.js +31 -0
- package/lib/cjs/utils/client.js.map +1 -0
- package/lib/cjs/utils/expand-vars.js +33 -0
- package/lib/cjs/utils/expand-vars.js.map +1 -0
- package/lib/cjs/utils/memory.js +65 -0
- package/lib/cjs/utils/memory.js.map +1 -0
- package/lib/cjs/utils/renditions.js +88 -0
- package/lib/cjs/utils/renditions.js.map +1 -0
- package/lib/cjs/utils/storage.js +54 -0
- package/lib/cjs/utils/storage.js.map +1 -0
- package/lib/cjs/utils/tokens.js +38 -0
- package/lib/cjs/utils/tokens.js.map +1 -0
- package/lib/cjs/vars.js +20 -0
- package/lib/cjs/vars.js.map +1 -0
- package/lib/cjs/workflows.js +15 -0
- package/lib/cjs/workflows.js.map +1 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +30 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +70 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +16 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/chunkDocument.js +82 -0
- package/lib/esm/activities/chunkDocument.js.map +1 -0
- package/lib/esm/activities/copyParentArtifacts.js +124 -0
- package/lib/esm/activities/copyParentArtifacts.js.map +1 -0
- package/lib/esm/activities/createDocumentFromOther.js +58 -0
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
- package/lib/esm/activities/executeInteraction.js +190 -0
- package/lib/esm/activities/executeInteraction.js.map +1 -0
- package/lib/esm/activities/extractDocumentText.js +153 -0
- package/lib/esm/activities/extractDocumentText.js.map +1 -0
- package/lib/esm/activities/generateDocumentProperties.js +80 -0
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
- package/lib/esm/activities/generateEmbeddings.js +254 -0
- package/lib/esm/activities/generateEmbeddings.js.map +1 -0
- package/lib/esm/activities/generateOrAssignContentType.js +122 -0
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/esm/activities/getObjectFromStore.js +17 -0
- package/lib/esm/activities/getObjectFromStore.js.map +1 -0
- package/lib/esm/activities/handleError.js +19 -0
- package/lib/esm/activities/handleError.js.map +1 -0
- package/lib/esm/activities/index-dsl.js +25 -0
- package/lib/esm/activities/index-dsl.js.map +1 -0
- package/lib/esm/activities/index.js +5 -0
- package/lib/esm/activities/index.js.map +1 -0
- package/lib/esm/activities/media/prepareAudio.js +200 -0
- package/lib/esm/activities/media/prepareAudio.js.map +1 -0
- package/lib/esm/activities/media/prepareVideo.js +390 -0
- package/lib/esm/activities/media/prepareVideo.js.map +1 -0
- package/lib/esm/activities/media/processPdfWithTextract.js +99 -0
- package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/esm/activities/media/saveGladiaTranscription.js +78 -0
- package/lib/esm/activities/media/saveGladiaTranscription.js.map +1 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +79 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/esm/activities/notifyWebhook.js +155 -0
- package/lib/esm/activities/notifyWebhook.js.map +1 -0
- package/lib/esm/activities/rateLimiter.js +27 -0
- package/lib/esm/activities/rateLimiter.js.map +1 -0
- package/lib/esm/activities/renditions/generateImageRendition.js +63 -0
- package/lib/esm/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js +194 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/esm/activities/setDocumentStatus.js +12 -0
- package/lib/esm/activities/setDocumentStatus.js.map +1 -0
- package/lib/esm/conversion/TextractProcessor.js +410 -0
- package/lib/esm/conversion/TextractProcessor.js.map +1 -0
- package/lib/esm/conversion/image.js +143 -0
- package/lib/esm/conversion/image.js.map +1 -0
- package/lib/esm/conversion/markitdown.js +36 -0
- package/lib/esm/conversion/markitdown.js.map +1 -0
- package/lib/esm/conversion/mutool.js +139 -0
- package/lib/esm/conversion/mutool.js.map +1 -0
- package/lib/esm/conversion/pandoc.js +36 -0
- package/lib/esm/conversion/pandoc.js.map +1 -0
- package/lib/esm/dsl/conditions.js +75 -0
- package/lib/esm/dsl/conditions.js.map +1 -0
- package/lib/esm/dsl/dsl-workflow.js +336 -0
- package/lib/esm/dsl/dsl-workflow.js.map +1 -0
- package/lib/esm/dsl/dslProxyActivities.js +20 -0
- package/lib/esm/dsl/dslProxyActivities.js.map +1 -0
- package/lib/esm/dsl/projections.js +55 -0
- package/lib/esm/dsl/projections.js.map +1 -0
- package/lib/esm/dsl/setup/ActivityContext.js +117 -0
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/index.js +12 -0
- package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/providers.js +61 -0
- package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/esm/dsl/test/test-child-workflow.js +5 -0
- package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/esm/dsl/validation.js +118 -0
- package/lib/esm/dsl/validation.js.map +1 -0
- package/lib/esm/dsl/vars.js +335 -0
- package/lib/esm/dsl/vars.js.map +1 -0
- package/lib/esm/dsl/walk.js +96 -0
- package/lib/esm/dsl/walk.js.map +1 -0
- package/lib/esm/dsl.js +4 -0
- package/lib/esm/dsl.js.map +1 -0
- package/lib/esm/errors.js +69 -0
- package/lib/esm/errors.js.map +1 -0
- package/lib/esm/index.js +38 -0
- package/lib/esm/index.js.map +1 -0
- package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
- package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +69 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generatePart.js +75 -0
- package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generateToc.js +83 -0
- package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/index.js +5 -0
- package/lib/esm/iterative-generation/activities/index.js.map +1 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +53 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/esm/iterative-generation/types.js +2 -0
- package/lib/esm/iterative-generation/types.js.map +1 -0
- package/lib/esm/iterative-generation/utils.js +112 -0
- package/lib/esm/iterative-generation/utils.js.map +1 -0
- package/lib/esm/result-types.js +7 -0
- package/lib/esm/result-types.js.map +1 -0
- package/lib/esm/system/notifyWebhookWorkflow.js +50 -0
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +30 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/esm/utils/auth.js +8 -0
- package/lib/esm/utils/auth.js.map +1 -0
- package/lib/esm/utils/blobs.js +54 -0
- package/lib/esm/utils/blobs.js.map +1 -0
- package/lib/esm/utils/chunks.js +9 -0
- package/lib/esm/utils/chunks.js.map +1 -0
- package/lib/esm/utils/client.js +27 -0
- package/lib/esm/utils/client.js.map +1 -0
- package/lib/esm/utils/expand-vars.js +30 -0
- package/lib/esm/utils/expand-vars.js.map +1 -0
- package/lib/esm/utils/memory.js +55 -0
- package/lib/esm/utils/memory.js.map +1 -0
- package/lib/esm/utils/renditions.js +80 -0
- package/lib/esm/utils/renditions.js.map +1 -0
- package/lib/esm/utils/storage.js +45 -0
- package/lib/esm/utils/storage.js.map +1 -0
- package/lib/esm/utils/tokens.js +34 -0
- package/lib/esm/utils/tokens.js.map +1 -0
- package/lib/esm/vars.js +4 -0
- package/lib/esm/vars.js.map +1 -0
- package/lib/esm/workflows.js +8 -0
- package/lib/esm/workflows.js.map +1 -0
- package/lib/tsconfig.tsbuildinfo +1 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +39 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/chunkDocument.d.ts +33 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -0
- package/lib/types/activities/copyParentArtifacts.d.ts +19 -0
- package/lib/types/activities/copyParentArtifacts.d.ts.map +1 -0
- package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
- package/lib/types/activities/executeInteraction.d.ts +61 -0
- package/lib/types/activities/executeInteraction.d.ts.map +1 -0
- package/lib/types/activities/extractDocumentText.d.ts +10 -0
- package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
- package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
- package/lib/types/activities/generateEmbeddings.d.ts +53 -0
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
- package/lib/types/activities/getObjectFromStore.d.ts +14 -0
- package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
- package/lib/types/activities/handleError.d.ts +6 -0
- package/lib/types/activities/handleError.d.ts.map +1 -0
- package/lib/types/activities/index-dsl.d.ts +25 -0
- package/lib/types/activities/index-dsl.d.ts.map +1 -0
- package/lib/types/activities/index.d.ts +5 -0
- package/lib/types/activities/index.d.ts.map +1 -0
- package/lib/types/activities/media/prepareAudio.d.ts +25 -0
- package/lib/types/activities/media/prepareAudio.d.ts.map +1 -0
- package/lib/types/activities/media/prepareVideo.d.ts +30 -0
- package/lib/types/activities/media/prepareVideo.d.ts.map +1 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
- package/lib/types/activities/media/saveGladiaTranscription.d.ts +14 -0
- package/lib/types/activities/media/saveGladiaTranscription.d.ts.map +1 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +19 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
- package/lib/types/activities/notifyWebhook.d.ts +27 -0
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
- package/lib/types/activities/rateLimiter.d.ts +11 -0
- package/lib/types/activities/rateLimiter.d.ts.map +1 -0
- package/lib/types/activities/renditions/generateImageRendition.d.ts +14 -0
- package/lib/types/activities/renditions/generateImageRendition.d.ts.map +1 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts +15 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts.map +1 -0
- package/lib/types/activities/setDocumentStatus.d.ts +15 -0
- package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
- package/lib/types/conversion/TextractProcessor.d.ts +45 -0
- package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
- package/lib/types/conversion/image.d.ts +13 -0
- package/lib/types/conversion/image.d.ts.map +1 -0
- package/lib/types/conversion/markitdown.d.ts +2 -0
- package/lib/types/conversion/markitdown.d.ts.map +1 -0
- package/lib/types/conversion/mutool.d.ts +19 -0
- package/lib/types/conversion/mutool.d.ts.map +1 -0
- package/lib/types/conversion/pandoc.d.ts +2 -0
- package/lib/types/conversion/pandoc.d.ts.map +1 -0
- package/lib/types/dsl/conditions.d.ts +2 -0
- package/lib/types/dsl/conditions.d.ts.map +1 -0
- package/lib/types/dsl/dsl-workflow.d.ts +5 -0
- package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
- package/lib/types/dsl/dslProxyActivities.d.ts +10 -0
- package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -0
- package/lib/types/dsl/projections.d.ts +4 -0
- package/lib/types/dsl/projections.d.ts.map +1 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts +17 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
- package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
- package/lib/types/dsl/validation.d.ts +4 -0
- package/lib/types/dsl/validation.d.ts.map +1 -0
- package/lib/types/dsl/vars.d.ts +48 -0
- package/lib/types/dsl/vars.d.ts.map +1 -0
- package/lib/types/dsl/walk.d.ts +18 -0
- package/lib/types/dsl/walk.d.ts.map +1 -0
- package/lib/types/dsl.d.ts +4 -0
- package/lib/types/dsl.d.ts.map +1 -0
- package/lib/types/errors.d.ts +37 -0
- package/lib/types/errors.d.ts.map +1 -0
- package/lib/types/index.d.ts +37 -0
- package/lib/types/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/index.d.ts +5 -0
- package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
- package/lib/types/iterative-generation/types.d.ts +79 -0
- package/lib/types/iterative-generation/types.d.ts.map +1 -0
- package/lib/types/iterative-generation/utils.d.ts +26 -0
- package/lib/types/iterative-generation/utils.d.ts.map +1 -0
- package/lib/types/result-types.d.ts +22 -0
- package/lib/types/result-types.d.ts.map +1 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts +8 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +25 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
- package/lib/types/utils/auth.d.ts +4 -0
- package/lib/types/utils/auth.d.ts.map +1 -0
- package/lib/types/utils/blobs.d.ts +7 -0
- package/lib/types/utils/blobs.d.ts.map +1 -0
- package/lib/types/utils/chunks.d.ts +9 -0
- package/lib/types/utils/chunks.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts +8 -0
- package/lib/types/utils/client.d.ts.map +1 -0
- package/lib/types/utils/expand-vars.d.ts +8 -0
- package/lib/types/utils/expand-vars.d.ts.map +1 -0
- package/lib/types/utils/memory.d.ts +8 -0
- package/lib/types/utils/memory.d.ts.map +1 -0
- package/lib/types/utils/renditions.d.ts +23 -0
- package/lib/types/utils/renditions.d.ts.map +1 -0
- package/lib/types/utils/storage.d.ts +16 -0
- package/lib/types/utils/storage.d.ts.map +1 -0
- package/lib/types/utils/tokens.d.ts +11 -0
- package/lib/types/utils/tokens.d.ts.map +1 -0
- package/lib/types/vars.d.ts +3 -0
- package/lib/types/vars.d.ts.map +1 -0
- package/lib/types/workflows.d.ts +8 -0
- package/lib/types/workflows.d.ts.map +1 -0
- package/lib/workflows-bundle.js +17213 -0
- package/package.json +146 -0
- package/src/activities/advanced/createDocumentTypeFromInteractionRun.ts +55 -0
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +119 -0
- package/src/activities/advanced/updateDocumentFromInteractionRun.ts +35 -0
- package/src/activities/chunkDocument.ts +146 -0
- package/src/activities/copyParentArtifacts.ts +162 -0
- package/src/activities/createDocumentFromOther.ts +92 -0
- package/src/activities/executeInteraction.ts +300 -0
- package/src/activities/extractDocumentText.ts +205 -0
- package/src/activities/generateDocumentProperties.ts +120 -0
- package/src/activities/generateEmbeddings.ts +387 -0
- package/src/activities/generateOrAssignContentType.ts +218 -0
- package/src/activities/getObjectFromStore.ts +31 -0
- package/src/activities/handleError.ts +25 -0
- package/src/activities/index-dsl.ts +25 -0
- package/src/activities/index.ts +4 -0
- package/src/activities/media/prepareAudio.ts +334 -0
- package/src/activities/media/prepareVideo.ts +622 -0
- package/src/activities/media/processPdfWithTextract.ts +141 -0
- package/src/activities/media/saveGladiaTranscription.ts +128 -0
- package/src/activities/media/transcribeMediaWithGladia.ts +117 -0
- package/src/activities/notifyWebhook.test.ts +134 -0
- package/src/activities/notifyWebhook.ts +199 -0
- package/src/activities/rateLimiter.ts +41 -0
- package/src/activities/renditions/generateImageRendition.ts +111 -0
- package/src/activities/renditions/generateVideoRendition.ts +293 -0
- package/src/activities/setDocumentStatus.ts +25 -0
- package/src/conversion/TextractProcessor.ts +506 -0
- package/src/conversion/image.test.ts +118 -0
- package/src/conversion/image.ts +168 -0
- package/src/conversion/markitdown.ts +41 -0
- package/src/conversion/mutool.test.ts +74 -0
- package/src/conversion/mutool.ts +180 -0
- package/src/conversion/pandoc.test.ts +24 -0
- package/src/conversion/pandoc.ts +40 -0
- package/src/dsl/conditions.ts +76 -0
- package/src/dsl/dsl-workflow.test.ts +58 -0
- package/src/dsl/dsl-workflow.ts +397 -0
- package/src/dsl/dslProxyActivities.ts +38 -0
- package/src/dsl/ms.d.ts +11 -0
- package/src/dsl/projections.test.ts +159 -0
- package/src/dsl/projections.ts +72 -0
- package/src/dsl/setup/ActivityContext.ts +178 -0
- package/src/dsl/setup/fetch/DataProvider.ts +45 -0
- package/src/dsl/setup/fetch/index.ts +19 -0
- package/src/dsl/setup/fetch/providers.ts +67 -0
- package/src/dsl/test/test-child-workflow.ts +6 -0
- package/src/dsl/validation.test.ts +257 -0
- package/src/dsl/validation.ts +125 -0
- package/src/dsl/vars.test.ts +245 -0
- package/src/dsl/vars.ts +340 -0
- package/src/dsl/walk.test.ts +81 -0
- package/src/dsl/walk.ts +103 -0
- package/src/dsl/workflow-exec-child.test.ts +273 -0
- package/src/dsl/workflow-fetch.test.ts +138 -0
- package/src/dsl/workflow-import.test.ts +89 -0
- package/src/dsl/workflow.test.ts +122 -0
- package/src/dsl.ts +3 -0
- package/src/errors.ts +101 -0
- package/src/index.ts +41 -0
- package/src/iterative-generation/activities/extractToc.ts +63 -0
- package/src/iterative-generation/activities/finalizeOutput.ts +100 -0
- package/src/iterative-generation/activities/generatePart.ts +123 -0
- package/src/iterative-generation/activities/generateToc.ts +116 -0
- package/src/iterative-generation/activities/index.ts +4 -0
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +68 -0
- package/src/iterative-generation/types.ts +99 -0
- package/src/iterative-generation/utils.ts +126 -0
- package/src/result-types.ts +25 -0
- package/src/system/notifyWebhookWorkflow.ts +70 -0
- package/src/system/recalculateEmbeddingsWorkflow.ts +41 -0
- package/src/utils/auth.ts +10 -0
- package/src/utils/blobs.ts +59 -0
- package/src/utils/chunks.ts +17 -0
- package/src/utils/client.ts +46 -0
- package/src/utils/expand-vars.ts +31 -0
- package/src/utils/memory.ts +61 -0
- package/src/utils/renditions.ts +127 -0
- package/src/utils/storage.ts +60 -0
- package/src/utils/tokens.ts +44 -0
- package/src/vars.ts +3 -0
- package/src/workflows.ts +7 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * Use AWS Textract to convert a PDF into a structured document of the following format:
|
|
3
|
+
* <document>
|
|
4
|
+
* <page number="n">
|
|
5
|
+
* <text/>
|
|
6
|
+
* <table/>
|
|
7
|
+
* <text/>
|
|
8
|
+
* <figure/>
|
|
9
|
+
* ...
|
|
10
|
+
* </page>
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { fromWebToken } from "@aws-sdk/credential-providers";
|
|
14
|
+
import { AwsConfiguration, CreateContentObjectPayload, DSLActivityExecutionPayload, DSLActivitySpec, SupportedIntegrations } from "@vertesia/common";
|
|
15
|
+
import type { AwsCredentialIdentityProvider } from "@smithy/types";
|
|
16
|
+
import { log } from "@temporalio/activity";
|
|
17
|
+
import { TextractProcessor } from "../../conversion/TextractProcessor.js";
|
|
18
|
+
import { setupActivity } from "../../dsl/setup/ActivityContext.js";
|
|
19
|
+
import { DocumentNotFoundError } from "../../errors.js";
|
|
20
|
+
import { TextExtractionResult, TextExtractionStatus } from "../../result-types.js";
|
|
21
|
+
import { fetchBlobAsBuffer, md5 } from "../../utils/blobs.js";
|
|
22
|
+
import { countTokens } from "../../utils/tokens.js";
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
/**
 * Parameters accepted by the `ConvertPdfToStructuredText` activity.
 */
export interface ConvertPdfToStructuredTextParams {
    /**
     * When truthy, extraction runs even if the object already has text.
     * When omitted or false, an object that already has text is skipped.
     */
    force?: boolean;
}
|
|
29
|
+
|
|
30
|
+
/**
 * DSL activity specification for the PDF-to-structured-text conversion.
 * The `name` literal identifies this activity in a workflow definition.
 */
export interface ConvertPdfToStructuredText extends DSLActivitySpec<ConvertPdfToStructuredTextParams> {
    name: "ConvertPdfToStructuredText";
}
|
|
33
|
+
|
|
34
|
+
/**
 * Result returned by `convertPdfToStructuredText`: the common text
 * extraction result plus an optional human-readable message.
 */
export interface StructuredTextResult extends TextExtractionResult {
    /** Optional explanation of the outcome (for example why extraction was skipped). */
    message?: string;
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
/**
 * Extract structured text from a PDF content object using Textract and store
 * the result on the object.
 *
 * Flow:
 *  1. Retrieve the object (including its text) and skip when text is already
 *     present and `params.force` is not set.
 *  2. Resolve AWS credentials from the project's AWS integration.
 *  3. When the content source is not an `s3://` URI, download it and hand the
 *     buffer to the processor for upload.
 *  4. Start an analysis job and poll its status every 5 seconds while it is
 *     `IN_PROGRESS`.
 *  5. On `SUCCEEDED`, save the extracted text, its etag and token counts.
 *
 * @param payload - DSL activity execution payload; `params.force` forces re-extraction.
 * @returns `status: skipped` when text already exists and `force` is not set,
 *          otherwise `status: success` once the object has been updated.
 * @throws DocumentNotFoundError when the object has no content source or the
 *         content source cannot be resolved.
 * @throws Error when the project cannot be resolved or the job finishes with
 *         any status other than `SUCCEEDED`.
 */
export async function convertPdfToStructuredText(payload: DSLActivityExecutionPayload<ConvertPdfToStructuredTextParams>): Promise<StructuredTextResult> {
    // Delay between two job status checks.
    const pollIntervalMs = 5000;

    const { params, client, objectId } = await setupActivity<ConvertPdfToStructuredTextParams>(payload);

    const object = await client.objects.retrieve(objectId, "+text");

    if (object.text && !params.force) {
        return { hasText: true, objectId, status: TextExtractionStatus.skipped, message: "text already present and force not enabled" };
    }

    const source = object.content?.source;
    if (!source) {
        throw new DocumentNotFoundError(`No source found for object ${objectId}`);
    }

    // Only used to verify that the store can resolve the source; the content
    // itself is fetched below from `source`.
    const { source: pdfUrl } = await client.store.objects.getContentSource(objectId);
    if (!pdfUrl) {
        throw new DocumentNotFoundError(`Error fetching source ${source}`);
    }

    const project = await client.getProject();
    if (!project) {
        // Fail with an explicit message instead of a TypeError on `project.id`.
        throw new Error(`Unable to resolve the project for object ${objectId}`);
    }

    const awsConfig = (await client.projects.integrations.retrieve(project.id, SupportedIntegrations.aws)) as AwsConfiguration;
    const credentials = await getS3AWSCredentials(awsConfig, payload.auth_token, project.id);

    // NOTE(review): region and bucket are hard-coded; they look like test
    // values and should probably come from configuration — confirm.
    const processor = new TextractProcessor({
        fileKey: objectId,
        region: "us-west-2",
        bucket: "cp-textract-tests",
        credentials,
        log: log,
        detectImages: true,
        includeConfidenceInTables: true,
    });

    try {
        // NOTE(review): an `s3://` source is not uploaded, yet the analysis is
        // still started with `objectId` — presumably the file is expected to
        // already exist under that key; verify.
        if (!source.startsWith("s3://")) {
            const buf = await fetchBlobAsBuffer(client, source);
            await processor.upload(buf);
        }

        const jobId = await processor.startAnalysis(objectId);

        let jobStatus = await processor.checkJobStatus(jobId);
        while (jobStatus === "IN_PROGRESS") {
            await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
            jobStatus = await processor.checkJobStatus(jobId);
        }

        if (jobStatus !== "SUCCEEDED") {
            throw new Error(`Job failed with status: ${jobStatus}`);
        }

        log.info(`Job ${jobId} succeeded, saving results`, { jobId });
        const fText = await processor.processResults(jobId);
        const tokensData = countTokens(fText);
        // Reuse the content etag when there is one, otherwise hash the extracted text.
        const etag = object.content?.etag ?? md5(fText);
        const updateData: CreateContentObjectPayload = {
            text: fText,
            text_etag: etag,
            tokens: {
                ...tokensData,
                etag: etag,
            },
        };

        await client.objects.update(objectId, updateData);
        // Use the activity logger, like the rest of this function, rather than console.
        log.info("Full text updated", { objectId, jobId });

        return { hasText: true, objectId, status: TextExtractionStatus.success, message: "Text extracted successfully" };
    } catch (error) {
        log.error("Error processing document", { objectId, error });
        throw error;
    }
}
|
|
121
|
+
|
|
122
|
+
/**
 * Builds the web-identity credential provider used to reach AWS on behalf of a project.
 *
 * @param awsConfig - AWS integration configuration retrieved for the project.
 * @param composableAuthToken - Token handed to `fromWebToken` as the web identity token.
 *   It is a bearer credential and is intentionally never written to the logs.
 * @param projectId - Project id; used in the role session name and in the log context.
 * @returns The credential provider returned by `fromWebToken`.
 * @throws DocumentNotFoundError when the integration is missing or disabled, or when it
 *   has no `s3_role_arn`.
 */
export async function getS3AWSCredentials(awsConfig: AwsConfiguration, composableAuthToken: string, projectId: string): Promise<AwsCredentialIdentityProvider> {
    if (!awsConfig || !awsConfig.enabled) {
        throw new DocumentNotFoundError("AWS integration is not enabled for this project");
    }
    if (!awsConfig.s3_role_arn) {
        throw new DocumentNotFoundError("S3 Role ARN is not defined in AWS project integration");
    }

    // Log only non-secret context: the auth token must not end up in the activity logs.
    log.info("Getting AWS credentials for Textract", { projectId, roleArn: awsConfig.s3_role_arn });

    return fromWebToken({
        webIdentityToken: composableAuthToken,
        roleArn: awsConfig.s3_role_arn,
        roleSessionName: `cp-project-textract-${projectId}`,
    });
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { log } from "@temporalio/activity";
|
|
2
|
+
import { FetchClient } from "@vertesia/api-fetch-client";
|
|
3
|
+
import { AudioMetadata, DSLActivityExecutionPayload, DSLActivitySpec, GladiaConfiguration, SupportedIntegrations, TranscriptSegment, VideoMetadata } from "@vertesia/common";
|
|
4
|
+
import { setupActivity } from "../../dsl/setup/ActivityContext.js";
|
|
5
|
+
import { TextExtractionResult, TextExtractionStatus } from "../../result-types.js";
|
|
6
|
+
|
|
7
|
+
/** Parameters accepted by the `SaveGladiaTranscription` activity. */
export interface SaveGladiaTranscriptionParams {
    /** Id of the Gladia transcription whose result is fetched from `/transcription/{id}`. */
    gladiaTranscriptionId: string;
}
|
|
10
|
+
|
|
11
|
+
/** DSL activity spec whose `name` selects the `SaveGladiaTranscription` activity. */
export interface SaveGladiaTranscription extends DSLActivitySpec<SaveGladiaTranscriptionParams> {
    name: 'SaveGladiaTranscription';
}
|
|
14
|
+
|
|
15
|
+
// Default Gladia endpoint, used when the integration does not configure its own `url`.
const GLADIA_URL = "https://api.gladia.io/v2";

/**
 * Fetches transcription results from Gladia and saves them to the content object.
 * This activity is called after transcribeMedia completes via webhook callback.
 *
 * The object is updated with the full transcript as `text`, the utterances as
 * `transcript.segments`, and `duration` / `languages` merged into its metadata.
 *
 * @param payload - Activity execution payload carrying the Gladia transcription id.
 * @returns A success result once the object is updated, or an error result when the
 *   Gladia integration is not enabled, the transcription failed, or it is not `done` yet.
 */
export async function saveGladiaTranscription(payload: DSLActivityExecutionPayload<SaveGladiaTranscriptionParams>): Promise<TextExtractionResult> {
    const { params, client, objectId } = await setupActivity<SaveGladiaTranscriptionParams>(payload);

    // Every non-success outcome is reported with the same result shape.
    const failure = (error: string): TextExtractionResult => ({
        hasText: false,
        objectId,
        status: TextExtractionStatus.error,
        error,
    });

    const integration = await client.projects.integrations.retrieve(payload.project_id, SupportedIntegrations.gladia) as GladiaConfiguration | undefined;
    if (!integration?.enabled) {
        return failure("Gladia integration not enabled");
    }

    const gladia = new FetchClient(integration.url ?? GLADIA_URL);
    gladia.withHeaders({ "x-gladia-key": integration.api_key });

    const transcriptionId = params.gladiaTranscriptionId;
    log.info(`Fetching transcription result from Gladia`, { objectId, transcriptionId });

    const remote = await gladia.get(`/transcription/${transcriptionId}`) as GladiaTranscriptionResult;

    switch (remote.status) {
        case 'done':
            break;
        case 'error':
            log.error(`Gladia transcription failed`, { objectId, error: remote });
            return failure("Gladia transcription failed");
        default:
            // Still queued or processing: nothing to save yet.
            log.warn(`Gladia transcription not ready`, { objectId, status: remote.status });
            return failure(`Gladia transcription not ready: ${remote.status}`);
    }

    const stored = await client.objects.retrieve(objectId, "+text");

    const { transcription } = remote.result;
    const segments = processUtterances(transcription.utterances);
    const fullText = transcription.full_transcript;
    // The content etag is stamped on both the text and the transcript.
    const etag = stored.content?.etag;

    await client.objects.update(objectId, {
        text: fullText,
        text_etag: etag,
        transcript: { segments, etag },
        metadata: {
            ...stored.metadata,
            duration: remote.result.metadata.audio_duration,
            languages: transcription.languages
        } as AudioMetadata | VideoMetadata
    });

    const textLength = fullText?.length;
    log.info(`Saved transcription for object`, { objectId, textLength, segmentCount: segments.length });

    return {
        hasText: (textLength ?? 0) > 0,
        objectId,
        status: TextExtractionStatus.success,
        message: `Transcription saved with ${segments.length} segments`
    };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Converts Gladia utterances into transcript segments.
 *
 * Copies `start`, `end`, `text`, `speaker`, `confidence` and `language` from each
 * utterance; any other utterance field (such as `channel`) is not carried over.
 *
 * @param utterances - Utterances from the Gladia transcription result.
 * @returns One segment per utterance, in the same order.
 */
function processUtterances(utterances: GladiaUtterance[]): TranscriptSegment[] {
    return utterances.map(({ start, end, text, speaker, confidence, language }) => ({
        start,
        end,
        text,
        speaker,
        confidence,
        language
    }));
}
|
|
100
|
+
|
|
101
|
+
// Gladia API response types
|
|
102
|
+
/** Status values a Gladia transcription can report. */
type GladiaTranscriptionStatus = 'queued' | 'processing' | 'done' | 'error';

/** Shape of the response read from Gladia's `/transcription/{id}` endpoint. */
interface GladiaTranscriptionResult {
    id: string;
    /** Results are only consumed by this file when the status is `done`. */
    status: GladiaTranscriptionStatus;
    result: {
        metadata: {
            /** Saved as the object's `duration` metadata. */
            audio_duration: number;
            number_of_distinct_channels: number;
            billing_time: number;
            transcription_time: number;
        };
        transcription: {
            /** Saved as the object's `text`. */
            full_transcript: string;
            /** Saved as the object's `languages` metadata. */
            languages: string[];
            /** Converted into the object's transcript segments. */
            utterances: GladiaUtterance[];
        };
    };
}
|
|
119
|
+
|
|
120
|
+
/** A single utterance entry of a Gladia transcription result. */
interface GladiaUtterance {
    language: string;
    start: number;
    end: number;
    confidence: number;
    /** Not copied into transcript segments. */
    channel: number;
    speaker: number;
    text: string;
}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { activityInfo, CompleteAsyncError, log } from "@temporalio/activity";
|
|
2
|
+
import { FetchClient, RequestError } from "@vertesia/api-fetch-client";
|
|
3
|
+
import { AUDIO_RENDITION_NAME, ContentNature, DSLActivityExecutionPayload, DSLActivitySpec, GladiaConfiguration, SupportedIntegrations, VideoMetadata } from "@vertesia/common";
|
|
4
|
+
import { setupActivity } from "../../dsl/setup/ActivityContext.js";
|
|
5
|
+
import { DocumentNotFoundError } from "../../errors.js";
|
|
6
|
+
import { TextExtractionResult, TextExtractionStatus } from "../../index.js";
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
/** Parameters accepted by the `TranscribeMedia` activity. */
export interface TranscriptMediaParams {
    environmentId?: string;
    /** When truthy, transcription is requested even if the object already has text. */
    force?: boolean;
}
|
|
13
|
+
|
|
14
|
+
/** DSL activity spec whose `name` selects the `TranscribeMedia` activity. */
export interface TranscriptMedia extends DSLActivitySpec<TranscriptMediaParams> {
    name: 'TranscribeMedia';
}
|
|
17
|
+
|
|
18
|
+
/** Result of the `TranscribeMedia` activity: a text extraction result plus Gladia details. */
export interface TranscriptMediaResult extends TextExtractionResult {
    message?: string;
    /**
     * Gladia transcription ID for fetching results in a follow-up activity.
     * Present when async media transcription completes successfully.
     */
    gladiaTranscriptionId?: string;
}
|
|
26
|
+
|
|
27
|
+
// Default Gladia endpoint, used when the integration does not configure its own `url`.
const GLADIA_URL = "https://api.gladia.io/v2";

/**
 * Submits the object's media to Gladia for transcription and hands completion of the
 * activity over to the Gladia webhook callback.
 *
 * For video objects the audio rendition is used as the media source when one exists;
 * otherwise the object's own content source is used.
 *
 * @param payload - Activity execution payload; `params.force` re-runs transcription even
 *   when the object already has text.
 * @returns An error result when the Gladia integration is not enabled or Gladia rejects the
 *   request with HTTP 422, or a skipped result when text is already present and `force` is
 *   not set. On a successful submission the function does not return (see `@throws`).
 * @throws DocumentNotFoundError when the object has no content source or no download URL
 *   can be obtained for the media.
 * @throws CompleteAsyncError after the request has been accepted by Gladia, so that the
 *   activity is completed asynchronously using the task token embedded in the callback URL.
 */
export async function transcribeMedia(payload: DSLActivityExecutionPayload<TranscriptMediaParams>): Promise<TranscriptMediaResult> {

    const { params, client, objectId } = await setupActivity<TranscriptMediaParams>(payload);

    const gladiaConfig = await client.projects.integrations.retrieve(payload.project_id, SupportedIntegrations.gladia) as GladiaConfiguration | undefined;
    if (!gladiaConfig || !gladiaConfig.enabled) {
        return {
            hasText: false,
            objectId,
            status: TextExtractionStatus.error,
            error: "Gladia integration not enabled",
        }
    }

    const object = await client.objects.retrieve(objectId, "+text");
    const gladiaClient = new FetchClient(gladiaConfig.url ?? GLADIA_URL);
    gladiaClient.withHeaders({ "x-gladia-key": gladiaConfig.api_key });

    if (object.text && !params.force) {
        return { hasText: true, objectId, status: TextExtractionStatus.skipped, message: "text already present and force not enabled" }
    }

    if (!object.content?.source) {
        throw new DocumentNotFoundError(`No source found for object ${objectId}`);
    }

    // Check for audio rendition in video metadata (preferred for videos)
    let mediaSource: string = object.content.source;
    if (object.metadata?.type === ContentNature.Video) {
        const videoMetadata = object.metadata as VideoMetadata;
        const audioRendition = videoMetadata.renditions?.find(r => r.name === AUDIO_RENDITION_NAME);
        if (audioRendition?.content?.source) {
            mediaSource = audioRendition.content.source;
            log.info(`Found audio rendition for video object ${objectId}`, { mediaSource });
        }
    }

    // Get download URL for the media source
    const { url: mediaUrl } = await client.files.getDownloadUrl(mediaSource);

    if (!mediaUrl) {
        throw new DocumentNotFoundError(`Error fetching media URL for ${mediaSource}`);
    }

    log.info(`Using media URL for transcription`, { objectId, mediaUrl: mediaSource });

    const taskToken = Buffer.from(activityInfo().taskToken).toString('base64url');
    const callbackUrl = generateCallbackUrlForGladia(client.store.baseUrl, taskToken, objectId);

    // Neither the download URL nor the callback URL is logged: the former grants access to
    // the media and the latter embeds the task token that can complete this activity.
    log.info(`Transcribing media with Gladia`, { objectId, mediaSource });

    try {
        const res = await gladiaClient.post("/transcription", {
            payload: {
                audio_url: mediaUrl,
                callback_url: callbackUrl,
                diarization_enhanced: true,
                enable_code_switching: true,
                subtitles: true,
                subtitles_config: {
                    formats: ["vtt"],
                }
            }
        }) as GladiaTranscriptRequestResponse;
        log.info(`Transcription request sent to Gladia`, { objectId, res });
    } catch (error: unknown) {
        // A 422 means Gladia rejected the request itself; report it as a result instead of throwing.
        if (error instanceof RequestError && error.status === 422) {
            return {
                hasText: false,
                objectId,
                status: TextExtractionStatus.error,
                error: `Gladia transcription error: ${error.message}`,
            }
        }
        log.error(`Error sending transcription request to Gladia for object ${objectId}`, { error });
        throw error;
    }

    // The webhook callback completes this activity later using the task token.
    throw new CompleteAsyncError();
}
|
|
109
|
+
|
|
110
|
+
/**
 * Builds the webhook URL Gladia calls back once a transcription is finished.
 *
 * The object id and task token are percent-encoded, and trailing slashes on `baseUrl`
 * are dropped so the path never contains `//webhooks`. For a base64url task token and a
 * plain id the result is the same as plain string concatenation.
 *
 * @param baseUrl - Base URL the webhook path is appended to.
 * @param taskToken - Task token sent back as the `task_token` query parameter.
 * @param objectId - Id of the object being transcribed; becomes the last path segment.
 * @returns `{baseUrl}/webhooks/gladia/{objectId}?task_token={taskToken}`.
 */
function generateCallbackUrlForGladia(baseUrl: string, taskToken: string, objectId: string) {
    const root = baseUrl.replace(/\/+$/, "");
    return `${root}/webhooks/gladia/${encodeURIComponent(objectId)}?task_token=${encodeURIComponent(taskToken)}`;
}
|
|
113
|
+
|
|
114
|
+
/** Shape assumed for the response to the POST `/transcription` request sent to Gladia. */
interface GladiaTranscriptRequestResponse {
    id: string;
    result_url: string;
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import {
|
|
2
|
+
MockActivityEnvironment,
|
|
3
|
+
} from "@temporalio/testing";
|
|
4
|
+
import { ContentEventName, DSLActivityExecutionPayload } from "@vertesia/common";
|
|
5
|
+
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
|
6
|
+
import { notifyWebhook, NotifyWebhookParams } from "./notifyWebhook.js";
|
|
7
|
+
|
|
8
|
+
// Replace the global `fetch` with a mock so the tests never issue a real HTTP request.
vi.stubGlobal('fetch', vi.fn());

// Typed handle on the stubbed global, used to queue responses and inspect calls.
const mockFetch = vi.mocked(fetch);

// Mock activity environment the activity runs in; created once for the whole file.
let testEnv: MockActivityEnvironment;

beforeAll(() => {
    testEnv = new MockActivityEnvironment();
});

// Start every test without calls recorded by the previous one.
beforeEach(() => {
    vi.clearAllMocks();
});
|
|
21
|
+
|
|
22
|
+
// Baseline activity params shared by all tests; checked against NotifyWebhookParams
// with `satisfies` so the literal types of the fields are kept.
const defaultParams = {
    webhook: "https://vertesia.test",
    method: "POST" as const,
    detail: { message: "Hello World" },
    workflow_id: "wf_id",
    workflow_type: "wfFuncName",
    workflow_run_id: "wf_run_id",
    event_name: "completed",
} satisfies NotifyWebhookParams;
|
|
31
|
+
|
|
32
|
+
/**
 * Builds an activity execution payload for `notifyWebhook`.
 *
 * @param params - Overrides applied on top of `defaultParams`.
 * @returns A payload whose `params` and `activity.params` both hold the merged params.
 *   The auth token comes from `VERTESIA_KEY` when set, otherwise a placeholder token.
 */
const createTestPayload = (params: Partial<NotifyWebhookParams> = {}): DSLActivityExecutionPayload<NotifyWebhookParams> => {
    const activityParams = { ...defaultParams, ...params };
    const authToken = process.env.VERTESIA_KEY || "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJodHRwOi8vbW9jay10b2tlbi1zZXJ2ZXIiLCJzdWIiOiJ0ZXN0In0.signature";

    return {
        auth_token: authToken,
        account_id: "unset",
        project_id: "unset",
        params: activityParams,
        config: {
            studio_url: "http://mock-studio",
            store_url: "http://mock-store",
        },
        workflow_name: "",
        event: ContentEventName.create,
        objectIds: [],
        vars: {},
        activity: { name: "notifyWebhook", params: activityParams }
    };
};
|
|
51
|
+
|
|
52
|
+
describe("Webhook should be notified", () => {
    // Asserts that `fetch` received the default webhook URL with the JSON POST request
    // built from `defaultParams.detail`; shared by every scenario below.
    const expectDefaultPostWasSent = () => {
        expect(mockFetch).toHaveBeenCalledWith(defaultParams.webhook, {
            method: 'POST',
            body: JSON.stringify({ message: 'Hello World' }),
            headers: {
                'Content-Type': 'application/json',
            },
        });
    };

    it("test POST success", async () => {
        // The webhook answers 200 OK.
        const okResponse = {
            ok: true,
            status: 200,
            statusText: 'OK',
            url: defaultParams.webhook
        };
        mockFetch.mockResolvedValueOnce(okResponse as Response);

        const res = await testEnv.run(notifyWebhook, createTestPayload());

        expectDefaultPostWasSent();

        // The activity reports the status, status text and URL of the response.
        expect(res).toEqual({
            status: 200,
            message: 'OK',
            url: defaultParams.webhook
        });
    });

    it("test POST server error", async () => {
        // The webhook answers 500 with a JSON error body.
        const readBody = vi.fn().mockResolvedValue('{"error": "Database connection failed", "code": "DB_ERROR"}');
        const failedResponse = {
            ok: false,
            status: 500,
            statusText: 'Internal Server Error',
            url: defaultParams.webhook,
            text: readBody
        } as unknown as Response;
        mockFetch.mockResolvedValueOnce(failedResponse);

        // The thrown error must carry the status line and the response body.
        await expect(testEnv.run(notifyWebhook, createTestPayload())).rejects.toThrow(
            `Webhook Notification to ${defaultParams.webhook} failed with status: 500 Internal Server Error - Response: {"error": "Database connection failed", "code": "DB_ERROR"}`
        );

        expectDefaultPostWasSent();

        // The response body must have been read to build the error message.
        expect(readBody).toHaveBeenCalled();
    });

    it("test POST network error", async () => {
        // `fetch` itself rejects, as it would on a network failure.
        mockFetch.mockRejectedValueOnce(new Error('Network request failed'));

        // The network error must surface to the caller.
        await expect(testEnv.run(notifyWebhook, createTestPayload())).rejects.toThrow('Network request failed');

        expectDefaultPostWasSent();
    });

});
|