@vertesia/workflow 0.24.0-dev.202601221707
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +13 -0
- package/README.md +65 -0
- package/bin/bundle-workflows.mjs +39 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +33 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +73 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +19 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/chunkDocument.js +85 -0
- package/lib/cjs/activities/chunkDocument.js.map +1 -0
- package/lib/cjs/activities/copyParentArtifacts.js +127 -0
- package/lib/cjs/activities/copyParentArtifacts.js.map +1 -0
- package/lib/cjs/activities/createDocumentFromOther.js +64 -0
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
- package/lib/cjs/activities/executeInteraction.js +194 -0
- package/lib/cjs/activities/executeInteraction.js.map +1 -0
- package/lib/cjs/activities/extractDocumentText.js +156 -0
- package/lib/cjs/activities/extractDocumentText.js.map +1 -0
- package/lib/cjs/activities/generateDocumentProperties.js +83 -0
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
- package/lib/cjs/activities/generateEmbeddings.js +257 -0
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
- package/lib/cjs/activities/generateOrAssignContentType.js +125 -0
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/cjs/activities/getObjectFromStore.js +20 -0
- package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
- package/lib/cjs/activities/handleError.js +22 -0
- package/lib/cjs/activities/handleError.js.map +1 -0
- package/lib/cjs/activities/index-dsl.js +51 -0
- package/lib/cjs/activities/index-dsl.js.map +1 -0
- package/lib/cjs/activities/index.js +21 -0
- package/lib/cjs/activities/index.js.map +1 -0
- package/lib/cjs/activities/media/prepareAudio.js +239 -0
- package/lib/cjs/activities/media/prepareAudio.js.map +1 -0
- package/lib/cjs/activities/media/prepareVideo.js +429 -0
- package/lib/cjs/activities/media/prepareVideo.js.map +1 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js +103 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/cjs/activities/media/saveGladiaTranscription.js +81 -0
- package/lib/cjs/activities/media/saveGladiaTranscription.js.map +1 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +82 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/cjs/activities/notifyWebhook.js +158 -0
- package/lib/cjs/activities/notifyWebhook.js.map +1 -0
- package/lib/cjs/activities/rateLimiter.js +30 -0
- package/lib/cjs/activities/rateLimiter.js.map +1 -0
- package/lib/cjs/activities/renditions/generateImageRendition.js +66 -0
- package/lib/cjs/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js +200 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/cjs/activities/setDocumentStatus.js +15 -0
- package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
- package/lib/cjs/conversion/TextractProcessor.js +417 -0
- package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
- package/lib/cjs/conversion/image.js +149 -0
- package/lib/cjs/conversion/image.js.map +1 -0
- package/lib/cjs/conversion/markitdown.js +42 -0
- package/lib/cjs/conversion/markitdown.js.map +1 -0
- package/lib/cjs/conversion/mutool.js +147 -0
- package/lib/cjs/conversion/mutool.js.map +1 -0
- package/lib/cjs/conversion/pandoc.js +39 -0
- package/lib/cjs/conversion/pandoc.js.map +1 -0
- package/lib/cjs/dsl/conditions.js +81 -0
- package/lib/cjs/dsl/conditions.js.map +1 -0
- package/lib/cjs/dsl/dsl-workflow.js +343 -0
- package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
- package/lib/cjs/dsl/dslProxyActivities.js +23 -0
- package/lib/cjs/dsl/dslProxyActivities.js.map +1 -0
- package/lib/cjs/dsl/projections.js +59 -0
- package/lib/cjs/dsl/projections.js.map +1 -0
- package/lib/cjs/dsl/setup/ActivityContext.js +122 -0
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/index.js +16 -0
- package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
- package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
- package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/cjs/dsl/validation.js +122 -0
- package/lib/cjs/dsl/validation.js.map +1 -0
- package/lib/cjs/dsl/vars.js +341 -0
- package/lib/cjs/dsl/vars.js.map +1 -0
- package/lib/cjs/dsl/walk.js +100 -0
- package/lib/cjs/dsl/walk.js.map +1 -0
- package/lib/cjs/dsl.js +20 -0
- package/lib/cjs/dsl.js.map +1 -0
- package/lib/cjs/errors.js +79 -0
- package/lib/cjs/errors.js.map +1 -0
- package/lib/cjs/index.js +56 -0
- package/lib/cjs/index.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +72 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js +78 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js +86 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/index.js +12 -0
- package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +56 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/cjs/iterative-generation/types.js +5 -0
- package/lib/cjs/iterative-generation/types.js.map +1 -0
- package/lib/cjs/iterative-generation/utils.js +121 -0
- package/lib/cjs/iterative-generation/utils.js.map +1 -0
- package/lib/cjs/package.json +3 -0
- package/lib/cjs/result-types.js +10 -0
- package/lib/cjs/result-types.js.map +1 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js +53 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +33 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/cjs/utils/auth.js +15 -0
- package/lib/cjs/utils/auth.js.map +1 -0
- package/lib/cjs/utils/blobs.js +64 -0
- package/lib/cjs/utils/blobs.js.map +1 -0
- package/lib/cjs/utils/chunks.js +14 -0
- package/lib/cjs/utils/chunks.js.map +1 -0
- package/lib/cjs/utils/client.js +31 -0
- package/lib/cjs/utils/client.js.map +1 -0
- package/lib/cjs/utils/expand-vars.js +33 -0
- package/lib/cjs/utils/expand-vars.js.map +1 -0
- package/lib/cjs/utils/memory.js +65 -0
- package/lib/cjs/utils/memory.js.map +1 -0
- package/lib/cjs/utils/renditions.js +88 -0
- package/lib/cjs/utils/renditions.js.map +1 -0
- package/lib/cjs/utils/storage.js +54 -0
- package/lib/cjs/utils/storage.js.map +1 -0
- package/lib/cjs/utils/tokens.js +38 -0
- package/lib/cjs/utils/tokens.js.map +1 -0
- package/lib/cjs/vars.js +20 -0
- package/lib/cjs/vars.js.map +1 -0
- package/lib/cjs/workflows.js +15 -0
- package/lib/cjs/workflows.js.map +1 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +30 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +70 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +16 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/chunkDocument.js +82 -0
- package/lib/esm/activities/chunkDocument.js.map +1 -0
- package/lib/esm/activities/copyParentArtifacts.js +124 -0
- package/lib/esm/activities/copyParentArtifacts.js.map +1 -0
- package/lib/esm/activities/createDocumentFromOther.js +58 -0
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
- package/lib/esm/activities/executeInteraction.js +190 -0
- package/lib/esm/activities/executeInteraction.js.map +1 -0
- package/lib/esm/activities/extractDocumentText.js +153 -0
- package/lib/esm/activities/extractDocumentText.js.map +1 -0
- package/lib/esm/activities/generateDocumentProperties.js +80 -0
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
- package/lib/esm/activities/generateEmbeddings.js +254 -0
- package/lib/esm/activities/generateEmbeddings.js.map +1 -0
- package/lib/esm/activities/generateOrAssignContentType.js +122 -0
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/esm/activities/getObjectFromStore.js +17 -0
- package/lib/esm/activities/getObjectFromStore.js.map +1 -0
- package/lib/esm/activities/handleError.js +19 -0
- package/lib/esm/activities/handleError.js.map +1 -0
- package/lib/esm/activities/index-dsl.js +25 -0
- package/lib/esm/activities/index-dsl.js.map +1 -0
- package/lib/esm/activities/index.js +5 -0
- package/lib/esm/activities/index.js.map +1 -0
- package/lib/esm/activities/media/prepareAudio.js +200 -0
- package/lib/esm/activities/media/prepareAudio.js.map +1 -0
- package/lib/esm/activities/media/prepareVideo.js +390 -0
- package/lib/esm/activities/media/prepareVideo.js.map +1 -0
- package/lib/esm/activities/media/processPdfWithTextract.js +99 -0
- package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/esm/activities/media/saveGladiaTranscription.js +78 -0
- package/lib/esm/activities/media/saveGladiaTranscription.js.map +1 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +79 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/esm/activities/notifyWebhook.js +155 -0
- package/lib/esm/activities/notifyWebhook.js.map +1 -0
- package/lib/esm/activities/rateLimiter.js +27 -0
- package/lib/esm/activities/rateLimiter.js.map +1 -0
- package/lib/esm/activities/renditions/generateImageRendition.js +63 -0
- package/lib/esm/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js +194 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/esm/activities/setDocumentStatus.js +12 -0
- package/lib/esm/activities/setDocumentStatus.js.map +1 -0
- package/lib/esm/conversion/TextractProcessor.js +410 -0
- package/lib/esm/conversion/TextractProcessor.js.map +1 -0
- package/lib/esm/conversion/image.js +143 -0
- package/lib/esm/conversion/image.js.map +1 -0
- package/lib/esm/conversion/markitdown.js +36 -0
- package/lib/esm/conversion/markitdown.js.map +1 -0
- package/lib/esm/conversion/mutool.js +139 -0
- package/lib/esm/conversion/mutool.js.map +1 -0
- package/lib/esm/conversion/pandoc.js +36 -0
- package/lib/esm/conversion/pandoc.js.map +1 -0
- package/lib/esm/dsl/conditions.js +75 -0
- package/lib/esm/dsl/conditions.js.map +1 -0
- package/lib/esm/dsl/dsl-workflow.js +336 -0
- package/lib/esm/dsl/dsl-workflow.js.map +1 -0
- package/lib/esm/dsl/dslProxyActivities.js +20 -0
- package/lib/esm/dsl/dslProxyActivities.js.map +1 -0
- package/lib/esm/dsl/projections.js +55 -0
- package/lib/esm/dsl/projections.js.map +1 -0
- package/lib/esm/dsl/setup/ActivityContext.js +117 -0
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/index.js +12 -0
- package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/providers.js +61 -0
- package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/esm/dsl/test/test-child-workflow.js +5 -0
- package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/esm/dsl/validation.js +118 -0
- package/lib/esm/dsl/validation.js.map +1 -0
- package/lib/esm/dsl/vars.js +335 -0
- package/lib/esm/dsl/vars.js.map +1 -0
- package/lib/esm/dsl/walk.js +96 -0
- package/lib/esm/dsl/walk.js.map +1 -0
- package/lib/esm/dsl.js +4 -0
- package/lib/esm/dsl.js.map +1 -0
- package/lib/esm/errors.js +69 -0
- package/lib/esm/errors.js.map +1 -0
- package/lib/esm/index.js +38 -0
- package/lib/esm/index.js.map +1 -0
- package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
- package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +69 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generatePart.js +75 -0
- package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generateToc.js +83 -0
- package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/index.js +5 -0
- package/lib/esm/iterative-generation/activities/index.js.map +1 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +53 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/esm/iterative-generation/types.js +2 -0
- package/lib/esm/iterative-generation/types.js.map +1 -0
- package/lib/esm/iterative-generation/utils.js +112 -0
- package/lib/esm/iterative-generation/utils.js.map +1 -0
- package/lib/esm/result-types.js +7 -0
- package/lib/esm/result-types.js.map +1 -0
- package/lib/esm/system/notifyWebhookWorkflow.js +50 -0
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +30 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/esm/utils/auth.js +8 -0
- package/lib/esm/utils/auth.js.map +1 -0
- package/lib/esm/utils/blobs.js +54 -0
- package/lib/esm/utils/blobs.js.map +1 -0
- package/lib/esm/utils/chunks.js +9 -0
- package/lib/esm/utils/chunks.js.map +1 -0
- package/lib/esm/utils/client.js +27 -0
- package/lib/esm/utils/client.js.map +1 -0
- package/lib/esm/utils/expand-vars.js +30 -0
- package/lib/esm/utils/expand-vars.js.map +1 -0
- package/lib/esm/utils/memory.js +55 -0
- package/lib/esm/utils/memory.js.map +1 -0
- package/lib/esm/utils/renditions.js +80 -0
- package/lib/esm/utils/renditions.js.map +1 -0
- package/lib/esm/utils/storage.js +45 -0
- package/lib/esm/utils/storage.js.map +1 -0
- package/lib/esm/utils/tokens.js +34 -0
- package/lib/esm/utils/tokens.js.map +1 -0
- package/lib/esm/vars.js +4 -0
- package/lib/esm/vars.js.map +1 -0
- package/lib/esm/workflows.js +8 -0
- package/lib/esm/workflows.js.map +1 -0
- package/lib/tsconfig.tsbuildinfo +1 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +39 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/chunkDocument.d.ts +33 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -0
- package/lib/types/activities/copyParentArtifacts.d.ts +19 -0
- package/lib/types/activities/copyParentArtifacts.d.ts.map +1 -0
- package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
- package/lib/types/activities/executeInteraction.d.ts +61 -0
- package/lib/types/activities/executeInteraction.d.ts.map +1 -0
- package/lib/types/activities/extractDocumentText.d.ts +10 -0
- package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
- package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
- package/lib/types/activities/generateEmbeddings.d.ts +53 -0
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
- package/lib/types/activities/getObjectFromStore.d.ts +14 -0
- package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
- package/lib/types/activities/handleError.d.ts +6 -0
- package/lib/types/activities/handleError.d.ts.map +1 -0
- package/lib/types/activities/index-dsl.d.ts +25 -0
- package/lib/types/activities/index-dsl.d.ts.map +1 -0
- package/lib/types/activities/index.d.ts +5 -0
- package/lib/types/activities/index.d.ts.map +1 -0
- package/lib/types/activities/media/prepareAudio.d.ts +25 -0
- package/lib/types/activities/media/prepareAudio.d.ts.map +1 -0
- package/lib/types/activities/media/prepareVideo.d.ts +30 -0
- package/lib/types/activities/media/prepareVideo.d.ts.map +1 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
- package/lib/types/activities/media/saveGladiaTranscription.d.ts +14 -0
- package/lib/types/activities/media/saveGladiaTranscription.d.ts.map +1 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +19 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
- package/lib/types/activities/notifyWebhook.d.ts +27 -0
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
- package/lib/types/activities/rateLimiter.d.ts +11 -0
- package/lib/types/activities/rateLimiter.d.ts.map +1 -0
- package/lib/types/activities/renditions/generateImageRendition.d.ts +14 -0
- package/lib/types/activities/renditions/generateImageRendition.d.ts.map +1 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts +15 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts.map +1 -0
- package/lib/types/activities/setDocumentStatus.d.ts +15 -0
- package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
- package/lib/types/conversion/TextractProcessor.d.ts +45 -0
- package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
- package/lib/types/conversion/image.d.ts +13 -0
- package/lib/types/conversion/image.d.ts.map +1 -0
- package/lib/types/conversion/markitdown.d.ts +2 -0
- package/lib/types/conversion/markitdown.d.ts.map +1 -0
- package/lib/types/conversion/mutool.d.ts +19 -0
- package/lib/types/conversion/mutool.d.ts.map +1 -0
- package/lib/types/conversion/pandoc.d.ts +2 -0
- package/lib/types/conversion/pandoc.d.ts.map +1 -0
- package/lib/types/dsl/conditions.d.ts +2 -0
- package/lib/types/dsl/conditions.d.ts.map +1 -0
- package/lib/types/dsl/dsl-workflow.d.ts +5 -0
- package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
- package/lib/types/dsl/dslProxyActivities.d.ts +10 -0
- package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -0
- package/lib/types/dsl/projections.d.ts +4 -0
- package/lib/types/dsl/projections.d.ts.map +1 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts +17 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
- package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
- package/lib/types/dsl/validation.d.ts +4 -0
- package/lib/types/dsl/validation.d.ts.map +1 -0
- package/lib/types/dsl/vars.d.ts +48 -0
- package/lib/types/dsl/vars.d.ts.map +1 -0
- package/lib/types/dsl/walk.d.ts +18 -0
- package/lib/types/dsl/walk.d.ts.map +1 -0
- package/lib/types/dsl.d.ts +4 -0
- package/lib/types/dsl.d.ts.map +1 -0
- package/lib/types/errors.d.ts +37 -0
- package/lib/types/errors.d.ts.map +1 -0
- package/lib/types/index.d.ts +37 -0
- package/lib/types/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/index.d.ts +5 -0
- package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
- package/lib/types/iterative-generation/types.d.ts +79 -0
- package/lib/types/iterative-generation/types.d.ts.map +1 -0
- package/lib/types/iterative-generation/utils.d.ts +26 -0
- package/lib/types/iterative-generation/utils.d.ts.map +1 -0
- package/lib/types/result-types.d.ts +22 -0
- package/lib/types/result-types.d.ts.map +1 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts +8 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +25 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
- package/lib/types/utils/auth.d.ts +4 -0
- package/lib/types/utils/auth.d.ts.map +1 -0
- package/lib/types/utils/blobs.d.ts +7 -0
- package/lib/types/utils/blobs.d.ts.map +1 -0
- package/lib/types/utils/chunks.d.ts +9 -0
- package/lib/types/utils/chunks.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts +8 -0
- package/lib/types/utils/client.d.ts.map +1 -0
- package/lib/types/utils/expand-vars.d.ts +8 -0
- package/lib/types/utils/expand-vars.d.ts.map +1 -0
- package/lib/types/utils/memory.d.ts +8 -0
- package/lib/types/utils/memory.d.ts.map +1 -0
- package/lib/types/utils/renditions.d.ts +23 -0
- package/lib/types/utils/renditions.d.ts.map +1 -0
- package/lib/types/utils/storage.d.ts +16 -0
- package/lib/types/utils/storage.d.ts.map +1 -0
- package/lib/types/utils/tokens.d.ts +11 -0
- package/lib/types/utils/tokens.d.ts.map +1 -0
- package/lib/types/vars.d.ts +3 -0
- package/lib/types/vars.d.ts.map +1 -0
- package/lib/types/workflows.d.ts +8 -0
- package/lib/types/workflows.d.ts.map +1 -0
- package/lib/workflows-bundle.js +17213 -0
- package/package.json +146 -0
- package/src/activities/advanced/createDocumentTypeFromInteractionRun.ts +55 -0
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +119 -0
- package/src/activities/advanced/updateDocumentFromInteractionRun.ts +35 -0
- package/src/activities/chunkDocument.ts +146 -0
- package/src/activities/copyParentArtifacts.ts +162 -0
- package/src/activities/createDocumentFromOther.ts +92 -0
- package/src/activities/executeInteraction.ts +300 -0
- package/src/activities/extractDocumentText.ts +205 -0
- package/src/activities/generateDocumentProperties.ts +120 -0
- package/src/activities/generateEmbeddings.ts +387 -0
- package/src/activities/generateOrAssignContentType.ts +218 -0
- package/src/activities/getObjectFromStore.ts +31 -0
- package/src/activities/handleError.ts +25 -0
- package/src/activities/index-dsl.ts +25 -0
- package/src/activities/index.ts +4 -0
- package/src/activities/media/prepareAudio.ts +334 -0
- package/src/activities/media/prepareVideo.ts +622 -0
- package/src/activities/media/processPdfWithTextract.ts +141 -0
- package/src/activities/media/saveGladiaTranscription.ts +128 -0
- package/src/activities/media/transcribeMediaWithGladia.ts +117 -0
- package/src/activities/notifyWebhook.test.ts +134 -0
- package/src/activities/notifyWebhook.ts +199 -0
- package/src/activities/rateLimiter.ts +41 -0
- package/src/activities/renditions/generateImageRendition.ts +111 -0
- package/src/activities/renditions/generateVideoRendition.ts +293 -0
- package/src/activities/setDocumentStatus.ts +25 -0
- package/src/conversion/TextractProcessor.ts +506 -0
- package/src/conversion/image.test.ts +118 -0
- package/src/conversion/image.ts +168 -0
- package/src/conversion/markitdown.ts +41 -0
- package/src/conversion/mutool.test.ts +74 -0
- package/src/conversion/mutool.ts +180 -0
- package/src/conversion/pandoc.test.ts +24 -0
- package/src/conversion/pandoc.ts +40 -0
- package/src/dsl/conditions.ts +76 -0
- package/src/dsl/dsl-workflow.test.ts +58 -0
- package/src/dsl/dsl-workflow.ts +397 -0
- package/src/dsl/dslProxyActivities.ts +38 -0
- package/src/dsl/ms.d.ts +11 -0
- package/src/dsl/projections.test.ts +159 -0
- package/src/dsl/projections.ts +72 -0
- package/src/dsl/setup/ActivityContext.ts +178 -0
- package/src/dsl/setup/fetch/DataProvider.ts +45 -0
- package/src/dsl/setup/fetch/index.ts +19 -0
- package/src/dsl/setup/fetch/providers.ts +67 -0
- package/src/dsl/test/test-child-workflow.ts +6 -0
- package/src/dsl/validation.test.ts +257 -0
- package/src/dsl/validation.ts +125 -0
- package/src/dsl/vars.test.ts +245 -0
- package/src/dsl/vars.ts +340 -0
- package/src/dsl/walk.test.ts +81 -0
- package/src/dsl/walk.ts +103 -0
- package/src/dsl/workflow-exec-child.test.ts +273 -0
- package/src/dsl/workflow-fetch.test.ts +138 -0
- package/src/dsl/workflow-import.test.ts +89 -0
- package/src/dsl/workflow.test.ts +122 -0
- package/src/dsl.ts +3 -0
- package/src/errors.ts +101 -0
- package/src/index.ts +41 -0
- package/src/iterative-generation/activities/extractToc.ts +63 -0
- package/src/iterative-generation/activities/finalizeOutput.ts +100 -0
- package/src/iterative-generation/activities/generatePart.ts +123 -0
- package/src/iterative-generation/activities/generateToc.ts +116 -0
- package/src/iterative-generation/activities/index.ts +4 -0
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +68 -0
- package/src/iterative-generation/types.ts +99 -0
- package/src/iterative-generation/utils.ts +126 -0
- package/src/result-types.ts +25 -0
- package/src/system/notifyWebhookWorkflow.ts +70 -0
- package/src/system/recalculateEmbeddingsWorkflow.ts +41 -0
- package/src/utils/auth.ts +10 -0
- package/src/utils/blobs.ts +59 -0
- package/src/utils/chunks.ts +17 -0
- package/src/utils/client.ts +46 -0
- package/src/utils/expand-vars.ts +31 -0
- package/src/utils/memory.ts +61 -0
- package/src/utils/renditions.ts +127 -0
- package/src/utils/storage.ts +60 -0
- package/src/utils/tokens.ts +44 -0
- package/src/vars.ts +3 -0
- package/src/workflows.ts +7 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { log } from "@temporalio/activity";
|
|
2
|
+
import { NodeStreamSource } from "@vertesia/client/node";
|
|
3
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
import { pdfExtractPages } from "../conversion/mutool.js";
|
|
6
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
|
7
|
+
import { DocumentNotFoundError } from "../errors.js";
|
|
8
|
+
import { saveBlobToTempFile } from "../utils/blobs.js";
|
|
9
|
+
|
|
10
|
+
/** Parameters read by the createPdfDocumentFromSource activity. */
interface CreatePdfDocumentFromSourceParams {
|
|
11
|
+
|
|
12
|
+
target_object_type: string; // Name of the type for the new object; looked up with client.types.getTypeByName.
|
|
13
|
+
title: string; // Name of the new object; the target type's name is used when this is empty.
|
|
14
|
+
filename?: string; // Filename of the object to create.
|
|
15
|
+
pages: number[]; // Pages to extract into the new document; passed as-is to pdfExtractPages. Must not be empty.
|
|
16
|
+
parent?: string; // Set the new document as child of the source document. NOTE(review): the activity in this file always uses the source object id as parent and does not read this field - confirm intended use.
|
|
17
|
+
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
/**
 * DSL activity spec for the `createPdfDocumentFromSource` activity, carrying
 * CreatePdfDocumentFromSourceParams as its params. The `name` literal matches
 * the name of the exported activity function.
 */
export interface CreatePdfDocumentFromSource extends DSLActivitySpec<CreatePdfDocumentFromSourceParams> {
|
|
22
|
+
name: 'createPdfDocumentFromSource';
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
/**
 * Create a new PDF document by extracting pages from the PDF attached to the
 * source object, uploading the extracted file and creating a new object that
 * points at it. The new object is created as a child of the source object.
 *
 * @param payload - DSL activity payload. `params.pages` lists the pages to
 *   extract, `params.target_object_type` is the name of the type for the new
 *   object, `params.title` is its name (the type name is used when empty) and
 *   `params.filename` optionally names the uploaded file.
 * @returns the id of the created object (`newObjectId`) and the name of the
 *   uploaded file (`uploadedFile`).
 * @throws Error when `params.pages` is missing or empty.
 * @throws DocumentNotFoundError when the source object is not found, has no
 *   content source, is not a PDF, or when the target type is not found.
 */
export async function createPdfDocumentFromSource(payload: DSLActivityExecutionPayload<CreatePdfDocumentFromSourceParams>) {
    const { client, objectId, params } = await setupActivity<CreatePdfDocumentFromSourceParams>(payload);
    const inputObject = await client.objects.retrieve(objectId);

    const { pages, filename, title } = params;
    log.info(`Creating PDF from source`, { objectId, pages, filename, title });

    if (!pages || pages.length === 0) {
        log.error(`No pages provided`);
        throw new Error(`No pages provided`);
    }

    if (!inputObject) {
        log.error(`Document ${objectId} not found`);
        throw new DocumentNotFoundError(`Document ${objectId} not found`, [objectId]);
    }

    if (!inputObject.content?.source) {
        log.error(`Document ${objectId} has no source`);
        throw new DocumentNotFoundError(`Document ${objectId} has no source`, [objectId]);
    }

    // Only PDF sources are accepted; a missing content type is rejected as well.
    const contentType = inputObject.content.type;
    if (!contentType?.startsWith('application/pdf')) {
        const message = `Document ${objectId} is not a pdf: ${contentType}`;
        log.error(message);
        throw new DocumentNotFoundError(message, [objectId]);
    }

    const targetType = await client.types.getTypeByName(params.target_object_type);
    if (!targetType) {
        log.error(`Type ${params.target_object_type} not found`);
        throw new DocumentNotFoundError(`Type ${params.target_object_type} not found`);
    }

    // NOTE(review): tmpFile and newPdf are not removed by this function -
    // confirm whether the helpers (or the worker) clean up temporary files.
    const tmpFile = await saveBlobToTempFile(client, inputObject.content.source, ".pdf");
    const newPdf = await pdfExtractPages(tmpFile, pages);
    log.info(`PDF created from pages ${pages.join(', ')} `, { newPdf });

    // Honor the caller-provided filename; otherwise derive one from the page list.
    const name = filename || `pages-${pages.join('-')}.pdf`;

    const sourceToUpload = new NodeStreamSource(
        fs.createReadStream(newPdf),
        name,
        "application/pdf"
    );

    log.info(`Uploading file ${newPdf} `);
    const upload = await client.objects.upload(sourceToUpload);
    log.info(`File uploaded ${upload.source} `);

    // NOTE(review): params.parent is not read; the source object is always
    // used as the parent of the new object - confirm this is intended.
    const newObject = await client.objects.create({
        type: targetType.id,
        name: title || targetType.name,
        parent: objectId,
        content: {
            source: upload.source,
            name: upload.name,
            type: 'application/pdf'
        }
    });

    return { newObjectId: newObject.id, uploadedFile: upload.name };
}
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
import { CompletionResult, ModelOptions } from "@llumiverse/common";
|
|
2
|
+
import { activityInfo, log } from "@temporalio/activity";
|
|
3
|
+
import { VertesiaClient } from "@vertesia/client";
|
|
4
|
+
import { NodeStreamSource } from "@vertesia/client/node";
|
|
5
|
+
import {
|
|
6
|
+
DSLActivityExecutionPayload,
|
|
7
|
+
DSLActivitySpec,
|
|
8
|
+
ExecutionRun,
|
|
9
|
+
ExecutionRunStatus,
|
|
10
|
+
ExecutionRunWorkflow,
|
|
11
|
+
InteractionExecutionConfiguration,
|
|
12
|
+
RunSearchPayload,
|
|
13
|
+
} from "@vertesia/common";
|
|
14
|
+
import { projectResult } from "../dsl/projections.js";
|
|
15
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
|
16
|
+
import { ActivityParamInvalidError, ActivityParamNotFoundError, ResourceExhaustedError } from "../errors.js";
|
|
17
|
+
import { TruncateSpec, truncByMaxTokens } from "../utils/tokens.js";
|
|
18
|
+
import { Readable } from "stream";
|
|
19
|
+
|
|
20
|
+
// Example of a DSL activity spec that runs the "GenerateSummary" interaction.
// It illustrates the three sections of a spec side by side:
//   - `import`: names listed for import into the activity,
//   - `params`: the ExecuteInteractionParams values (the `${...}` strings are kept verbatim
//     here; presumably they are resolved by the DSL runtime — confirm),
//   - `fetch`: named queries (`documents`, `guidline`, `docType`) that `params` refers to.
// The constant is not referenced by the code visible in this file.
//@ts-ignore
const JSON: DSLActivitySpec = {
    name: "executeInteraction",
    import: ["defaultModel", "guidlineId", "docTypeId"],
    params: {
        defaultModel: "${model}",
        interactionName: "GenerateSummary",
        model: "${defaultModel ?? 'gpt4'}",
        environment: "13456",
        max_tokens: 100,
        temperature: 0.5,
        tags: ["test"],
        result_schema: "${docType.object_schema}",
        prompt_data: {
            documents: "${documents}",
            guidline: "${guidline.text}",
        },
    },
    fetch: {
        // All documents whose id is in `objectIds`, with their text selected.
        documents: {
            type: "document",
            query: {
                id: { $in: "${objectIds}" },
            },
            select: "+text",
        },
        // A single document looked up by `guidlineId`; configured to throw when not found.
        guidline: {
            type: "document",
            limit: 1,
            query: {
                id: "${guidlineId}",
            },
            select: "+text",
            on_not_found: "throw",
        },
        // The document type whose `object_schema` is used as `result_schema` above.
        docType: {
            type: "document_type",
            limit: 1,
            query: {
                id: "${docTypeId}",
            },
            select: "+object_schema",
        },
    },
};
|
|
66
|
+
/**
 * Execution options accepted by `executeInteractionFromActivity`.
 */
export interface InteractionExecutionParams {
    /**
     * The environment to use. If not specified the project default environment will be used.
     * If the latter is not specified an exception will be thrown.
     */
    environment?: string;
    /**
     * The model to use. If not specified the project default model will be used.
     * If the latter is not specified the default model of the environment will be used.
     * If the latter is not specified an exception will be thrown.
     */
    model?: string;

    /**
     * Request a JSON schema for the result
     */
    result_schema?: any;

    /**
     * Whether to validate the result against the schema.
     * Forwarded as `do_validate` in the interaction execution configuration.
     */
    validate_result?: boolean;

    /**
     * Tags to add to the execution run.
     * They are appended after the built-in "workflow" tag.
     */
    tags?: string[];

    /**
     * Whether or not to include the previous error in the interaction prompt data.
     * When set and the activity attempt is greater than 1, a run is searched by the current
     * workflow run id and its error is passed to the interaction as `previous_error`.
     */
    include_previous_error?: boolean;

    /**
     * Options to control generation
     */
    model_options?: ModelOptions;

    /**
     * activity won't be retried if it fails due to resource exhaustion (429):
     * `executeInteraction` throws a `ResourceExhaustedError` in that case.
     */
    exit_on_resource_exhaustion?: boolean;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Parameters of the `executeInteraction` activity.
 *
 * TODO: must be kept in sync with InteractionAsyncExecutionPayload from @vertesia/common
 * Also see the executeInteractionAsync endpoint on the server for how the client payload is sent to the workflow.
 * (interaction is translated to interactionName)
 */
export interface ExecuteInteractionParams extends InteractionExecutionParams {
    //TODO rename to interaction as in InteractionAsyncExecutionPayload
    /** Name of the interaction to execute. The activity fails when it is missing. */
    interactionName: string;
    /** Data passed to the interaction prompt. */
    prompt_data: Record<string, any>;
    /**
     * Additional prompt data passed by the workflow configuration. This will be merged with prompt_data if any.
     * On key conflicts the value from static_prompt_data replaces the one in prompt_data.
     * You should use `import: ["static_prompt_data"]` to import the workflow prompt data as static_prompt_data param.
     * Otherwise the workflow prompt data will be ignored.
     */
    static_prompt_data?: Record<string, any>;
    /**
     * Truncation specs keyed by prompt_data property name: each listed property is replaced by
     * the result of `truncByMaxTokens(value, spec)` before the interaction is executed.
     */
    truncate?: Record<string, TruncateSpec>;
}
|
|
125
|
+
|
|
126
|
+
/**
 * DSL activity spec for the `executeInteraction` activity: a `DSLActivitySpec` whose params are
 * `ExecuteInteractionParams` and whose name is fixed to the literal "executeInteraction".
 */
export interface ExecuteInteraction extends DSLActivitySpec<ExecuteInteractionParams> {
    name: "executeInteraction";
}
|
|
129
|
+
|
|
130
|
+
/**
 * Activity that executes a named interaction and returns its result.
 *
 * Steps:
 * 1. Merge `static_prompt_data` into `prompt_data` (static values win on key conflicts).
 * 2. Apply the per-key `truncate` specs to the prompt data.
 * 3. Execute the interaction through `executeInteractionFromActivity`.
 * 4. Upload every base64 image found in the result and replace its value with whatever
 *    `client.files.uploadFile` returns.
 *
 * @param payload - DSL activity payload carrying the `ExecuteInteractionParams`.
 * @returns The value produced by `projectResult`, which is given `{ runId, status, result }`
 *          as its last argument (presumably the value used when no projection is configured — confirm).
 * @throws ActivityParamNotFoundError when `interactionName` is missing.
 * @throws ResourceExhaustedError on a 429 failure when `exit_on_resource_exhaustion` is set.
 * @throws ActivityParamInvalidError when the failure message reports an invalid prompt schema
 *         or a missing model id (input problems that a retry cannot fix).
 * @throws Error for any other failure.
 */
export async function executeInteraction(payload: DSLActivityExecutionPayload<ExecuteInteractionParams>) {
    const { client, params } = await setupActivity<ExecuteInteractionParams>(payload);

    const { interactionName, static_prompt_data: wf_prompt_data } = params;

    // The type marks prompt_data as required, but nothing visible here validates it at runtime.
    // Default it so the merge and the truncation below cannot fail on `undefined`, and store it
    // back on params so params keeps exposing the merged/truncated data as before.
    const prompt_data: Record<string, any> = params.prompt_data ?? {};
    params.prompt_data = prompt_data;
    if (wf_prompt_data) {
        Object.assign(prompt_data, wf_prompt_data);
    }

    if (!interactionName) {
        log.error("Missing interactionName", { params });
        throw new ActivityParamNotFoundError("interactionName", payload.activity);
    }

    if (params.truncate) {
        for (const [key, value] of Object.entries(params.truncate)) {
            prompt_data[key] = truncByMaxTokens(prompt_data[key], value);
        }
    }

    // Matches the header of an image data URL and captures its mime type. The subtype class
    // also accepts digits, dots, `+` and `-` so types such as `image/svg+xml` are recognized.
    const imageDataUrlPrefix = /^data:(image\/[a-z0-9.+-]+);base64,/i;

    try {
        const res = await executeInteractionFromActivity(
            client,
            interactionName,
            params,
            prompt_data,
            payload.debug_mode,
        );

        let completionResult: CompletionResult[] = res.result;

        // Handle image uploads if the result contains base64 images
        if (completionResult.some((r) => r.type === "image")) {
            const info = activityInfo();
            const { runId } = info.workflowExecution;
            const { activityId } = info;

            completionResult = await Promise.all(
                completionResult.map(async (item, index) => {
                    if (item.type !== "image") {
                        return item;
                    }
                    const image = item.value;

                    // Strip the data URL header (if any) and keep the mime type it declares.
                    // Raw base64 without a header is treated as PNG, as before.
                    const match = imageDataUrlPrefix.exec(image);
                    const mimeType = match?.[1]?.toLowerCase() ?? "image/png";
                    const base64Data = match ? image.slice(match[0].length) : image;
                    const buffer = Buffer.from(base64Data, "base64");

                    // Generate a filename whose extension follows the mime subtype (`svg+xml` -> `svg`).
                    const extension = mimeType.slice("image/".length).split("+")[0] || "png";
                    const filename = `generated-image-${runId}-${activityId}-${index}.${extension}`;

                    const source = new NodeStreamSource(Readable.from(buffer), filename, mimeType);

                    const file = await client.files.uploadFile(source);
                    return { type: "image", value: file } as CompletionResult;
                }),
            );
        }

        return projectResult(payload, params, res, {
            runId: res.id,
            status: res.status,
            result: completionResult,
        });
    } catch (error: any) {
        log.error(`Failed to execute interaction ${interactionName}`, { error });

        // Anything can be thrown: never assume a string `message` exists, otherwise the
        // classification below would itself throw and hide the original failure.
        const message: string = typeof error?.message === "string" ? error.message : String(error);

        if (error?.statusCode === 429 && params.exit_on_resource_exhaustion) {
            throw new ResourceExhaustedError(error.statusCode, "Resource exhausted - rate limit exceeded");
        } else if (message.includes("Failed to validate merged prompt schema")) {
            //issue with the input data, don't retry
            throw new ActivityParamInvalidError("prompt_data", payload.activity, message);
        } else if (message.includes("modelId: Path `modelId` is required")) {
            //issue with the input data, don't retry
            throw new ActivityParamInvalidError("model", payload.activity, message);
        } else {
            throw new Error(`Interaction Execution failed ${interactionName}: ${message}`);
        }
    }
}
|
|
216
|
+
|
|
217
|
+
/**
 * Executes an interaction by name from inside a Temporal activity.
 *
 * The run is tagged with "workflow" plus the user tags and linked to the current workflow
 * (run id, workflow id, activity type). When `include_previous_error` is set and this is a
 * retry (attempt > 1), a run is searched by the workflow run id and its error is passed to
 * the interaction as `previous_error`.
 *
 * @param client - Client used to search runs and execute the interaction.
 * @param interactionName - Name of the interaction to execute.
 * @param params - Execution options (environment, model, schema, tags, ...).
 * @param prompt_data - Data passed to the interaction; it is copied, not mutated.
 * @param debug - When true, logs the previous run error (if any) and the execution result.
 * @returns The execution run returned by the client.
 * @throws Whatever the client throws, and an `Error` when the returned run carries an error
 *         or has the `failed` status.
 */
export async function executeInteractionFromActivity(
    client: VertesiaClient,
    interactionName: string,
    params: InteractionExecutionParams,
    prompt_data: any,
    debug?: boolean,
) {
    const info = activityInfo();
    const runId = info.workflowExecution.runId;

    const tags = params.tags ? ["workflow"].concat(params.tags) : ["workflow"];

    const workflow: ExecutionRunWorkflow = {
        run_id: runId,
        workflow_id: info.workflowExecution.workflowId,
        activity_type: info.activityType,
    };

    let previousStudioExecutionRun: ExecutionRun | undefined = undefined;
    //retrieve last failed run if any
    if (params.include_previous_error && info.attempt > 1) {
        log.info("Retrying, searching for previous run", { prev_run_id: runId });
        const searchPayload: RunSearchPayload = {
            query: { workflow_run_ids: [runId] },
            limit: 1,
        };
        const results = await client.runs.search(searchPayload);
        log.info("Search results", { results });

        const previousRun = results ? (results[0] ?? undefined) : undefined;
        if (previousRun) {
            log.info("Found previous run", { previousRun });
            previousStudioExecutionRun = await client.runs.retrieve(previousRun.id);
        }
    }
    if (debug && previousStudioExecutionRun?.error) {
        log.info(`Found previous run error`, { error: previousStudioExecutionRun?.error });
    }

    const config: InteractionExecutionConfiguration = {
        environment: params.environment,
        model: params.model,
        model_options: params.model_options,
        do_validate: params.validate_result,
    };
    const data = {
        ...prompt_data,
        previous_error: previousStudioExecutionRun?.error,
    };

    const result_schema = params.result_schema;

    log.debug(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags, workflow });

    const res = await client.interactions
        .executeByName(interactionName, {
            config,
            data,
            result_schema,
            tags,
            stream: false,
            workflow,
        })
        .catch((err) => {
            // Log with the interaction name for context, then let the caller classify the error.
            log.error(`Error executing interaction ${interactionName}`, { err });
            throw err;
        });

    if (debug) {
        log.info(`Interaction executed ${interactionName}`, res);
    }

    if (res.error || res.status === ExecutionRunStatus.failed) {
        log.error(`Error executing interaction ${interactionName}`, { error: res.error });
        // Interpolating the error directly would render an object as "[object Object]".
        throw new Error(`Interaction Execution failed ${interactionName}: ${formatRunError(res.error)}`);
    }

    return res;
}

/** Turns the error carried by a run (string, object with a message, or anything else) into readable text. */
function formatRunError(error: unknown): string {
    if (error == null) {
        return "unknown error";
    }
    if (typeof error === "string") {
        return error;
    }
    const message = (error as { message?: unknown }).message;
    if (typeof message === "string" && message.length > 0) {
        return message;
    }
    try {
        // JSON.stringify returns undefined for values it cannot represent (e.g. functions).
        return globalThis.JSON.stringify(error) ?? String(error);
    } catch {
        return String(error);
    }
}
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import { log } from "@temporalio/activity";
|
|
2
|
+
import {
|
|
3
|
+
ContentObject,
|
|
4
|
+
CreateContentObjectPayload,
|
|
5
|
+
DSLActivityExecutionPayload,
|
|
6
|
+
DSLActivitySpec,
|
|
7
|
+
} from "@vertesia/common";
|
|
8
|
+
import { markdownWithMarkitdown } from "../conversion/markitdown.js";
|
|
9
|
+
import { mutoolPdfToText } from "../conversion/mutool.js";
|
|
10
|
+
import { markdownWithPandoc } from "../conversion/pandoc.js";
|
|
11
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
|
12
|
+
import { DocumentNotFoundError } from "../errors.js";
|
|
13
|
+
import { TextExtractionResult, TextExtractionStatus } from "../result-types.js";
|
|
14
|
+
import { fetchBlobAsBuffer, md5 } from "../utils/blobs.js";
|
|
15
|
+
import { countTokens } from "../utils/tokens.js";
|
|
16
|
+
|
|
17
|
+
// Example spec for the extractDocumentText activity: only the name is given, no params.
// The constant is not referenced by the code visible in this file.
//@ts-ignore
const JSON: DSLActivitySpec = {
    name: "extractDocumentText",
};
|
|
21
|
+
|
|
22
|
+
/**
 * Parameters of the `extractDocumentText` activity.
 * The activity does not define any parameter of its own, so the interface is empty.
 */
export interface ExtractDocumentTextParams {}
|
|
24
|
+
/**
 * DSL activity spec for the `extractDocumentText` activity.
 * The name is fixed to the literal "extractDocumentText".
 */
export interface ExtractDocumentText extends DSLActivitySpec<ExtractDocumentTextParams> {
    name: "extractDocumentText";
    // Typed as `never` so that a spec of this type cannot declare a projection.
    projection?: never;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Activity that extracts the text of a content object from its source file and stores it,
 * together with token information and an etag, on the object.
 *
 * Outcome by situation:
 * - no content type or source: `skipped` when the object already has text, `error` otherwise;
 * - text already present and `text_etag` equal to the content etag: `skipped`;
 * - source cannot be read: `error` with the failure message;
 * - mime type not handled and content does not look like text: `skipped`;
 * - otherwise the text is extracted, the object is updated and `success` is returned.
 *
 * @param payload - DSL activity payload identifying the object to process.
 * @returns A summary of the extraction (status, message, tokens, text length, object id).
 * @throws DocumentNotFoundError when the object cannot be found.
 */
export async function extractDocumentText(
    payload: DSLActivityExecutionPayload<ExtractDocumentTextParams>,
): Promise<TextExtractionResult> {
    const { client, objectId } = await setupActivity(payload);

    const r = await client.objects.find({
        query: { _id: objectId },
        limit: 1,
        select: "+text",
    });
    const doc = r[0] as ContentObject | undefined;
    if (!doc) {
        log.error(`Document ${objectId} not found`);
        throw new DocumentNotFoundError(`Document ${objectId} not found`, payload.objectIds);
    }

    log.info(`Extracting text for object ${doc.id}`);

    if (!doc.content?.type || !doc.content?.source) {
        if (doc.text) {
            return createResponse(doc, doc.text, TextExtractionStatus.skipped, "Text present and no source or type");
        } else {
            return createResponse(doc, "", TextExtractionStatus.error, "No source or type found");
        }
    }

    //skip if text already extracted and proper etag
    if (doc.text && doc.text.length > 0 && doc.text_etag === doc.content.etag) {
        return createResponse(doc, doc.text, TextExtractionStatus.skipped, "Text already extracted");
    }

    let fileBuffer: Buffer;
    try {
        fileBuffer = await fetchBlobAsBuffer(client, doc.content.source);
    } catch (e: any) {
        log.error(`Error reading file: ${e}`);
        // The thrown value is not guaranteed to be an Error with a message.
        const reason = typeof e?.message === "string" ? e.message : String(e);
        return createResponse(doc, "", TextExtractionStatus.error, reason);
    }

    let txt: string;

    switch (doc.content.type) {
        case "application/pdf":
            txt = await mutoolPdfToText(fileBuffer);
            break;

        //docx
        case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            txt = await markdownWithMarkitdown(fileBuffer, "docx");
            break;

        //pptx
        case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            txt = await markdownWithMarkitdown(fileBuffer, "pptx");
            break;

        //html
        case "text/html":
            txt = await markdownWithPandoc(fileBuffer, "html");
            break;

        //opendocument
        case "application/vnd.oasis.opendocument.text":
            txt = await markdownWithPandoc(fileBuffer, "odt");
            break;

        //rtf
        case "application/rtf":
            txt = await markdownWithPandoc(fileBuffer, "rtf");
            break;

        // formats that are already text: plain text, markdown, csv, typescript, javascript, json
        case "text/plain":
        case "text/markdown":
        case "text/csv":
        case "application/typescript":
        case "application/javascript":
        case "application/json":
            txt = fileBuffer.toString("utf8");
            break;

        default:
            // Unknown mime type: accept the content only if it looks like text.
            if (sniffIfText(fileBuffer)) {
                txt = fileBuffer.toString("utf8"); //TODO: add charset detection
                break;
            }
            return createResponse(
                doc,
                doc.text ?? "",
                TextExtractionStatus.skipped,
                `Unsupported mime type: ${doc.content.type}`,
            );
    }

    const tokensData = countTokens(txt);
    // Fall back to a hash of the extracted text when the content has no etag.
    const etag = doc.content.etag ?? md5(txt);

    const updateData: CreateContentObjectPayload = {
        text: txt,
        text_etag: etag,
        tokens: {
            ...tokensData,
            etag: etag,
        },
    };

    await client.objects.update(doc.id, updateData);

    // Report the tokens that were just computed and stored; `doc.tokens` still holds the value
    // loaded before the update (undefined on a first extraction).
    return createResponse({ ...doc, tokens: updateData.tokens }, txt, TextExtractionStatus.success);
}
|
|
158
|
+
|
|
159
|
+
/**
 * Builds the activity result describing a text extraction attempt.
 *
 * @param doc - Object the extraction ran on; supplies `objectId` and `tokens`.
 * @param text - Text associated with the outcome; only its length and emptiness are reported.
 * @param status - Outcome of the extraction.
 * @param message - Optional explanation of the outcome.
 * @returns The result object, without the text itself.
 */
function createResponse(
    doc: ContentObject,
    text: string,
    status: TextExtractionStatus,
    message?: string,
): TextExtractionResult {
    const { id: objectId, tokens } = doc;
    const len = text.length;
    const result: TextExtractionResult = {
        status,
        message,
        tokens,
        len,
        objectId,
        // a string is truthy exactly when it is not empty
        hasText: len > 0,
    };
    return result;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Heuristically decides whether a buffer holds UTF-8 text.
 *
 * The buffer is rejected when it is larger than 500 KiB, when more than 10% of its first
 * 1000 bytes are control characters other than tab, line feed and carriage return, or when
 * decoding it as UTF-8 yields an empty string or a replacement character (U+FFFD).
 *
 * @param buf - Content to inspect.
 * @returns `true` when the content looks like text, `false` otherwise.
 */
function sniffIfText(buf: Buffer) {
    const maxBytes = 500 * 1024;
    // If file is too large, don't even try
    if (buf.length > maxBytes) {
        return false;
    }

    // Only the beginning of the buffer is sampled for control characters.
    const sample = buf.subarray(0, 1000);
    const isControl = (b: number) => b < 32 && b !== 9 && b !== 10 && b !== 13;
    const controlCount = sample.reduce((count, b) => (isControl(b) ? count + 1 : count), 0);

    // For an empty sample the ratio is NaN, which is not greater than the threshold;
    // the empty string check below then rejects the buffer.
    if (controlCount / sample.length > 0.1) {
        return false;
    }

    // Invalid UTF-8 sequences decode to the replacement character.
    try {
        const decoded = buf.toString("utf8");
        if (decoded.length === 0) {
            return false;
        }
        return !decoded.includes("\uFFFD");
    } catch (e) {
        return false;
    }
}
|