@vertesia/workflow 0.42.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +13 -0
- package/README.md +24 -0
- package/bin/bundle-workflows.mjs +26 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +32 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +66 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +18 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/chunkDocument.js +79 -0
- package/lib/cjs/activities/chunkDocument.js.map +1 -0
- package/lib/cjs/activities/createDocumentFromOther.js +64 -0
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
- package/lib/cjs/activities/executeInteraction.js +135 -0
- package/lib/cjs/activities/executeInteraction.js.map +1 -0
- package/lib/cjs/activities/extractDocumentText.js +140 -0
- package/lib/cjs/activities/extractDocumentText.js.map +1 -0
- package/lib/cjs/activities/generateDocumentProperties.js +59 -0
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
- package/lib/cjs/activities/generateEmbeddings.js +292 -0
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
- package/lib/cjs/activities/generateImageRendition.js +104 -0
- package/lib/cjs/activities/generateImageRendition.js.map +1 -0
- package/lib/cjs/activities/generateOrAssignContentType.js +103 -0
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/cjs/activities/getObjectFromStore.js +20 -0
- package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
- package/lib/cjs/activities/index.js +54 -0
- package/lib/cjs/activities/index.js.map +1 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js +102 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +51 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/cjs/activities/notifyWebhook.js +34 -0
- package/lib/cjs/activities/notifyWebhook.js.map +1 -0
- package/lib/cjs/activities/setDocumentStatus.js +15 -0
- package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
- package/lib/cjs/conversion/TextractProcessor.js +416 -0
- package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
- package/lib/cjs/conversion/image.js +22 -0
- package/lib/cjs/conversion/image.js.map +1 -0
- package/lib/cjs/conversion/mutool.js +147 -0
- package/lib/cjs/conversion/mutool.js.map +1 -0
- package/lib/cjs/conversion/pandoc.js +39 -0
- package/lib/cjs/conversion/pandoc.js.map +1 -0
- package/lib/cjs/conversion/pdf.js +13 -0
- package/lib/cjs/conversion/pdf.js.map +1 -0
- package/lib/cjs/dsl/conditions.js +81 -0
- package/lib/cjs/dsl/conditions.js.map +1 -0
- package/lib/cjs/dsl/dsl-workflow.js +223 -0
- package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
- package/lib/cjs/dsl/projections.js +59 -0
- package/lib/cjs/dsl/projections.js.map +1 -0
- package/lib/cjs/dsl/setup/ActivityContext.js +96 -0
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/index.js +16 -0
- package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
- package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
- package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/cjs/dsl/validation.js +122 -0
- package/lib/cjs/dsl/validation.js.map +1 -0
- package/lib/cjs/dsl/vars.js +341 -0
- package/lib/cjs/dsl/vars.js.map +1 -0
- package/lib/cjs/dsl/walk.js +100 -0
- package/lib/cjs/dsl/walk.js.map +1 -0
- package/lib/cjs/errors.js +36 -0
- package/lib/cjs/errors.js.map +1 -0
- package/lib/cjs/index.js +43 -0
- package/lib/cjs/index.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +69 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js +73 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js +91 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/index.js +12 -0
- package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +55 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/cjs/iterative-generation/types.js +5 -0
- package/lib/cjs/iterative-generation/types.js.map +1 -0
- package/lib/cjs/iterative-generation/utils.js +118 -0
- package/lib/cjs/iterative-generation/utils.js.map +1 -0
- package/lib/cjs/package.json +3 -0
- package/lib/cjs/result-types.js +10 -0
- package/lib/cjs/result-types.js.map +1 -0
- package/lib/cjs/system/generateObjectText.js +89 -0
- package/lib/cjs/system/generateObjectText.js.map +1 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js +52 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +37 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/cjs/utils/auth.js +15 -0
- package/lib/cjs/utils/auth.js.map +1 -0
- package/lib/cjs/utils/blobs.js +63 -0
- package/lib/cjs/utils/blobs.js.map +1 -0
- package/lib/cjs/utils/client.js +25 -0
- package/lib/cjs/utils/client.js.map +1 -0
- package/lib/cjs/utils/expand-vars.js +33 -0
- package/lib/cjs/utils/expand-vars.js.map +1 -0
- package/lib/cjs/utils/memory.js +72 -0
- package/lib/cjs/utils/memory.js.map +1 -0
- package/lib/cjs/utils/tokens.js +38 -0
- package/lib/cjs/utils/tokens.js.map +1 -0
- package/lib/cjs/vars.js +20 -0
- package/lib/cjs/vars.js.map +1 -0
- package/lib/cjs/workflows.js +17 -0
- package/lib/cjs/workflows.js.map +1 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +29 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +63 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +15 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/chunkDocument.js +76 -0
- package/lib/esm/activities/chunkDocument.js.map +1 -0
- package/lib/esm/activities/createDocumentFromOther.js +58 -0
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
- package/lib/esm/activities/executeInteraction.js +131 -0
- package/lib/esm/activities/executeInteraction.js.map +1 -0
- package/lib/esm/activities/extractDocumentText.js +137 -0
- package/lib/esm/activities/extractDocumentText.js.map +1 -0
- package/lib/esm/activities/generateDocumentProperties.js +56 -0
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
- package/lib/esm/activities/generateEmbeddings.js +256 -0
- package/lib/esm/activities/generateEmbeddings.js.map +1 -0
- package/lib/esm/activities/generateImageRendition.js +98 -0
- package/lib/esm/activities/generateImageRendition.js.map +1 -0
- package/lib/esm/activities/generateOrAssignContentType.js +100 -0
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/esm/activities/getObjectFromStore.js +17 -0
- package/lib/esm/activities/getObjectFromStore.js.map +1 -0
- package/lib/esm/activities/index.js +21 -0
- package/lib/esm/activities/index.js.map +1 -0
- package/lib/esm/activities/media/processPdfWithTextract.js +98 -0
- package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +48 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/esm/activities/notifyWebhook.js +31 -0
- package/lib/esm/activities/notifyWebhook.js.map +1 -0
- package/lib/esm/activities/setDocumentStatus.js +12 -0
- package/lib/esm/activities/setDocumentStatus.js.map +1 -0
- package/lib/esm/conversion/TextractProcessor.js +409 -0
- package/lib/esm/conversion/TextractProcessor.js.map +1 -0
- package/lib/esm/conversion/image.js +16 -0
- package/lib/esm/conversion/image.js.map +1 -0
- package/lib/esm/conversion/mutool.js +139 -0
- package/lib/esm/conversion/mutool.js.map +1 -0
- package/lib/esm/conversion/pandoc.js +36 -0
- package/lib/esm/conversion/pandoc.js.map +1 -0
- package/lib/esm/conversion/pdf.js +7 -0
- package/lib/esm/conversion/pdf.js.map +1 -0
- package/lib/esm/dsl/conditions.js +75 -0
- package/lib/esm/dsl/conditions.js.map +1 -0
- package/lib/esm/dsl/dsl-workflow.js +216 -0
- package/lib/esm/dsl/dsl-workflow.js.map +1 -0
- package/lib/esm/dsl/projections.js +55 -0
- package/lib/esm/dsl/projections.js.map +1 -0
- package/lib/esm/dsl/setup/ActivityContext.js +91 -0
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/index.js +12 -0
- package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/providers.js +61 -0
- package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/esm/dsl/test/test-child-workflow.js +5 -0
- package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/esm/dsl/validation.js +118 -0
- package/lib/esm/dsl/validation.js.map +1 -0
- package/lib/esm/dsl/vars.js +335 -0
- package/lib/esm/dsl/vars.js.map +1 -0
- package/lib/esm/dsl/walk.js +96 -0
- package/lib/esm/dsl/walk.js.map +1 -0
- package/lib/esm/errors.js +30 -0
- package/lib/esm/errors.js.map +1 -0
- package/lib/esm/index.js +25 -0
- package/lib/esm/index.js.map +1 -0
- package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
- package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +66 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generatePart.js +70 -0
- package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generateToc.js +88 -0
- package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/index.js +5 -0
- package/lib/esm/iterative-generation/activities/index.js.map +1 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +52 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/esm/iterative-generation/types.js +2 -0
- package/lib/esm/iterative-generation/types.js.map +1 -0
- package/lib/esm/iterative-generation/utils.js +109 -0
- package/lib/esm/iterative-generation/utils.js.map +1 -0
- package/lib/esm/result-types.js +7 -0
- package/lib/esm/result-types.js.map +1 -0
- package/lib/esm/system/generateObjectText.js +86 -0
- package/lib/esm/system/generateObjectText.js.map +1 -0
- package/lib/esm/system/notifyWebhookWorkflow.js +49 -0
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +34 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/esm/utils/auth.js +8 -0
- package/lib/esm/utils/auth.js.map +1 -0
- package/lib/esm/utils/blobs.js +52 -0
- package/lib/esm/utils/blobs.js.map +1 -0
- package/lib/esm/utils/client.js +22 -0
- package/lib/esm/utils/client.js.map +1 -0
- package/lib/esm/utils/expand-vars.js +30 -0
- package/lib/esm/utils/expand-vars.js.map +1 -0
- package/lib/esm/utils/memory.js +60 -0
- package/lib/esm/utils/memory.js.map +1 -0
- package/lib/esm/utils/tokens.js +34 -0
- package/lib/esm/utils/tokens.js.map +1 -0
- package/lib/esm/vars.js +4 -0
- package/lib/esm/vars.js.map +1 -0
- package/lib/esm/workflows.js +9 -0
- package/lib/esm/workflows.js.map +1 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +29 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/chunkDocument.d.ts +18 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -0
- package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
- package/lib/types/activities/executeInteraction.d.ts +40 -0
- package/lib/types/activities/executeInteraction.d.ts.map +1 -0
- package/lib/types/activities/extractDocumentText.d.ts +9 -0
- package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
- package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
- package/lib/types/activities/generateEmbeddings.d.ts +49 -0
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
- package/lib/types/activities/generateImageRendition.d.ts +17 -0
- package/lib/types/activities/generateImageRendition.d.ts.map +1 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
- package/lib/types/activities/getObjectFromStore.d.ts +14 -0
- package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
- package/lib/types/activities/index.d.ts +21 -0
- package/lib/types/activities/index.d.ts.map +1 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +14 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
- package/lib/types/activities/notifyWebhook.d.ts +17 -0
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
- package/lib/types/activities/setDocumentStatus.d.ts +15 -0
- package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
- package/lib/types/conversion/TextractProcessor.d.ts +45 -0
- package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
- package/lib/types/conversion/image.d.ts +9 -0
- package/lib/types/conversion/image.d.ts.map +1 -0
- package/lib/types/conversion/mutool.d.ts +19 -0
- package/lib/types/conversion/mutool.d.ts.map +1 -0
- package/lib/types/conversion/pandoc.d.ts +2 -0
- package/lib/types/conversion/pandoc.d.ts.map +1 -0
- package/lib/types/conversion/pdf.d.ts +2 -0
- package/lib/types/conversion/pdf.d.ts.map +1 -0
- package/lib/types/dsl/conditions.d.ts +2 -0
- package/lib/types/dsl/conditions.d.ts.map +1 -0
- package/lib/types/dsl/dsl-workflow.d.ts +5 -0
- package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
- package/lib/types/dsl/projections.d.ts +4 -0
- package/lib/types/dsl/projections.d.ts.map +1 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts +14 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
- package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
- package/lib/types/dsl/validation.d.ts +4 -0
- package/lib/types/dsl/validation.d.ts.map +1 -0
- package/lib/types/dsl/vars.d.ts +48 -0
- package/lib/types/dsl/vars.d.ts.map +1 -0
- package/lib/types/dsl/walk.d.ts +18 -0
- package/lib/types/dsl/walk.d.ts.map +1 -0
- package/lib/types/errors.d.ts +16 -0
- package/lib/types/errors.d.ts.map +1 -0
- package/lib/types/index.d.ts +24 -0
- package/lib/types/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/index.d.ts +5 -0
- package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
- package/lib/types/iterative-generation/types.d.ts +79 -0
- package/lib/types/iterative-generation/types.d.ts.map +1 -0
- package/lib/types/iterative-generation/utils.d.ts +27 -0
- package/lib/types/iterative-generation/utils.d.ts.map +1 -0
- package/lib/types/result-types.d.ts +22 -0
- package/lib/types/result-types.d.ts.map +1 -0
- package/lib/types/system/generateObjectText.d.ts +4 -0
- package/lib/types/system/generateObjectText.d.ts.map +1 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts +6 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +40 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
- package/lib/types/utils/auth.d.ts +4 -0
- package/lib/types/utils/auth.d.ts.map +1 -0
- package/lib/types/utils/blobs.d.ts +8 -0
- package/lib/types/utils/blobs.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts +7 -0
- package/lib/types/utils/client.d.ts.map +1 -0
- package/lib/types/utils/expand-vars.d.ts +8 -0
- package/lib/types/utils/expand-vars.d.ts.map +1 -0
- package/lib/types/utils/memory.d.ts +12 -0
- package/lib/types/utils/memory.d.ts.map +1 -0
- package/lib/types/utils/tokens.d.ts +11 -0
- package/lib/types/utils/tokens.d.ts.map +1 -0
- package/lib/types/vars.d.ts +3 -0
- package/lib/types/vars.d.ts.map +1 -0
- package/lib/types/workflows.d.ts +9 -0
- package/lib/types/workflows.d.ts.map +1 -0
- package/lib/workflows-bundle.js +18394 -0
- package/package.json +109 -0
- package/src/activities/advanced/createDocumentTypeFromInteractionRun.ts +54 -0
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +97 -0
- package/src/activities/advanced/updateDocumentFromInteractionRun.ts +34 -0
- package/src/activities/chunkDocument.ts +124 -0
- package/src/activities/createDocumentFromOther.ts +92 -0
- package/src/activities/executeInteraction.ts +191 -0
- package/src/activities/extractDocumentText.ts +174 -0
- package/src/activities/generateDocumentProperties.ts +93 -0
- package/src/activities/generateEmbeddings.ts +345 -0
- package/src/activities/generateImageRendition.ts +134 -0
- package/src/activities/generateOrAssignContentType.ts +152 -0
- package/src/activities/getObjectFromStore.ts +31 -0
- package/src/activities/index.ts +21 -0
- package/src/activities/media/processPdfWithTextract.ts +141 -0
- package/src/activities/media/transcribeMediaWithGladia.ts +83 -0
- package/src/activities/notifyWebhook.test.ts +32 -0
- package/src/activities/notifyWebhook.ts +51 -0
- package/src/activities/setDocumentStatus.ts +25 -0
- package/src/conversion/TextractProcessor.ts +505 -0
- package/src/conversion/image.test.ts +26 -0
- package/src/conversion/image.ts +22 -0
- package/src/conversion/mutool.test.ts +74 -0
- package/src/conversion/mutool.ts +180 -0
- package/src/conversion/pandoc.test.ts +22 -0
- package/src/conversion/pandoc.ts +44 -0
- package/src/conversion/pdf.test.ts +35 -0
- package/src/conversion/pdf.ts +8 -0
- package/src/dsl/conditions.ts +76 -0
- package/src/dsl/dsl-workflow.test.ts +58 -0
- package/src/dsl/dsl-workflow.ts +235 -0
- package/src/dsl/ms.d.ts +11 -0
- package/src/dsl/projections.test.ts +159 -0
- package/src/dsl/projections.ts +72 -0
- package/src/dsl/setup/ActivityContext.ts +106 -0
- package/src/dsl/setup/fetch/DataProvider.ts +45 -0
- package/src/dsl/setup/fetch/index.ts +19 -0
- package/src/dsl/setup/fetch/providers.ts +67 -0
- package/src/dsl/test/test-child-workflow.ts +6 -0
- package/src/dsl/validation.test.ts +257 -0
- package/src/dsl/validation.ts +125 -0
- package/src/dsl/vars.test.ts +245 -0
- package/src/dsl/vars.ts +340 -0
- package/src/dsl/walk.test.ts +81 -0
- package/src/dsl/walk.ts +103 -0
- package/src/dsl/workflow-exec-child.test.ts +182 -0
- package/src/dsl/workflow-fetch.test.ts +135 -0
- package/src/dsl/workflow-import.test.ts +89 -0
- package/src/dsl/workflow.test.ts +110 -0
- package/src/errors.ts +24 -0
- package/src/index.ts +27 -0
- package/src/iterative-generation/activities/extractToc.ts +49 -0
- package/src/iterative-generation/activities/finalizeOutput.ts +77 -0
- package/src/iterative-generation/activities/generatePart.ts +82 -0
- package/src/iterative-generation/activities/generateToc.ts +98 -0
- package/src/iterative-generation/activities/index.ts +4 -0
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +67 -0
- package/src/iterative-generation/types.ts +99 -0
- package/src/iterative-generation/utils.ts +123 -0
- package/src/result-types.ts +25 -0
- package/src/system/generateObjectText.ts +109 -0
- package/src/system/notifyWebhookWorkflow.ts +64 -0
- package/src/system/recalculateEmbeddingsWorkflow.ts +46 -0
- package/src/utils/auth.ts +10 -0
- package/src/utils/blobs.ts +58 -0
- package/src/utils/client.ts +31 -0
- package/src/utils/expand-vars.ts +31 -0
- package/src/utils/memory.ts +66 -0
- package/src/utils/tokens.ts +44 -0
- package/src/vars.ts +3 -0
- package/src/workflows.ts +9 -0
@@ -0,0 +1,345 @@
|
|
1
|
+
import { ComposableClient } from "@vertesia/client";
|
2
|
+
import { ContentObject, DSLActivityExecutionPayload, DSLActivitySpec, ProjectConfigurationEmbeddings, SupportedEmbeddingTypes } from "@vertesia/common";
|
3
|
+
import { EmbeddingsResult } from "@llumiverse/core";
|
4
|
+
import { log } from "@temporalio/activity";
|
5
|
+
import * as tf from '@tensorflow/tfjs-node';
|
6
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
7
|
+
import { NoDocumentFound } from '../errors.js';
|
8
|
+
import { fetchBlobAsBase64, md5 } from "../utils/blobs.js";
|
9
|
+
import { countTokens } from "../utils/tokens.js";
|
10
|
+
|
11
|
+
|
12
|
+
/**
 * Parameters accepted by the `generateEmbeddings` activity.
 */
export interface GenerateEmbeddingsParams {
    /**
     * Kind of embedding to generate. Used as the key into the project's
     * embeddings configuration and to pick the generator to run.
     */
    type: SupportedEmbeddingTypes;
    /**
     * Included in the start-up log entry of the activity.
     * NOTE(review): the etag-based skip that would consume this flag is
     * commented out in the text generator — confirm whether it is still honored elsewhere.
     */
    force?: boolean;
    /**
     * NOTE(review): not read by `generateEmbeddings` itself; the text generator
     * takes its model from the project embeddings configuration instead.
     */
    model?: string;
    /**
     * NOTE(review): not read by `generateEmbeddings` itself; the environment
     * checked by the activity comes from the project embeddings configuration.
     */
    environment?: string;
}
|
18
|
+
|
19
|
+
/**
 * DSL activity spec for the `generateEmbeddings` activity: a
 * `DSLActivitySpec` parameterized with {@link GenerateEmbeddingsParams}
 * whose `name` is fixed to the literal `generateEmbeddings`.
 */
export interface GenerateEmbeddings
    extends DSLActivitySpec<GenerateEmbeddingsParams> {
    /** Literal activity name identifying this spec. */
    name: "generateEmbeddings";
}
|
22
|
+
|
23
|
+
/**
 * Activity entry point: generates embeddings of the requested type for the
 * object targeted by the payload.
 *
 * Flow:
 *  1. Resolve the activity context (params, client, object id, project fetcher).
 *  2. Load the project and the embeddings configuration for `params.type`.
 *  3. Return a `skipped` result when that embedding type is not enabled.
 *  4. Retrieve the object and dispatch to the text/properties or image generator.
 *
 * @param payload DSL activity execution payload, resolved through `setupActivity`.
 * @returns The result object produced by the selected generator, a `skipped`
 *          result when the type is disabled on the project, or a `failed`
 *          result for an unsupported embedding type.
 * @throws NoDocumentFound when the project, the embeddings configuration for
 *         the requested type, the object, or the object's `content` is missing.
 * @throws Error when the embeddings configuration has no `environment`.
 */
export async function generateEmbeddings(payload: DSLActivityExecutionPayload) {
    const { params, client, objectId, fetchProject } = await setupActivity<GenerateEmbeddingsParams>(payload);
    const { force, type } = params;

    // Validate the project first, then read its configuration exactly once.
    const projectData = await fetchProject();
    if (!projectData) {
        throw new NoDocumentFound('Project not found', [payload.project_id]);
    }

    const config = projectData.configuration.embeddings[type];
    if (!config) {
        throw new NoDocumentFound('Embeddings configuration not found', [objectId]);
    }

    if (!config.enabled) {
        log.info(`Embeddings generation disabled for type ${type} on project: ${projectData.name} (${projectData.namespace})`, { config });
        return { id: objectId, status: "skipped", message: `Embeddings generation disabled for type ${type}` };
    }

    log.info(`${type} embedding generation starting for object ${objectId}`, { force, config });

    if (!config.environment) {
        throw new Error('No environment found in project configuration. Set environment in project configuration to generate embeddings.');
    }

    const document = await client.objects.retrieve(objectId, "+text +parts +embeddings +tokens +properties");

    if (!document) {
        throw new NoDocumentFound('Document not found', [objectId]);
    }

    // The content check applies to every embedding type, not only images.
    if (!document.content) {
        throw new NoDocumentFound('Document content not found', [objectId]);
    }

    switch (type) {
        // Text and properties embeddings share the same generator.
        case SupportedEmbeddingTypes.text:
        case SupportedEmbeddingTypes.properties:
            return generateTextEmbeddings({ client, config, document, type });
        case SupportedEmbeddingTypes.image:
            return generateImageEmbeddings({ client, config, document, type });
        default:
            return { id: objectId, status: "failed", message: `unsupported embedding type: ${type}` };
    }
}
|
95
|
+
|
96
|
+
|
97
|
+
/**
 * Arguments handed from the `generateEmbeddings` activity to the
 * type-specific generator functions.
 */
interface ExecuteGenerateEmbeddingsParams {
    /** Client used to read objects and to store the generated embeddings. */
    client: ComposableClient;
    /** Project embeddings configuration for the requested embedding type. */
    config: ProjectConfigurationEmbeddings;
    /** Object for which embeddings are generated. */
    document: ContentObject;
    /** Embedding type being generated. */
    type: SupportedEmbeddingTypes;
    /** NOTE(review): not destructured by the text or image generator signatures — confirm usage. */
    force?: boolean;
    /** NOTE(review): not destructured by the text or image generator signatures — confirm usage. */
    property?: string;
}
|
105
|
+
|
106
|
+
/**
 * Generates `text` or `properties` embeddings for a document and stores them
 * on the object through `client.objects.setEmbedding`.
 *
 * - For `text`, the token count is computed and persisted first when the
 *   document has none.
 * - When a `text` document exceeds `config.max_tokens` (default 8000), each
 *   part is embedded individually and stored on the part. The document-level
 *   vector is then derived from the part vectors with
 *   `computeAttentionEmbedding` and stored with model `"attention"`.
 * - Otherwise the JSON-serialized `document[type]` is embedded in one call.
 *
 * Failures on a single part never abort the run. They are reported per part
 * in `part_embeddings` (`status`, `message`, `error`).
 *
 * @returns A result object whose `status` is `completed`, `skipped`, `failed`
 *          or `error`, plus details depending on the path taken.
 */
async function generateTextEmbeddings({ document, client, type, config }: ExecuteGenerateEmbeddingsParams) {
    // The etag-based skip is currently disabled:
    // if (!force && document.embeddings[type]?.etag === (document.text_etag ?? md5(document.text))) {
    //     return { id: objectId, status: "skipped", message: "embeddings already generated" }
    // }

    if (!document) {
        return { status: "error", message: "document is null or undefined" };
    }

    if (type !== SupportedEmbeddingTypes.text && type !== SupportedEmbeddingTypes.properties) {
        return { id: document.id, status: "failed", message: `unsupported embedding type: ${type}` };
    }

    if (type === SupportedEmbeddingTypes.text && !document.text) {
        return { id: document.id, status: "failed", message: "no text found" };
    }
    if (type === SupportedEmbeddingTypes.properties && !document.properties) {
        return { id: document.id, status: "failed", message: "no properties found" };
    }

    const { environment, model } = config;

    // Normalizes anything thrown into a printable message.
    const errorMessage = (err: unknown): string => (err instanceof Error ? err.message : String(err));

    // Count tokens if not already done (text embeddings only).
    if (!document.tokens?.count && type === SupportedEmbeddingTypes.text) {
        log.debug('Updating token count for document: ' + document.id);
        // Build the token record once so the stored and in-memory copies cannot diverge.
        const tokens = {
            ...countTokens(document.text!),
            etag: document.text_etag ?? md5(document.text!),
        };
        await client.objects.update(document.id, { tokens });
        document.tokens = tokens;
    }

    const maxTokens = config.max_tokens ?? 8000;
    const tokenCount = document.tokens?.count;

    log.info(`Generating ${type} embeddings for document ${document.id}`);

    // Small enough (or properties): embed the whole document in a single call.
    if (!(type === SupportedEmbeddingTypes.text && tokenCount && tokenCount > maxTokens)) {
        log.info(`Generating ${type} embeddings for document`);

        // NOTE(review): `model` is not forwarded here, unlike the per-part call below — confirm whether intentional.
        const res = await generateEmbeddingsFromStudio(JSON.stringify(document[type]), environment, client);
        if (!res || !res.values) {
            return { id: document.id, status: "failed", message: "no embeddings generated" };
        }

        log.info(`${type} embeddings generated for document ${document.id}`, { len: res.values.length });
        await client.objects.setEmbedding(document.id, type, {
            values: res.values,
            model: res.model,
            etag: document.text_etag,
        });

        return { id: document.id, type, status: "completed", len: res.values.length };
    }

    // Too large: embed the parts, then derive the document-level vector from them.
    log.info('Document too large, generating embeddings for parts');

    if (!document.parts || document.parts.length === 0) {
        return { id: document.id, status: "skipped", message: "no parts found" };
    }

    const docParts = await Promise.all(
        document.parts.map((partId) => client.objects.retrieve(partId, "+text +embeddings +properties +tokens")),
    );
    log.info(`Retrieved ${docParts.length} parts`);

    const generatePartEmbeddings = async (part: ContentObject<any>, i: number) => {
        try {
            log.info(`Generating embeddings for part ${part.id}`, { text_len: part.text?.length });
            if (!part.text) {
                return { id: part.id, number: i, result: null, status: "skipped", message: "no text found" };
            }

            if (part.tokens?.count && part.tokens.count > maxTokens) {
                log.info('Part too large, skipping embeddings generation for part', { part: part.id, tokens: part.tokens.count });
                return { id: part.id, number: i, result: null, status: "skipped", message: "part too large" };
            }

            const e = await generateEmbeddingsFromStudio(part.text, environment, client, model).catch((err) => {
                log.error('Error generating embeddings for part', { part: part.id, tokens: part.tokens, text_length: part.text?.length, error: err });
                return null;
            });

            if (!e || !e.values) {
                return { id: part.id, number: i, result: null, status: "failed", message: "no embeddings generated" };
            }

            log.info(`Embeddings generated for part ${part.id}, updating object in the store.`);
            try {
                await client.objects.setEmbedding(part.id, SupportedEmbeddingTypes.text, {
                    values: e.values,
                    model: e.model,
                    etag: part.text_etag,
                });
            } catch (err: unknown) {
                // Surface the storage failure in the per-part report. The computed
                // vector is kept so it still contributes to the document-level
                // embedding, as it did before this failure was reported.
                log.info(`Error updating embeddings on part ${part.id}`);
                return { id: part.id, number: i, result: e, status: "failed", message: "error setting embeddings on part", error: errorMessage(err) };
            }

            log.info('Generated embeddings for part: ' + part.id);
            return { id: part.id, number: i, result: e, status: "completed" };
        } catch (err: unknown) {
            log.info(`Error generating ${type} embeddings for part ${part.id} of ${document.id}`, { error: err });
            return { id: part.id, number: i, result: null, status: "failed", message: "error generating embeddings", error: errorMessage(err) };
        }
    };

    const res = await Promise.all(docParts.map((p, i) => generatePartEmbeddings(p, i)));
    const partIds = docParts.map((p) => p.id);
    const partSummaries = res.map((r) => ({ id: r.id, status: r.status, error: r.error, message: r.message }));

    // Filter out parts without embeddings.
    const validEmbeddings = res.filter((item) => item.result !== null) as { id: string, number: number, result: EmbeddingsResult }[];

    // Nothing to aggregate: do not store a document-level vector built from zero inputs.
    if (validEmbeddings.length === 0) {
        return { id: document.id, status: "failed", message: "no embeddings generated", parts: partIds, part_embeddings: partSummaries };
    }

    // Derive the document-level embedding from the part vectors.
    log.info('Computing document-level embedding using TF');
    const documentEmbedding = computeAttentionEmbedding(validEmbeddings.map((item) => item.result.values));

    // Save the document-level embedding.
    await client.objects.setEmbedding(document.id, type, {
        values: documentEmbedding,
        model: "attention",
        etag: document.text_etag,
    });

    return { id: document.id, status: "completed", parts: partIds, len: documentEmbedding.length, part_embeddings: partSummaries };
}
|
249
|
+
|
250
|
+
/**
 * Computes an image embedding for a document from its PNG rendition and
 * stores it on the document under `SupportedEmbeddingTypes.image`.
 *
 * Returns a `"failed"` status object (without throwing) when the content type
 * is neither `image/*` nor contains `pdf`, or when the embeddings call yields
 * no values.
 *
 * @throws Error when the rendition is still being generated.
 * @throws NoDocumentFound when the rendition failed, is missing, or has no source.
 */
async function generateImageEmbeddings({ document, client, type, config }: ExecuteGenerateEmbeddingsParams) {
    const documentId = document.id;
    log.info('Generating image embeddings for document ' + documentId, { content: document.content });

    const mimeType = document.content?.type;
    const isImage = mimeType?.startsWith('image/');
    const isPdf = mimeType?.includes('pdf');
    if (!isImage && !isPdf) {
        return { id: documentId, type, status: "failed", message: "content is not an image" };
    }

    const { environment, model } = config;

    // Ask for a PNG rendition capped at 1024px, generating it if it does not exist yet.
    const resRnd = await client.store.objects.getRendition(documentId, {
        format: "image/png",
        max_hw: 1024,
        generate_if_missing: true,
    });

    if (resRnd.status === 'generating') {
        throw new Error("Rendition is generating, will retry later");
    }
    if (resRnd.status === "failed" || !resRnd.rendition) {
        throw new NoDocumentFound("Rendition retrieval failed", [documentId]);
    }

    const renditionSource = resRnd.rendition.content.source;
    if (!renditionSource) {
        throw new NoDocumentFound("No source found in rendition", [documentId]);
    }

    const image = await fetchBlobAsBase64(client, renditionSource);

    let embedding;
    try {
        embedding = await client.environments.embeddings(environment, { image, model });
    } catch (e) {
        // Log with context, then let the caller see the original failure.
        log.error('Error generating embeddings for image', { error: e });
        throw e;
    }

    if (!embedding || !embedding.values) {
        return { id: documentId, status: "failed", message: "no embeddings generated" };
    }

    await client.objects.setEmbedding(documentId, SupportedEmbeddingTypes.image, {
        values: embedding.values,
        model: embedding.model,
        etag: document.text_etag,
    });

    return { id: documentId, type, status: "completed", len: embedding.values.length };
}
|
299
|
+
|
300
|
+
/**
 * Requests an embedding for `text` from the given environment.
 *
 * @param text  text to embed; its length is logged before the call
 * @param env   environment identifier passed to `client.environments.embeddings`
 * @param client client used to perform the call
 * @param model optional model name forwarded with the request
 * @returns the result of `client.environments.embeddings`
 * @throws rethrows any error raised by the embeddings call after logging it
 */
async function generateEmbeddingsFromStudio(text: string, env: string, client: ComposableClient, model?: string): Promise<EmbeddingsResult> {
    log.info(`Generating embeddings for text of ${text.length} chars with environment ${env}`);

    try {
        return await client.environments.embeddings(env, { text, model });
    } catch (e) {
        log.error('Error generating embeddings for text', { error: e });
        throw e;
    }
}
|
313
|
+
|
314
|
+
/**
 * Collapses a list of embeddings into a single vector by taking a
 * softmax-weighted sum of the rows.
 *
 * NOTE: the weights are drawn from `tf.randomNormal` on every call and are
 * never trained, so the result is a random convex combination of the inputs
 * and is not reproducible from one call to the next.
 *
 * @param embeddingsArray one embedding per row, passed to `tf.tensor` as-is
 * @param axis axis handed to `tf.sum` when reducing the weighted rows (default 0)
 * @returns the reduced values, or `[]` when `embeddingsArray` is empty
 */
function computeAttentionEmbedding(embeddingsArray: number[][], axis: number = 0) {
    if (embeddingsArray.length === 0) return [];
    log.info('Computing attention embedding for', { embeddingsArrays: embeddingsArray.map(a => a.length) });
    const start = Date.now();

    // tf.tidy releases every intermediate tensor created inside the callback
    // (input tensor, random weights, both transposes and the product), even when
    // an op throws. Only the returned tensor survives and is disposed below.
    const documentEmbeddingTensor = tf.tidy(() => {
        const embeddingsTensor = tf.tensor(embeddingsArray);

        // One random weight per row, normalised with softmax.
        const attentionScores = tf.softmax(tf.randomNormal([embeddingsArray.length]));

        // Transpose so the per-row scores broadcast along the last axis, then restore the layout.
        const weightedEmbeddings = tf.mul(embeddingsTensor.transpose(), attentionScores).transpose();
        return tf.sum(weightedEmbeddings, axis);
    });

    let documentEmbedding: number[];
    try {
        // Convert the result back to a JavaScript array
        documentEmbedding = documentEmbeddingTensor.arraySync() as number[];
    } finally {
        documentEmbeddingTensor.dispose();
    }

    const duration = Date.now() - start;
    log.info(`Computed attention embeddings in ${duration}ms - array size: ${documentEmbedding.length}`, { length: documentEmbedding.length });

    return documentEmbedding;
}
|
@@ -0,0 +1,134 @@
|
|
1
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec, RenditionProperties } from "@vertesia/common";
|
2
|
+
import { log } from "@temporalio/activity";
|
3
|
+
import fs from 'fs';
|
4
|
+
import sharp, { FormatEnum } from "sharp";
|
5
|
+
import { imageResizer } from "../conversion/image.js";
|
6
|
+
import { pdfToImages } from "../conversion/mutool.js";
|
7
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
8
|
+
import { NoDocumentFound, WorkflowParamNotFound } from "../errors.js";
|
9
|
+
import { fetchBlobAsBuffer, saveBlobToTempFile } from "../utils/blobs.js";
|
10
|
+
import { NodeStreamSource } from "../utils/memory.js";
|
11
|
+
/** Parameters read by the `generateImageRendition` activity. */
interface GenerateImageRenditionParams {
    /** Maximum size of the longest side of the image. */
    max_hw: number;
    /** Format of the output image. */
    format: keyof FormatEnum;
    /** If true, generate a multi-page rendition. */
    multi_page?: boolean;
}
|
16
|
+
|
17
|
+
|
18
|
+
/** DSL activity spec whose `name` is fixed to `'generateImageRendition'`. */
export interface GenerateImageRendition extends DSLActivitySpec<GenerateImageRenditionParams> {
    name: 'generateImageRendition';
}
|
23
|
+
|
24
|
+
|
25
|
+
/**
 * Generates a resized rendition of an image or PDF document, uploads each
 * page and creates a `Rendition` object whose parent is the input document.
 *
 * PDF input is converted with `pdfToImages`; image input is copied to a temp
 * file. Each page is resized with `imageResizer(params.max_hw, params.format)`
 * and uploaded under `renditions/<objectId>/<max_hw>/<index>.<format>`.
 * The created object uses the first uploaded page as its content.
 *
 * `params.multi_page` is declared on the params type but is not read here.
 *
 * @param payload activity payload resolved through `setupActivity`
 * @returns `{ id, format, status: "success" }` for the created rendition
 * @throws NoDocumentFound when the document is missing (404), has no source,
 *         is neither an image nor a PDF, or the `Rendition` type is unknown
 * @throws WorkflowParamNotFound when `params.format` is not set
 * @throws Error when conversion yields no page or any page fails to upload
 */
export async function generateImageRendition(payload: DSLActivityExecutionPayload) {
    const { client, objectId, params } = await setupActivity<GenerateImageRenditionParams>(payload);

    const supportedNonImageInputTypes = ['application/pdf'];
    const inputObject = await client.objects.retrieve(objectId).catch((err) => {
        log.error(`Failed to retrieve document ${objectId}`, err);
        if (err.response?.status === 404) {
            throw new NoDocumentFound(`Document ${objectId} not found`, [objectId]);
        }
        throw err;
    });
    const renditionType = await client.types.getTypeByName('Rendition');

    if (!params.format) {
        log.error(`Format not found`);
        throw new WorkflowParamNotFound(`format`);
    }

    if (!renditionType) {
        log.error(`Rendition type not found`);
        throw new NoDocumentFound(`Rendition type not found`, [objectId]);
    }

    if (!inputObject.content?.source) {
        log.error(`Document ${objectId} has no source`);
        throw new NoDocumentFound(`Document ${objectId} has no source`, [objectId]);
    }

    if (!inputObject.content.type || (!inputObject.content.type?.startsWith('image/') && !supportedNonImageInputTypes.includes(inputObject.content.type))) {
        log.error(`Document ${objectId} is not an image`);
        throw new NoDocumentFound(`Document ${objectId} is not an image or pdf: ${inputObject.content.type}`, [objectId]);
    }

    // Rendition inputs to resize and upload (handed to sharp below — presumably file paths; confirm against pdfToImages).
    let renditionPages: string[] = [];

    if (inputObject.content.type === 'application/pdf') {
        // PDF: convert to one image per page.
        const pdfBuffer = await fetchBlobAsBuffer(client, inputObject.content.source);
        const pages = await pdfToImages(pdfBuffer);
        if (!pages.length) {
            log.error(`Failed to convert pdf to image`);
            throw new Error(`Failed to convert pdf to image`);
        }
        renditionPages = [...pages];
    } else if (inputObject.content.type.startsWith('image/')) {
        const tmpFile = await saveBlobToTempFile(client, inputObject.content.source);
        const filestats = fs.statSync(tmpFile);
        log.info(`Image ${objectId} copied to ${tmpFile}`, { filestats });
        renditionPages.push(tmpFile);
    }

    // Storage id of a rendition page; the index distinguishes the pages of a multi-part rendition.
    const getRenditionName = (index: number = 0) => {
        return `renditions/${objectId}/${params.max_hw}/${index}.${params.format}`;
    };

    if (!renditionPages || !renditionPages.length) {
        log.error(`Failed to generate rendition for ${objectId}`);
        throw new Error(`Failed to generate rendition for ${objectId}`);
    }

    log.info(`Uploading rendition for ${objectId} with ${renditionPages.length} pages (max_hw: ${params.max_hw}, format: ${params.format})`, { renditionPages });
    const uploads = renditionPages.map(async (page, i) => {
        const pageId = getRenditionName(i);
        const resized = sharp(page).pipe(imageResizer(params.max_hw, params.format));

        const source = new NodeStreamSource(
            resized,
            // Flatten the id into a file name. A string pattern would only replace
            // the first '/', leaving a path separator in the name.
            pageId.replace('renditions/', '').replace(/\//g, '_'),
            'image/' + params.format,
            pageId,
        );

        log.info(`Uploading rendition for ${objectId} page ${i} with max_hw: ${params.max_hw} and format: ${params.format}`);
        return client.objects.upload(source).catch((err) => {
            log.error(`Failed to upload rendition for ${objectId} page ${i}`, err);
            return null;
        });
    });

    const uploaded = await Promise.all(uploads);

    // Every page must be uploaded: `total_parts` below is derived from this list,
    // so a page that failed would otherwise be counted as part of the rendition.
    const failedPages = uploaded.flatMap((result, i) => (result ? [] : [i]));
    const firstPage = uploaded[0];
    if (!firstPage || failedPages.length > 0) {
        log.error(`Failed to upload rendition for ${objectId}`, { failedPages });
        throw new Error(`Failed to upload rendition for ${objectId}`);
    }

    log.info(`Creating rendition for ${objectId} with max_hw: ${params.max_hw} and format: ${params.format}`, { uploaded });
    const rendition = await client.objects.create({
        name: inputObject.name + ` [Rendition ${params.max_hw}]`,
        type: renditionType.id,
        parent: inputObject.id,
        content: firstPage,
        properties: {
            mime_type: 'image/' + params.format,
            source_etag: inputObject.content.source,
            // Both dimensions are recorded as the requested bound, not the actual output size.
            height: params.max_hw,
            width: params.max_hw,
            multipart: uploaded.length > 1,
            total_parts: uploaded.length
        } satisfies RenditionProperties
    });

    log.info(`Rendition ${rendition.id} created for ${objectId}`, { rendition });

    return { id: rendition.id, format: params.format, status: "success" };
}
|
@@ -0,0 +1,152 @@
|
|
1
|
+
import { CreateContentObjectTypePayload, DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
|
+
import { log } from "@temporalio/activity";
|
3
|
+
import { ActivityContext, setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
|
+
import { TruncateSpec, truncByMaxTokens } from "../utils/tokens.js";
|
5
|
+
import { InteractionExecutionParams, executeInteractionFromActivity } from "./executeInteraction.js";
|
6
|
+
|
7
|
+
/** Interaction name used for type selection when `interactionNames.selectDocumentType` is not set. */
const INT_SELECT_DOCUMENT_TYPE = "sys:SelectDocumentType";

/** Interaction name used for type generation when `interactionNames.generateMetadataModel` is not set. */
const INT_GENERATE_METADATA_MODEL = "sys:GenerateMetadataModel";
|
9
|
+
|
10
|
+
/** Parameters of the `generateOrAssignContentType` activity. */
export interface GenerateOrAssignContentTypeParams extends InteractionExecutionParams {
    /** Type names added to the list of existing type names given to the interaction. */
    typesHint?: string[];

    /**
     * Truncate the input document text to the specified max tokens.
     * The activity falls back to 4000 when this is not set.
     */
    truncate?: TruncateSpec;

    /** Overrides for the names of the interactions to execute. */
    interactionNames?: {
        /** @default "sys:SelectDocumentType" */
        selectDocumentType?: string;
        /** @default "sys:GenerateMetadataModel" */
        generateMetadataModel?: string;
    };
}
|
26
|
+
|
27
|
+
/** DSL activity spec whose `name` is fixed to `'generateOrAssignContentType'`. */
export interface GenerateOrAssignContentType extends DSLActivitySpec<GenerateOrAssignContentTypeParams> {
    name: 'generateOrAssignContentType';
}
|
30
|
+
|
31
|
+
/**
 * Assigns a content type to an object that has none.
 *
 * Runs the type-selection interaction with the names of the existing types
 * (plus `params.typesHint`), the truncated object text and, when applicable,
 * a `store:<objectId>` file reference. If the returned `document_type` does
 * not match a known type, a new type is generated through `generateNewType`.
 * The object is then updated with the selected type id.
 *
 * @param payload activity payload resolved through `setupActivity`
 * @returns `{ status: "skipped", message }` when the object already has a type,
 *          `{ status: "failed", error: "no-text" }` when there is nothing to
 *          classify, otherwise `{ id, name, isNew }` for the selected type
 * @throws Error when an image rendition is still being generated
 */
export async function generateOrAssignContentType(payload: DSLActivityExecutionPayload) {
    const context = await setupActivity<GenerateOrAssignContentTypeParams>(payload);
    const { params, client, objectId } = context;

    const interactionName = params.interactionNames?.selectDocumentType ?? INT_SELECT_DOCUMENT_TYPE;

    log.info("SelectDocumentType for object: " + objectId, { payload });

    const object = await client.objects.retrieve(objectId, "+text");

    //Expects object.type to be null on first ingestion of content
    //User initiated Content Type change via the Composable UI,
    //sets object.type to null when they let Composable choose for them.
    //sets object.type to chosen type (thus non-null) when user picks a type.
    // Optional chaining keeps this safe when no object came back; the guard below handles that case.
    if (object?.type) {
        log.warn(`Object ${objectId} has already a type. Skipping type creation.`);
        return { status: "skipped", message: "Object already has a type: " + object.type.name };
    }

    if (!object || (!object.text && !object.content?.type?.startsWith("image/") && !object.content?.type?.startsWith("application/pdf"))) {
        log.info(`Object ${objectId} not found or text is empty and not an image`, { object });
        return { status: "failed", error: "no-text" };
    }

    const types = await client.types.list();

    //make a list of all existing types, and add hints if any
    const existing_types = types.map(t => t.name).filter(n => !["DocumentPart", "Rendition"].includes(n));
    if (params.typesHint) {
        const newHints = params.typesHint.filter((t: string) => !existing_types.includes(t));
        existing_types.push(...newHints);
    }

    const content = object.text ? truncByMaxTokens(object.text, params.truncate || 4000) : undefined;

    // Resolves the file reference to send along with (or instead of) the text.
    const getImage = async () => {
        // PDFs with little or no extracted text are sent as a file. A missing or
        // empty text counts as length 0, so a text-less PDF is not sent with nothing.
        if (object.content?.type?.includes("pdf") && (object.text?.length ?? 0) < 100) {
            return "store:" + objectId;
        }
        if (!object.content?.type?.startsWith("image/")) {
            return undefined;
        }
        const res = await client.objects.getRendition(objectId, { max_hw: 1024, format: "image/png", generate_if_missing: true });
        if (!res.rendition && res.status === "generating") {
            //throw to try again
            throw new Error(`Rendition for object ${objectId} is in progress`);
        } else if (res.rendition) {
            return "store:" + objectId;
        }
        return undefined;
    };

    const fileRef = await getImage();

    log.info("Execute SelectDocumentType interaction on content with \nexisting types: " + existing_types.join(","));

    const res = await executeInteractionFromActivity(client, interactionName, params, {
        existing_types, content, image: fileRef
    });

    log.info("Selected Content Type Result: " + JSON.stringify(res.result));

    //if type is not identified or not present in the database, generate a new type
    let selectedType: { id: string, name: string } | undefined = types.find(t => t.name === res.result.document_type);

    if (!selectedType) {
        log.warn("Document type not idenfified: starting type generation");
        const newType = await generateNewType(context, existing_types, content, fileRef);
        selectedType = { id: newType.id, name: newType.name };
    }

    const { id: typeId, name: typeName } = selectedType;

    //update object with selected type
    await client.objects.update(objectId, {
        type: typeId,
    });

    return {
        id: typeId,
        name: typeName,
        isNew: !types.some(t => t.name === typeName)
    };
}
|
121
|
+
|
122
|
+
/**
 * Runs the metadata-model interaction and creates a content type from its result.
 *
 * @param context activity context providing the client, params and project lookup
 * @param existing_types type names forwarded to the interaction
 * @param content optional text forwarded to the interaction
 * @param fileRef optional file reference forwarded as `image`
 * @returns the type returned by `client.types.create`
 * @throws Error when the interaction result has no `document_type`
 */
async function generateNewType(context: ActivityContext, existing_types: string[], content?: string, fileRef?: string) {
    const { client, params } = context;

    const project = await context.fetchProject();
    const interactionName = params.interactionNames?.generateMetadataModel ?? INT_GENERATE_METADATA_MODEL;

    const interactionData = {
        existing_types,
        content,
        human_context: project?.configuration?.human_context ?? undefined,
        image: fileRef || undefined,
    };
    const generated = await executeInteractionFromActivity(client, interactionName, params, interactionData);

    if (!generated.result.document_type) {
        log.error("No name generated for type", generated);
        throw new Error("No name generated for type");
    }

    log.info("Generated schema for type", generated.result.metadata_schema);

    const newTypePayload: CreateContentObjectTypePayload = {
        name: generated.result.document_type,
        object_schema: generated.result.metadata_schema,
        is_chunkable: generated.result.is_chunkable,
    };

    return await client.types.create(newTypePayload);
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
import { ContentObject, DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
|
+
import { projectResult } from "../dsl/projections.js";
|
3
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
|
+
|
5
|
+
|
6
|
+
/** Parameters of the `getObject` activity. */
export interface GetObjectParams {
    /** Optional selection string, passed as the second argument to `client.objects.retrieve`. */
    select?: string;
}
|
9
|
+
|
10
|
+
/** DSL activity spec whose `name` is fixed to `'getObject'`. */
export interface GetObject extends DSLActivitySpec<GetObjectParams> {
    name: 'getObject';
}
|
13
|
+
|
14
|
+
/**
 * Retrieves the activity's target object from the store and returns its projection.
 *
 * The object is fetched with `params.select`, handed to `projectResult`, and
 * the projection is returned with `id` set to the retrieved object's id.
 *
 * @param payload activity payload resolved through `setupActivity`
 * @returns the projected object, always carrying the object's `id`
 */
export async function getObjectFromStore(payload: DSLActivityExecutionPayload): Promise<ContentObject> {
    const { client, params, objectId } = await setupActivity<GetObjectParams>(payload);
    const stored = await client.objects.retrieve(objectId, params.select);

    return {
        ...projectResult(payload, params, stored, stored),
        id: stored.id,
    };
}
|
@@ -0,0 +1,21 @@
|
|
1
|
+
/**
|
2
|
+
* Here we export all activities to be registered with the temporal worker
|
3
|
+
*/
|
4
|
+
export { createDocumentTypeFromInteractionRun } from "./advanced/createDocumentTypeFromInteractionRun.js";
|
5
|
+
export { createOrUpdateDocumentFromInteractionRun } from "./advanced/createOrUpdateDocumentFromInteractionRun.js";
|
6
|
+
export { updateDocumentFromInteractionRun } from "./advanced/updateDocumentFromInteractionRun.js";
|
7
|
+
export { chunkDocument } from "./chunkDocument.js";
|
8
|
+
export { createPdfDocumentFromSource } from "./createDocumentFromOther.js";
|
9
|
+
export { executeInteraction } from "./executeInteraction.js";
|
10
|
+
export { extractDocumentText } from "./extractDocumentText.js";
|
11
|
+
export { generateDocumentProperties } from "./generateDocumentProperties.js";
|
12
|
+
export { generateEmbeddings } from "./generateEmbeddings.js";
|
13
|
+
export { generateImageRendition } from "./generateImageRendition.js";
|
14
|
+
export { generateOrAssignContentType } from "./generateOrAssignContentType.js";
|
15
|
+
export { getObjectFromStore } from "./getObjectFromStore.js";
|
16
|
+
export { convertPdfToStructuredText } from "./media/processPdfWithTextract.js";
|
17
|
+
export { transcribeMedia } from "./media/transcribeMediaWithGladia.js";
|
18
|
+
export { notifyWebhook } from "./notifyWebhook.js";
|
19
|
+
export { setDocumentStatus } from "./setDocumentStatus.js";
|
20
|
+
|
21
|
+
export * from "../iterative-generation/activities/index.js";
|