@vertesia/workflow 0.54.0 → 0.55.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +32 -0
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +72 -0
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +18 -0
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/cjs/activities/chunkDocument.js +84 -0
- package/lib/cjs/activities/chunkDocument.js.map +1 -0
- package/lib/cjs/activities/createDocumentFromOther.js +64 -0
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
- package/lib/cjs/activities/executeInteraction.js +154 -0
- package/lib/cjs/activities/executeInteraction.js.map +1 -0
- package/lib/cjs/activities/extractDocumentText.js +156 -0
- package/lib/cjs/activities/extractDocumentText.js.map +1 -0
- package/lib/cjs/activities/generateDocumentProperties.js +77 -0
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
- package/lib/cjs/activities/generateEmbeddings.js +248 -0
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
- package/lib/cjs/activities/generateImageRendition.js +167 -0
- package/lib/cjs/activities/generateImageRendition.js.map +1 -0
- package/lib/cjs/activities/generateOrAssignContentType.js +112 -0
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/cjs/activities/getObjectFromStore.js +20 -0
- package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
- package/lib/cjs/activities/handleError.js +22 -0
- package/lib/cjs/activities/handleError.js.map +1 -0
- package/lib/cjs/activities/index-dsl.js +39 -0
- package/lib/cjs/activities/index-dsl.js.map +1 -0
- package/lib/cjs/activities/index.js +21 -0
- package/lib/cjs/activities/index.js.map +1 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js +102 -0
- package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +51 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/cjs/activities/notifyWebhook.js +34 -0
- package/lib/cjs/activities/notifyWebhook.js.map +1 -0
- package/lib/cjs/activities/setDocumentStatus.js +15 -0
- package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
- package/lib/cjs/conversion/TextractProcessor.js +417 -0
- package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
- package/lib/cjs/conversion/image.js +94 -0
- package/lib/cjs/conversion/image.js.map +1 -0
- package/lib/cjs/conversion/markitdown.js +42 -0
- package/lib/cjs/conversion/markitdown.js.map +1 -0
- package/lib/cjs/conversion/mutool.js +147 -0
- package/lib/cjs/conversion/mutool.js.map +1 -0
- package/lib/cjs/conversion/pandoc.js +39 -0
- package/lib/cjs/conversion/pandoc.js.map +1 -0
- package/lib/cjs/dsl/conditions.js +81 -0
- package/lib/cjs/dsl/conditions.js.map +1 -0
- package/lib/cjs/dsl/dsl-workflow.js +271 -0
- package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
- package/lib/cjs/dsl/dslProxyActivities.js +23 -0
- package/lib/cjs/dsl/dslProxyActivities.js.map +1 -0
- package/lib/cjs/dsl/projections.js +59 -0
- package/lib/cjs/dsl/projections.js.map +1 -0
- package/lib/cjs/dsl/setup/ActivityContext.js +120 -0
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
- package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/index.js +16 -0
- package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
- package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
- package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
- package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/cjs/dsl/validation.js +122 -0
- package/lib/cjs/dsl/validation.js.map +1 -0
- package/lib/cjs/dsl/vars.js +341 -0
- package/lib/cjs/dsl/vars.js.map +1 -0
- package/lib/cjs/dsl/walk.js +100 -0
- package/lib/cjs/dsl/walk.js.map +1 -0
- package/lib/cjs/dsl.js +20 -0
- package/lib/cjs/dsl.js.map +1 -0
- package/lib/cjs/errors.js +48 -0
- package/lib/cjs/errors.js.map +1 -0
- package/lib/cjs/index.js +50 -0
- package/lib/cjs/index.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +69 -0
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js +73 -0
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js +91 -0
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/cjs/iterative-generation/activities/index.js +12 -0
- package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +56 -0
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/cjs/iterative-generation/types.js +5 -0
- package/lib/cjs/iterative-generation/types.js.map +1 -0
- package/lib/cjs/iterative-generation/utils.js +121 -0
- package/lib/cjs/iterative-generation/utils.js.map +1 -0
- package/lib/cjs/package.json +3 -0
- package/lib/cjs/result-types.js +10 -0
- package/lib/cjs/result-types.js.map +1 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js +47 -0
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +28 -0
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/cjs/utils/auth.js +15 -0
- package/lib/cjs/utils/auth.js.map +1 -0
- package/lib/cjs/utils/blobs.js +69 -0
- package/lib/cjs/utils/blobs.js.map +1 -0
- package/lib/cjs/utils/chunks.js +14 -0
- package/lib/cjs/utils/chunks.js.map +1 -0
- package/lib/cjs/utils/client.js +26 -0
- package/lib/cjs/utils/client.js.map +1 -0
- package/lib/cjs/utils/expand-vars.js +33 -0
- package/lib/cjs/utils/expand-vars.js.map +1 -0
- package/lib/cjs/utils/memory.js +65 -0
- package/lib/cjs/utils/memory.js.map +1 -0
- package/lib/cjs/utils/tokens.js +38 -0
- package/lib/cjs/utils/tokens.js.map +1 -0
- package/lib/cjs/vars.js +20 -0
- package/lib/cjs/vars.js.map +1 -0
- package/lib/cjs/workflows.js +15 -0
- package/lib/cjs/workflows.js.map +1 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +29 -0
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +69 -0
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +15 -0
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
- package/lib/esm/activities/chunkDocument.js +81 -0
- package/lib/esm/activities/chunkDocument.js.map +1 -0
- package/lib/esm/activities/createDocumentFromOther.js +58 -0
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
- package/lib/esm/activities/executeInteraction.js +150 -0
- package/lib/esm/activities/executeInteraction.js.map +1 -0
- package/lib/esm/activities/extractDocumentText.js +153 -0
- package/lib/esm/activities/extractDocumentText.js.map +1 -0
- package/lib/esm/activities/generateDocumentProperties.js +74 -0
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
- package/lib/esm/activities/generateEmbeddings.js +245 -0
- package/lib/esm/activities/generateEmbeddings.js.map +1 -0
- package/lib/esm/activities/generateImageRendition.js +161 -0
- package/lib/esm/activities/generateImageRendition.js.map +1 -0
- package/lib/esm/activities/generateOrAssignContentType.js +109 -0
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
- package/lib/esm/activities/getObjectFromStore.js +17 -0
- package/lib/esm/activities/getObjectFromStore.js.map +1 -0
- package/lib/esm/activities/handleError.js +19 -0
- package/lib/esm/activities/handleError.js.map +1 -0
- package/lib/esm/activities/index-dsl.js +19 -0
- package/lib/esm/activities/index-dsl.js.map +1 -0
- package/lib/esm/activities/index.js +5 -0
- package/lib/esm/activities/index.js.map +1 -0
- package/lib/esm/activities/media/processPdfWithTextract.js +98 -0
- package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +48 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
- package/lib/esm/activities/notifyWebhook.js +31 -0
- package/lib/esm/activities/notifyWebhook.js.map +1 -0
- package/lib/esm/activities/setDocumentStatus.js +12 -0
- package/lib/esm/activities/setDocumentStatus.js.map +1 -0
- package/lib/esm/conversion/TextractProcessor.js +410 -0
- package/lib/esm/conversion/TextractProcessor.js.map +1 -0
- package/lib/esm/conversion/image.js +88 -0
- package/lib/esm/conversion/image.js.map +1 -0
- package/lib/esm/conversion/markitdown.js +36 -0
- package/lib/esm/conversion/markitdown.js.map +1 -0
- package/lib/esm/conversion/mutool.js +139 -0
- package/lib/esm/conversion/mutool.js.map +1 -0
- package/lib/esm/conversion/pandoc.js +36 -0
- package/lib/esm/conversion/pandoc.js.map +1 -0
- package/lib/esm/dsl/conditions.js +75 -0
- package/lib/esm/dsl/conditions.js.map +1 -0
- package/lib/esm/dsl/dsl-workflow.js +264 -0
- package/lib/esm/dsl/dsl-workflow.js.map +1 -0
- package/lib/esm/dsl/dslProxyActivities.js +20 -0
- package/lib/esm/dsl/dslProxyActivities.js.map +1 -0
- package/lib/esm/dsl/projections.js +55 -0
- package/lib/esm/dsl/projections.js.map +1 -0
- package/lib/esm/dsl/setup/ActivityContext.js +115 -0
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
- package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/index.js +12 -0
- package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
- package/lib/esm/dsl/setup/fetch/providers.js +61 -0
- package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
- package/lib/esm/dsl/test/test-child-workflow.js +5 -0
- package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
- package/lib/esm/dsl/validation.js +118 -0
- package/lib/esm/dsl/validation.js.map +1 -0
- package/lib/esm/dsl/vars.js +335 -0
- package/lib/esm/dsl/vars.js.map +1 -0
- package/lib/esm/dsl/walk.js +96 -0
- package/lib/esm/dsl/walk.js.map +1 -0
- package/lib/esm/dsl.js +4 -0
- package/lib/esm/dsl.js.map +1 -0
- package/lib/esm/errors.js +41 -0
- package/lib/esm/errors.js.map +1 -0
- package/lib/esm/index.js +32 -0
- package/lib/esm/index.js.map +1 -0
- package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
- package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +66 -0
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generatePart.js +70 -0
- package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
- package/lib/esm/iterative-generation/activities/generateToc.js +88 -0
- package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
- package/lib/esm/iterative-generation/activities/index.js +5 -0
- package/lib/esm/iterative-generation/activities/index.js.map +1 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +53 -0
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
- package/lib/esm/iterative-generation/types.js +2 -0
- package/lib/esm/iterative-generation/types.js.map +1 -0
- package/lib/esm/iterative-generation/utils.js +112 -0
- package/lib/esm/iterative-generation/utils.js.map +1 -0
- package/lib/esm/result-types.js +7 -0
- package/lib/esm/result-types.js.map +1 -0
- package/lib/esm/system/notifyWebhookWorkflow.js +44 -0
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +25 -0
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
- package/lib/esm/utils/auth.js +8 -0
- package/lib/esm/utils/auth.js.map +1 -0
- package/lib/esm/utils/blobs.js +58 -0
- package/lib/esm/utils/blobs.js.map +1 -0
- package/lib/esm/utils/chunks.js +9 -0
- package/lib/esm/utils/chunks.js.map +1 -0
- package/lib/esm/utils/client.js +23 -0
- package/lib/esm/utils/client.js.map +1 -0
- package/lib/esm/utils/expand-vars.js +30 -0
- package/lib/esm/utils/expand-vars.js.map +1 -0
- package/lib/esm/utils/memory.js +55 -0
- package/lib/esm/utils/memory.js.map +1 -0
- package/lib/esm/utils/tokens.js +34 -0
- package/lib/esm/utils/tokens.js.map +1 -0
- package/lib/esm/vars.js +4 -0
- package/lib/esm/vars.js.map +1 -0
- package/lib/esm/workflows.js +8 -0
- package/lib/esm/workflows.js.map +1 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +39 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
- package/lib/types/activities/chunkDocument.d.ts +33 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -0
- package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
- package/lib/types/activities/executeInteraction.d.ts +55 -0
- package/lib/types/activities/executeInteraction.d.ts.map +1 -0
- package/lib/types/activities/extractDocumentText.d.ts +10 -0
- package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
- package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
- package/lib/types/activities/generateEmbeddings.d.ts +53 -0
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
- package/lib/types/activities/generateImageRendition.d.ts +15 -0
- package/lib/types/activities/generateImageRendition.d.ts.map +1 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
- package/lib/types/activities/getObjectFromStore.d.ts +14 -0
- package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
- package/lib/types/activities/handleError.d.ts +6 -0
- package/lib/types/activities/handleError.d.ts.map +1 -0
- package/lib/types/activities/index-dsl.d.ts +18 -0
- package/lib/types/activities/index-dsl.d.ts.map +1 -0
- package/lib/types/activities/index.d.ts +5 -0
- package/lib/types/activities/index.d.ts.map +1 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
- package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +14 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
- package/lib/types/activities/notifyWebhook.d.ts +16 -0
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
- package/lib/types/activities/setDocumentStatus.d.ts +15 -0
- package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
- package/lib/types/conversion/TextractProcessor.d.ts +45 -0
- package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
- package/lib/types/conversion/image.d.ts +11 -0
- package/lib/types/conversion/image.d.ts.map +1 -0
- package/lib/types/conversion/markitdown.d.ts +2 -0
- package/lib/types/conversion/markitdown.d.ts.map +1 -0
- package/lib/types/conversion/mutool.d.ts +19 -0
- package/lib/types/conversion/mutool.d.ts.map +1 -0
- package/lib/types/conversion/pandoc.d.ts +2 -0
- package/lib/types/conversion/pandoc.d.ts.map +1 -0
- package/lib/types/dsl/conditions.d.ts +2 -0
- package/lib/types/dsl/conditions.d.ts.map +1 -0
- package/lib/types/dsl/dsl-workflow.d.ts +5 -0
- package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
- package/lib/types/dsl/dslProxyActivities.d.ts +10 -0
- package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -0
- package/lib/types/dsl/projections.d.ts +4 -0
- package/lib/types/dsl/projections.d.ts.map +1 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts +17 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
- package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
- package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
- package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
- package/lib/types/dsl/validation.d.ts +4 -0
- package/lib/types/dsl/validation.d.ts.map +1 -0
- package/lib/types/dsl/vars.d.ts +48 -0
- package/lib/types/dsl/vars.d.ts.map +1 -0
- package/lib/types/dsl/walk.d.ts +18 -0
- package/lib/types/dsl/walk.d.ts.map +1 -0
- package/lib/types/dsl.d.ts +4 -0
- package/lib/types/dsl.d.ts.map +1 -0
- package/lib/types/errors.d.ts +22 -0
- package/lib/types/errors.d.ts.map +1 -0
- package/lib/types/index.d.ts +31 -0
- package/lib/types/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
- package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
- package/lib/types/iterative-generation/activities/index.d.ts +5 -0
- package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
- package/lib/types/iterative-generation/types.d.ts +79 -0
- package/lib/types/iterative-generation/types.d.ts.map +1 -0
- package/lib/types/iterative-generation/utils.d.ts +27 -0
- package/lib/types/iterative-generation/utils.d.ts.map +1 -0
- package/lib/types/result-types.d.ts +22 -0
- package/lib/types/result-types.d.ts.map +1 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts +3 -0
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +25 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
- package/lib/types/utils/auth.d.ts +4 -0
- package/lib/types/utils/auth.d.ts.map +1 -0
- package/lib/types/utils/blobs.d.ts +8 -0
- package/lib/types/utils/blobs.d.ts.map +1 -0
- package/lib/types/utils/chunks.d.ts +9 -0
- package/lib/types/utils/chunks.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts +7 -0
- package/lib/types/utils/client.d.ts.map +1 -0
- package/lib/types/utils/expand-vars.d.ts +8 -0
- package/lib/types/utils/expand-vars.d.ts.map +1 -0
- package/lib/types/utils/memory.d.ts +8 -0
- package/lib/types/utils/memory.d.ts.map +1 -0
- package/lib/types/utils/tokens.d.ts +11 -0
- package/lib/types/utils/tokens.d.ts.map +1 -0
- package/lib/types/vars.d.ts +3 -0
- package/lib/types/vars.d.ts.map +1 -0
- package/lib/types/workflows.d.ts +8 -0
- package/lib/types/workflows.d.ts.map +1 -0
- package/lib/workflows-bundle.js +20991 -0
- package/package.json +3 -3
@@ -0,0 +1,150 @@
|
|
1
|
+
import { activityInfo, log } from "@temporalio/activity";
|
2
|
+
import { ExecutionRunStatus, } from "@vertesia/common";
|
3
|
+
import { projectResult } from "../dsl/projections.js";
|
4
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
5
|
+
import { ActivityParamInvalid, ActivityParamNotFound } from "../errors.js";
|
6
|
+
import { truncByMaxTokens } from "../utils/tokens.js";
|
7
|
+
// Example of an activity definition that invokes `executeInteraction`.
// It is not exported and nothing in this module reads it; it only documents
// the expected shape (`name`, `import`, `params`, `fetch`).
// It is deliberately NOT named `JSON`: a module-level `const JSON` shadows the
// built-in JSON global for every function in this module.
//@ts-ignore
const EXAMPLE_ACTIVITY_SPEC = {
    name: "executeInteraction",
    import: ["defaultModel", "guidlineId", "docTypeId"],
    params: {
        defaultModel: "${model}",
        interactionName: "GenerateSummary",
        model: "${defaultModel ?? 'gpt4'}",
        environment: "13456",
        max_tokens: 100,
        temperature: 0.5,
        tags: ["test"],
        result_schema: "${docType.object_schema}",
        prompt_data: {
            documents: "${documents}",
            guidline: "${guidline.text}",
        },
    },
    fetch: {
        documents: {
            type: "document",
            query: {
                id: { $in: "${objectIds}" },
            },
            select: "+text",
        },
        guidline: {
            type: "document",
            limit: 1,
            query: {
                id: "${guidlineId}",
            },
            select: "+text",
            on_not_found: "throw",
        },
        docType: {
            type: "document_type",
            limit: 1,
            query: {
                id: "${docTypeId}",
            },
            select: "+object_schema",
        },
    },
};
|
53
|
+
/**
 * Activity entry point: resolves the activity context, prepares the prompt
 * data and executes the named interaction.
 *
 * Prompt data preparation:
 *  - `params.static_prompt_data`, when present, is merged over `params.prompt_data`;
 *  - each `[key, value]` entry of `params.truncate` replaces `prompt_data[key]`
 *    with `truncByMaxTokens(prompt_data[key], value)`.
 *
 * @param payload activity payload handed to `setupActivity`; `payload.activity`
 *                and `payload.debug_mode` are also read here.
 * @returns the value produced by `projectResult`, whose default projection is
 *          `{ runId, status, result }` taken from the interaction run.
 * @throws ActivityParamNotFound when `interactionName` is missing.
 * @throws ActivityParamInvalid when execution fails with a
 *         "Failed to validate merged prompt schema" message (bad input data,
 *         not worth retrying). Any other failure is re-thrown untouched.
 */
export async function executeInteraction(payload) {
    const { client, params } = await setupActivity(payload);
    const { interactionName, static_prompt_data: wf_prompt_data } = params;
    // Validate first so a missing name is reported as such rather than as an
    // unrelated failure while preparing the prompt data.
    if (!interactionName) {
        log.error("Missing interactionName", { params });
        throw new ActivityParamNotFound("interactionName", payload.activity);
    }
    // prompt_data may be absent; fall back to an empty object so that merging
    // static data and truncating keys cannot throw a TypeError.
    const prompt_data = params.prompt_data ?? {};
    if (wf_prompt_data) {
        Object.assign(prompt_data, wf_prompt_data);
    }
    if (params.truncate) {
        for (const [key, value] of Object.entries(params.truncate)) {
            prompt_data[key] = truncByMaxTokens(prompt_data[key], value);
        }
    }
    try {
        const res = await executeInteractionFromActivity(client, interactionName, params, prompt_data, payload.debug_mode);
        return projectResult(payload, params, res, {
            runId: res.id,
            status: res.status,
            result: res.result,
        });
    }
    catch (error) {
        log.error("Failed to execute interaction", { error });
        // A thrown value is not guaranteed to be an Error with a string message;
        // guard the lookup so the original failure is never masked.
        const message = typeof error?.message === "string" ? error.message : "";
        if (message.includes("Failed to validate merged prompt schema")) {
            //issue with the input data, don't retry
            throw new ActivityParamInvalid("prompt_data", payload.activity, message);
        }
        throw error;
    }
}
|
88
|
+
/**
 * Turns an error value (string, Error, or an object carrying a `message`)
 * into readable text for logs and exception messages.
 */
function describeInteractionError(error) {
    if (error === undefined || error === null) {
        return "no error details available";
    }
    if (typeof error === "string") {
        return error;
    }
    if (typeof error.message === "string") {
        return error.message;
    }
    return String(error);
}
/**
 * Executes an interaction by name from inside a workflow activity.
 *
 * Every run is tagged with "workflow" and `tmpRunId:<workflow run id>`, followed
 * by `params.tags` when provided. When `params.include_previous_error` is set
 * and this is a retry (`attempt > 1`), the first run returned by a search on
 * that run tag is retrieved and its `error` is passed to the interaction as
 * `previous_error` in the prompt data.
 *
 * @param client client exposing `runs.search`, `runs.retrieve` and `interactions.executeByName`
 * @param interactionName name of the interaction to execute
 * @param params activity params; reads `tags`, `include_previous_error`,
 *               `environment`, `model`, `model_options` and `result_schema`
 * @param prompt_data data spread into the interaction input
 * @param debug when truthy, logs the previous error and the execution result
 * @returns the run returned by `executeByName`
 * @throws Error when the call rejects (original error attached as `cause`) or
 *         when the returned run has an `error` or a failed status.
 */
export async function executeInteractionFromActivity(client, interactionName, params, prompt_data, debug) {
    const info = activityInfo();
    const runTag = `tmpRunId:${info.workflowExecution.runId}`;
    const baseTags = ["workflow", runTag]; //TODO use wf:wfName
    const tags = params.tags ? baseTags.concat(params.tags) : baseTags;
    let previousStudioExecutionRun;
    // Only a retry can have a previous run to look up.
    if (params.include_previous_error && info.attempt > 1) {
        log.info("Retrying, searching for previous run", { tags: [runTag] });
        const results = await client.runs.search({
            query: { tags: [runTag] },
            limit: 1,
        });
        log.info("Search results", { results });
        const previousRun = results?.[0];
        if (previousRun) {
            log.info("Found previous run", { previousRun });
            previousStudioExecutionRun = await client.runs.retrieve(previousRun.id);
        }
    }
    if (debug && previousStudioExecutionRun?.error) {
        log.info(`Found previous run error`, { error: previousStudioExecutionRun.error });
    }
    const config = {
        environment: params.environment,
        model: params.model,
        model_options: params.model_options,
    };
    const data = {
        ...prompt_data,
        previous_error: previousStudioExecutionRun?.error,
    };
    const result_schema = params.result_schema;
    log.debug(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags });
    let res;
    try {
        res = await client.interactions.executeByName(interactionName, {
            config,
            data,
            result_schema,
            tags,
            stream: false,
        });
    }
    catch (err) {
        log.error(`Error executing interaction ${interactionName}`, { err });
        // Keep the original error reachable for callers and logs.
        throw new Error(`Interaction Execution failed ${interactionName}: ${describeInteractionError(err)}`, { cause: err });
    }
    if (debug) {
        log.info(`Interaction executed ${interactionName}`, res);
    }
    if (res.error || res.status === ExecutionRunStatus.failed) {
        log.error(`Error executing interaction ${interactionName}`, { error: res.error });
        // res.error may be an object; interpolating it directly would print "[object Object]".
        throw new Error(`Interaction Execution failed ${interactionName}: ${describeInteractionError(res.error)}`);
    }
    return res;
}
|
150
|
+
//# sourceMappingURL=executeInteraction.js.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"executeInteraction.js","sourceRoot":"","sources":["../../../src/activities/executeInteraction.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAEzD,OAAO,EAIH,kBAAkB,GAGrB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAC;AAC3E,OAAO,EAAgB,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAEpE,UAAU;AACV,YAAY;AACZ,MAAM,IAAI,GAAoB;IAC1B,IAAI,EAAE,oBAAoB;IAC1B,MAAM,EAAE,CAAC,cAAc,EAAE,YAAY,EAAE,WAAW,CAAC;IACnD,MAAM,EAAE;QACJ,YAAY,EAAE,UAAU;QACxB,eAAe,EAAE,iBAAiB;QAClC,KAAK,EAAE,2BAA2B;QAClC,WAAW,EAAE,OAAO;QACpB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,GAAG;QAChB,IAAI,EAAE,CAAC,MAAM,CAAC;QACd,aAAa,EAAE,0BAA0B;QACzC,WAAW,EAAE;YACT,SAAS,EAAE,cAAc;YACzB,QAAQ,EAAE,kBAAkB;SAC/B;KACJ;IACD,KAAK,EAAE;QACH,SAAS,EAAE;YACP,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE;gBACH,EAAE,EAAE,EAAE,GAAG,EAAE,cAAc,EAAE;aAC9B;YACD,MAAM,EAAE,OAAO;SAClB;QACD,QAAQ,EAAE;YACN,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,CAAC;YACR,KAAK,EAAE;gBACH,EAAE,EAAE,eAAe;aACtB;YACD,MAAM,EAAE,OAAO;YACf,YAAY,EAAE,OAAO;SACxB;QACD,OAAO,EAAE;YACL,IAAI,EAAE,eAAe;YACrB,KAAK,EAAE,CAAC;YACR,KAAK,EAAE;gBACH,EAAE,EAAE,cAAc;aACrB;YACD,MAAM,EAAE,gBAAgB;SAC3B;KACJ;CACJ,CAAC;AAyDF,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,OAA8D;IACnG,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAA2B,OAAO,CAAC,CAAC;IAElF,MAAM,EAAE,eAAe,EAAE,WAAW,EAAE,kBAAkB,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC;IACpF,IAAI,cAAc,EAAE,CAAC;QACjB,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAC/C,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACnB,GAAG,CAAC,KAAK,CAAC,yBAAyB,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;QACjD,MAAM,IAAI,qBAAqB,CAAC,iBAAiB,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACzE,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QAClB,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QACjC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClD,WAAW,CAAC,GAAG,CAAC,GAAG,gBAAgB,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,CAAC;QACjE,CAAC;IACL,CAAC;IAED,IAAI,CAAC;QACD,MAAM,GAAG,GAAG,MAAM,8BAA8B,CAC5C,
MAAM,EACN,eAAe,EACf,MAAM,EACN,WAAW,EACX,OAAO,CAAC,UAAU,CACrB,CAAC;QACF,OAAO,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE;YACvC,KAAK,EAAE,GAAG,CAAC,EAAE;YACb,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,MAAM,EAAE,GAAG,CAAC,MAAM;SACrB,CAAC,CAAC;IACP,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QAClB,GAAG,CAAC,KAAK,CAAC,+BAA+B,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QACtD,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,yCAAyC,CAAC,EAAE,CAAC;YACpE,wCAAwC;YACxC,MAAM,IAAI,oBAAoB,CAAC,aAAa,EAAE,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;QACnF,CAAC;aAAM,CAAC;YACJ,MAAM,KAAK,CAAC;QAChB,CAAC;IACL,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAChD,MAAsB,EACtB,eAAuB,EACvB,MAAkC,EAClC,WAAgB,EAChB,KAAe;IAEf,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC;IAC7B,MAAM,IAAI,GAAG,YAAY,EAAE,CAAC;IAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC;IAC3C,IAAI,IAAI,GAAG,CAAC,UAAU,EAAE,YAAY,KAAK,EAAE,CAAC,CAAC,CAAC,oBAAoB;IAClE,IAAI,QAAQ,EAAE,CAAC;QACX,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,0BAA0B,GAA6B,SAAS,CAAC;IACrE,IAAI,MAAM,CAAC,sBAAsB,EAAE,CAAC;QAChC,iCAAiC;QACjC,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YACnB,GAAG,CAAC,IAAI,CAAC,sCAAsC,EAAE,EAAE,IAAI,EAAE,CAAC,WAAW,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC;YAClF,MAAM,OAAO,GAAqB;gBAC9B,KAAK,EAAE,EAAE,IAAI,EAAE,CAAC,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,EAAE;gBAC7D,KAAK,EAAE,CAAC;aACX,CAAC;YACF,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;gBAC/D,GAAG,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC,CAAC;gBAC7C,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACnD,CAAC,CAAC,CAAC;YAEH,IAAI,WAAW,EAAE,CAAC;gBACd,GAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;gBAChD,0BAA0B,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YAC5E,CAAC;QACL,CAAC;IACL,CAAC;IACD,IAAI,KAAK,IAAI,0BAA0B,EAAE,KAAK,EAAE,CAAC;QAC7C,GAAG,CAAC,IAAI,CAAC,2BAA2B,EAAE,EAAE,KAAK,EAAE,0BAA0B,EAAE,KAAK,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,MAAM,MAAM,GAAsC;QAC9C,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,KAAK,EAAE,MAAM,
CAAC,KAAK;QACnB,aAAa,EAAE,MAAM,CAAC,aAAa;KACtC,CAAC;IACF,MAAM,IAAI,GAAG;QACT,GAAG,WAAW;QACd,cAAc,EAAE,0BAA0B,EAAE,KAAK;KACpD,CAAC;IAEF,MAAM,aAAa,GAAG,MAAM,CAAC,aAAa,CAAC;IAE3C,GAAG,CAAC,KAAK,CAAC,gCAAgC,eAAe,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IAEpG,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,YAAY;SAChC,aAAa,CAAC,eAAe,EAAE;QAC5B,MAAM;QACN,IAAI;QACJ,aAAa;QACb,IAAI;QACJ,MAAM,EAAE,KAAK;KAChB,CAAC;SACD,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;QACX,GAAG,CAAC,KAAK,CAAC,+BAA+B,eAAe,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QACrE,MAAM,IAAI,KAAK,CAAC,gCAAgC,eAAe,KAAK,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IACvF,CAAC,CAAC,CAAC;IAEP,IAAI,KAAK,EAAE,CAAC;QACR,GAAG,CAAC,IAAI,CAAC,wBAAwB,eAAe,EAAE,EAAE,GAAG,CAAC,CAAC;IAC7D,CAAC;IAED,IAAI,GAAG,CAAC,KAAK,IAAI,GAAG,CAAC,MAAM,KAAK,kBAAkB,CAAC,MAAM,EAAE,CAAC;QACxD,GAAG,CAAC,KAAK,CAAC,+BAA+B,eAAe,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;QAClF,MAAM,IAAI,KAAK,CAAC,gCAAgC,eAAe,KAAK,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;IACrF,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}
|
@@ -0,0 +1,153 @@
|
|
1
|
+
import { log } from "@temporalio/activity";
|
2
|
+
import { mutoolPdfToText } from "../conversion/mutool.js";
|
3
|
+
import { markdownWithPandoc } from "../conversion/pandoc.js";
|
4
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
5
|
+
import { NoDocumentFound } from "../errors.js";
|
6
|
+
import { TextExtractionStatus } from "../result-types.js";
|
7
|
+
import { fetchBlobAsBuffer, md5 } from "../utils/blobs.js";
|
8
|
+
import { countTokens } from "../utils/tokens.js";
|
9
|
+
import { markdownWithMarkitdown } from "../conversion/markitdown.js";
|
10
|
+
// Minimal example of an activity definition naming this activity.
// Deliberately NOT named `JSON`: a module-level `const JSON` shadows the
// built-in JSON global for every function in this module.
// NOTE(review): the rest of this module is outside the reviewed chunk — confirm
// nothing below refers to this constant by its previous name.
//@ts-ignore
const EXAMPLE_ACTIVITY_SPEC = {
    name: "extractDocumentText",
};
|
14
|
+
/**
 * Activity: extract the text of a stored document and save it on the object.
 *
 * Looks the object up by id (with its `text` field selected), downloads the
 * content source, converts it to text according to the content MIME type, then
 * stores the text, its etag and the token data on the object.
 *
 * @param payload activity payload passed to `setupActivity`; `payload.objectIds`
 *        is attached to the NoDocumentFound error.
 * @returns a response object (see `createResponse`) whose `status` is one of
 *          TextExtractionStatus.success / skipped / error.
 * @throws NoDocumentFound when no object matches `objectId`.
 */
export async function extractDocumentText(payload) {
    const { client, objectId } = await setupActivity(payload);
    const r = await client.objects.find({
        query: { _id: objectId },
        limit: 1,
        select: "+text",
    });
    const doc = r[0];
    if (!doc) {
        log.error(`Document ${objectId} not found`);
        throw new NoDocumentFound(`Document ${objectId} not found`, payload.objectIds);
    }
    log.info(`Extracting text for object ${doc.id}`);

    // Without a content type and source there is nothing to extract from.
    if (!doc.content?.type || !doc.content?.source) {
        if (doc.text) {
            return createResponse(doc, doc.text, TextExtractionStatus.skipped, "Text present and no source or type");
        }
        return createResponse(doc, "", TextExtractionStatus.error, "No source or type found");
    }

    // Skip if text was already extracted for the current content etag.
    if (doc.text && doc.text.length > 0 && doc.text_etag === doc.content.etag) {
        return createResponse(doc, doc.text, TextExtractionStatus.skipped, "Text already extracted");
    }

    let fileBuffer;
    try {
        fileBuffer = await fetchBlobAsBuffer(client, doc.content.source);
    }
    catch (e) {
        log.error(`Error reading file: ${e}`);
        return createResponse(doc, "", TextExtractionStatus.error, e.message);
    }

    let txt;
    switch (doc.content.type) {
        case "application/pdf":
            txt = await mutoolPdfToText(fileBuffer);
            break;
        // docx
        case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            txt = await markdownWithMarkitdown(fileBuffer, "docx");
            break;
        // pptx
        case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            txt = await markdownWithMarkitdown(fileBuffer, "pptx");
            break;
        // html
        case "text/html":
            txt = await markdownWithPandoc(fileBuffer, "html");
            break;
        // opendocument
        case "application/vnd.oasis.opendocument.text":
            txt = await markdownWithPandoc(fileBuffer, "odt");
            break;
        // rtf
        case "application/rtf":
            txt = await markdownWithPandoc(fileBuffer, "rtf");
            break;
        // Formats that are already text: decode the bytes as UTF-8.
        case "text/plain":
        case "text/markdown":
        case "text/csv":
        case "application/typescript":
        case "application/javascript":
        case "application/json":
            txt = fileBuffer.toString("utf8");
            break;
        default:
            // Unknown MIME type: accept it only if the bytes look like text.
            if (sniffIfText(fileBuffer)) {
                txt = fileBuffer.toString("utf8"); //TODO: add charset detection
                break;
            }
            return createResponse(doc, doc.text ?? "", TextExtractionStatus.skipped, `Unsupported mime type: ${doc.content.type}`);
    }

    const tokensData = countTokens(txt);
    // Fall back to a hash of the extracted text when the content has no etag.
    const etag = doc.content.etag ?? md5(txt);
    const tokens = {
        ...tokensData,
        etag: etag,
    };
    await client.objects.update(doc.id, {
        text: txt,
        text_etag: etag,
        tokens,
    });

    // Report the token data that was just saved; `doc.tokens` still holds the
    // value loaded before extraction.
    return {
        ...createResponse(doc, txt, TextExtractionStatus.success),
        tokens,
    };
}
|
115
|
+
/**
 * Builds the result object returned by the text extraction activity.
 *
 * @param doc object providing `id` and `tokens`
 * @param text extracted (or existing) text; null/undefined is treated as empty
 * @param status extraction status to report
 * @param message optional human-readable detail
 * @returns `{ status, message, tokens, len, objectId, hasText }`
 */
function createResponse(doc, text, status, message) {
    // Tolerate a missing text instead of throwing on `.length`.
    const safeText = text ?? "";
    return {
        status,
        message,
        tokens: doc.tokens,
        len: safeText.length,
        objectId: doc.id,
        hasText: !!safeText,
    };
}
|
125
|
+
/**
 * Heuristically decides whether a buffer holds UTF-8 text.
 *
 * Returns false for empty buffers and for buffers larger than 500 KiB, when
 * more than 10% of the first 1000 bytes are control characters other than
 * tab / LF / CR, or when decoding the whole buffer as UTF-8 produces a
 * U+FFFD replacement character.
 *
 * @param buf the bytes to inspect
 * @returns true when the content looks like text
 */
function sniffIfText(buf) {
    // Nothing to sniff, or file too large to even try.
    if (buf.length === 0 || buf.length > 500 * 1024) {
        return false;
    }

    // Count binary/control characters in the first 1000 bytes
    // (tab, line feed and carriage return are normal in text).
    const sampleSize = Math.min(buf.length, 1000);
    let binaryCount = 0;
    for (let i = 0; i < sampleSize; i++) {
        const byte = buf[i];
        if (byte < 32 && byte !== 9 && byte !== 10 && byte !== 13) {
            binaryCount++;
        }
    }

    // If more than 10% binary/control chars, probably not text
    if (binaryCount / sampleSize > 0.1) {
        return false;
    }

    // Invalid UTF-8 sequences decode to the replacement character U+FFFD.
    return !buf.toString("utf8").includes("\uFFFD");
}
|
153
|
+
//# sourceMappingURL=extractDocumentText.js.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"extractDocumentText.js","sourceRoot":"","sources":["../../../src/activities/extractDocumentText.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAO3C,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EAAwB,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAChF,OAAO,EAAE,iBAAiB,EAAE,GAAG,EAAE,MAAM,mBAAmB,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAErE,YAAY;AACZ,MAAM,IAAI,GAAoB;IAC1B,IAAI,EAAE,qBAAqB;CAC9B,CAAC;AASF,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACrC,OAA+D;IAE/D,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;IAE1D,MAAM,CAAC,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC;QAChC,KAAK,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE;QACxB,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,OAAO;KAClB,CAAC,CAAC;IACH,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAkB,CAAC;IAClC,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,GAAG,CAAC,KAAK,CAAC,YAAY,QAAQ,YAAY,CAAC,CAAC;QAC5C,MAAM,IAAI,eAAe,CAAC,YAAY,QAAQ,YAAY,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;IACnF,CAAC;IAED,GAAG,CAAC,IAAI,CAAC,8BAA8B,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;IAEjD,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC;QAC7C,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;YACX,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,EAAE,oBAAoB,CAAC,OAAO,EAAE,oCAAoC,CAAC,CAAC;QAC7G,CAAC;aAAM,CAAC;YACJ,OAAO,cAAc,CAAC,GAAG,EAAE,EAAE,EAAE,oBAAoB,CAAC,KAAK,EAAE,yBAAyB,CAAC,CAAC;QAC1F,CAAC;IACL,CAAC;IAED,gDAAgD;IAChD,IAAI,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,GAAG,CAAC,SAAS,KAAK,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACxE,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,EAAE,oBAAoB,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAC;IACjG,CAAC;IAED,IAAI,UAAkB,CAAC;IACvB,IAAI,CAAC;QACD,UAAU,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACrE,CAAC;IAAC,OAAO,CAAM,EAAE,CAAC;QACd,GAAG,CAAC,KAAK,CAAC,uBAAuB,CAAC,EAAE,CAAC,CAAC;QACtC,OAAO,cAAc,CAAC,GAAG,EAAE,EAAE,EAAE,oBAAoB,CAAC,KAAK,EAAE,CAAC,CAAC,
OAAO,CAAC,CAAC;IAC1E,CAAC;IAED,IAAI,GAAW,CAAC;IAEhB,QAAQ,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACvB,KAAK,iBAAiB;YAClB,GAAG,GAAG,MAAM,eAAe,CAAC,UAAU,CAAC,CAAC;YACxC,MAAM;QAEV,KAAK,YAAY;YACb,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,MAAM;QACN,KAAK,yEAAyE;YAC1E,GAAG,GAAG,MAAM,sBAAsB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YACvD,MAAM;QAEV,MAAM;QACN,KAAK,2EAA2E;YAC5E,GAAG,GAAG,MAAM,sBAAsB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YACvD,MAAM;QAEV,MAAM;QACN,KAAK,WAAW;YACZ,GAAG,GAAG,MAAM,kBAAkB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YACnD,MAAM;QAEV,cAAc;QACd,KAAK,yCAAyC;YAC1C,GAAG,GAAG,MAAM,kBAAkB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAClD,MAAM;QAEV,KAAK;QACL,KAAK,iBAAiB;YAClB,GAAG,GAAG,MAAM,kBAAkB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAClD,MAAM;QAEV,UAAU;QACV,KAAK,eAAe;YAChB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,KAAK;QACL,KAAK,UAAU;YACX,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,YAAY;QACZ,KAAK,wBAAwB;YACzB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,YAAY;QACZ,KAAK,wBAAwB;YACzB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,MAAM;QACN,KAAK,kBAAkB;YACnB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV;YACI,IAAI,WAAW,CAAC,UAAU,CAAC,EAAE,CAAC;gBAC1B,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,6BAA6B;gBAChE,MAAM;YACV,CAAC;YACD,OAAO,cAAc,CACjB,GAAG,EACH,GAAG,CAAC,IAAI,IAAI,EAAE,EACd,oBAAoB,CAAC,OAAO,EAC5B,0BAA0B,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAC/C,CAAC;IACV,CAAC;IAED,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IACpC,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAE1C,MAAM,UAAU,GAA+B;QAC3C,IAAI,EAAE,GAAG;QACT,SAAS,EAAE,IAAI;QACf,MAAM,EAAE;YACJ,GAAG,UAAU;YACb,IAAI,EAAE,IAAI;SACb;KACJ,CAAC;IAEF,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;IAEhD,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,oBAAoB,CAAC,OAAO,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,cAAc,CACnB,GAAkB,EAClB,IAAY,EACZ,MAA4B,EAC5B,OAAgB;IAEhB,OAAO;QACH,MAAM;QACN,OAAO;QACP,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,GAAG,EAAE,IAAI,CAAC,MAAM;QAChB,QAAQ,
EAAE,GAAG,CAAC,EAAE;QAChB,OAAO,EAAE,CAAC,CAAC,IAAI;KAClB,CAAC;AACN,CAAC;AAED,SAAS,WAAW,CAAC,GAAW;IAC5B,uCAAuC;IACvC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG,GAAG,IAAI,EAAE,CAAC;QAC1B,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,kCAAkC;IAClC,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,yBAAyB;IAExE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,sDAAsD;QACtD,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,IAAI,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;YAC3D,WAAW,EAAE,CAAC;QAClB,CAAC;IACL,CAAC;IAED,2DAA2D;IAC3D,IAAI,WAAW,GAAG,UAAU,GAAG,GAAG,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,4CAA4C;IAC5C,IAAI,CAAC;QACD,MAAM,CAAC,GAAG,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/B,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,wBAAwB;IAC1E,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACT,OAAO,KAAK,CAAC;IACjB,CAAC;AACL,CAAC"}
|
@@ -0,0 +1,74 @@
|
|
1
|
+
import { log } from "@temporalio/activity";
|
2
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
3
|
+
import { executeInteractionFromActivity } from "./executeInteraction.js";
|
4
|
+
const INT_EXTRACT_INFORMATION = "sys:ExtractInformation";
|
5
|
+
/**
 * Activity: run an interaction that extracts structured properties from a
 * document and store the result on the object.
 *
 * The interaction name comes from `params.interactionName` and defaults to
 * INT_EXTRACT_INFORMATION. The result schema is the `object_schema` of the
 * document's type.
 *
 * @param payload activity payload passed to `setupActivity`; `payload.debug_mode`
 *        enables extra logging and is forwarded to the interaction execution.
 * @returns `{ status: "failed", error: "no-text" }` when the document is missing
 *          or has no usable input, `{ document, status: "skipped", message }`
 *          when the type has no schema, otherwise `{ status: "completed" }`.
 */
export async function generateDocumentProperties(payload) {
    const context = await setupActivity(payload);
    const { params, client, objectId } = context;
    const interactionName = params.interactionName ?? INT_EXTRACT_INFORMATION;
    const project = await context.fetchProject();
    const doc = await client.objects.retrieve(objectId, "+text");
    // `doc` may be missing: guard here so the "not found" path below is
    // reachable instead of failing with a TypeError on `doc.type`.
    const type = doc?.type ? await client.types.retrieve(doc.type.id) : undefined;

    // Usable input is either text, an image, or (with use_vision) any content.
    if (!doc || (!doc.text && !params.use_vision && !doc.content?.type?.startsWith("image/"))) {
        log.warn(`Object ${objectId} not found or text is empty`);
        return { status: "failed", error: "no-text" };
    }
    if (!type || !type.object_schema) {
        log.info(`Object ${objectId} has no schema`);
        return { document: objectId, status: "skipped", message: "no schema defined on type" };
    }

    // Reference to the stored object, passed as the image input for images
    // and, when vision is enabled, for PDFs.
    const getImageRef = () => {
        if (doc.content?.type?.startsWith("image/")) {
            return "store:" + doc.id;
        }
        if (params.use_vision && doc.content?.type?.startsWith("application/pdf")) {
            return "store:" + doc.id;
        }
        log.info(`Object ${objectId} is not an image or pdf`);
        return undefined;
    };

    const promptData = {
        content: doc.text ?? undefined,
        image: getImageRef() ?? undefined,
        human_context: project?.configuration?.human_context ?? undefined,
    };

    log.info(` Extracting information from object ${objectId} with type ${type.name}`, payload.debug_mode ? { params } : undefined);
    const infoRes = await executeInteractionFromActivity(client, interactionName, {
        ...params,
        include_previous_error: true,
        result_schema: type.object_schema,
    }, promptData, payload.debug_mode ?? false);

    // When the document has no text of its own, build one from the extracted
    // title and description; otherwise leave the text untouched (undefined).
    const getText = () => {
        if (doc.text) {
            return undefined;
        }
        let text = "";
        if (infoRes.result.title) {
            text += infoRes.result.title + "\n";
        }
        if (infoRes.result.description) {
            text += infoRes.result.description;
        }
        return text ? text : undefined;
    };

    log.info(`Extracted information from object ${objectId} with type ${type.name}`, { runId: infoRes.id });
    await client.objects.update(doc.id, {
        properties: {
            ...infoRes.result,
            etag: doc.text_etag,
        },
        text: getText(),
        generation_run_info: {
            id: infoRes.id,
            date: new Date().toISOString(),
            model: infoRes.modelId,
        },
    });
    return { status: "completed" };
}
|
74
|
+
//# sourceMappingURL=generateDocumentProperties.js.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"generateDocumentProperties.js","sourceRoot":"","sources":["../../../src/activities/generateDocumentProperties.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAE3C,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAEhE,OAAO,EAA8B,8BAA8B,EAAE,MAAM,yBAAyB,CAAC;AAErG,MAAM,uBAAuB,GAAG,wBAAwB,CAAC;AAgBzD,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC5C,OAAsE;IAEtE,MAAM,OAAO,GAAG,MAAM,aAAa,CAAmC,OAAO,CAAC,CAAC;IAC/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAC7C,MAAM,eAAe,GAAG,MAAM,CAAC,eAAe,IAAI,uBAAuB,CAAC;IAE1E,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,YAAY,EAAE,CAAC;IAE7C,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC7D,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE7E,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChF,GAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,6BAA6B,CAAC,CAAC;QAC1D,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;IAClD,CAAC;IAED,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;QAC/B,GAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,gBAAgB,CAAC,CAAC;QAC7C,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,2BAA2B,EAAE,CAAC;IAC3F,CAAC;IAED,MAAM,WAAW,GAAG,GAAG,EAAE;QACrB,IAAI,GAAG,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1C,OAAO,QAAQ,GAAG,GAAG,CAAC,EAAE,CAAC;QAC7B,CAAC;QAED,IAAI,MAAM,CAAC,UAAU,IAAI,GAAG,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,iBAAiB,CAAC,EAAE,CAAC;YACxE,OAAO,QAAQ,GAAG,GAAG,CAAC,EAAE,CAAC;QAC7B,CAAC;QAED,GAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,yBAAyB,CAAC,CAAC;QACtD,OAAO,SAAS,CAAC;IACrB,CAAC,CAAC;IAEF,MAAM,UAAU,GAAG;QACf,OAAO,EAAE,GAAG,CAAC,IAAI,IAAI,SAAS;QAC9B,KAAK,EAAE,WAAW,EAAE,IAAI,SAAS;QACjC,aAAa,EAAE,OAAO,EAAE,aAAa,EAAE,aAAa,IAAI,SAAS;KACpE,CAAC;IAEF,GAAG,CAAC,IAAI,CACJ,uCAAuC,QAAQ,cAAc,IAAI,CAAC,IAAI,EAAE,EACxE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,SAAS,CAC9C,CAAC;IAEF,MAAM,OAAO,GAAG,MAAM,8BAA8B,CAChD,MAAM,EACN,e
AAe,EACf;QACI,GAAG,MAAM;QACT,sBAAsB,EAAE,IAAI;QAC5B,aAAa,EAAE,IAAI,CAAC,aAAa;KACpC,EACD,UAAU,EACV,OAAO,CAAC,UAAU,IAAI,KAAK,CAC9B,CAAC;IAEF,MAAM,OAAO,GAAG,GAAG,EAAE;QACjB,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;YACX,OAAO,SAAS,CAAC;QACrB,CAAC;QACD,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YACvB,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC;QACxC,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAC7B,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,WAAW,CAAC;QACvC,CAAC;QACD,IAAI,IAAI,EAAE,CAAC;YACP,OAAO,IAAI,CAAC;QAChB,CAAC;aAAM,CAAC;YACJ,OAAO,SAAS,CAAC;QACrB,CAAC;IACL,CAAC,CAAC;IAEF,GAAG,CAAC,IAAI,CAAC,qCAAqC,QAAQ,cAAc,IAAI,CAAC,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAAC;IACxG,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE;QAChC,UAAU,EAAE;YACR,GAAG,OAAO,CAAC,MAAM;YACjB,IAAI,EAAE,GAAG,CAAC,SAAS;SACtB;QACD,IAAI,EAAE,OAAO,EAAE;QACf,mBAAmB,EAAE;YACjB,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,IAAI,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAC9B,KAAK,EAAE,OAAO,CAAC,OAAO;SACzB;KACJ,CAAC,CAAC;IAEH,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;AACnC,CAAC"}
|
@@ -0,0 +1,245 @@
|
|
1
|
+
import { log } from "@temporalio/activity";
|
2
|
+
import { SupportedEmbeddingTypes } from "@vertesia/common";
|
3
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
|
+
import { NoDocumentFound } from '../errors.js';
|
5
|
+
import { fetchBlobAsBase64, md5 } from "../utils/blobs.js";
|
6
|
+
import { getContentParts } from "../utils/chunks.js";
|
7
|
+
import { countTokens } from "../utils/tokens.js";
|
8
|
+
/**
 * Activity: generate and store embeddings of the requested type for an object.
 *
 * Reads the embeddings configuration for `params.type` from the project,
 * loads the document and dispatches to the text/properties or image generator.
 *
 * @param payload activity payload passed to `setupActivity`
 * @returns the generator result, `{ id, status: "skipped", message }` when
 *          embeddings are disabled for the type, or `{ id, status: "failed",
 *          message }` for an unsupported type.
 * @throws NoDocumentFound when the project, the embeddings configuration, the
 *         document or its content is missing.
 * @throws Error when the configuration has no environment.
 */
export async function generateEmbeddings(payload) {
    const { params, client, objectId, fetchProject } = await setupActivity(payload);
    const { force, type } = params;
    const projectData = await fetchProject();
    if (!projectData) {
        throw new NoDocumentFound('Project not found', [payload.project_id]);
    }
    // A missing `configuration` / `embeddings` section is reported as a missing
    // embeddings configuration rather than surfacing as a TypeError.
    const config = projectData.configuration?.embeddings?.[type];
    if (!config) {
        throw new NoDocumentFound('Embeddings configuration not found', [objectId]);
    }
    if (!config.enabled) {
        log.info(`Embeddings generation disabled for type ${type} on project: ${projectData.name} (${projectData.namespace})`, { config });
        return { id: objectId, status: "skipped", message: `Embeddings generation disabled for type ${type}` };
    }
    log.info(`${type} embedding generation starting for object ${objectId}`, { force, config });
    if (!config.environment) {
        throw new Error('No environment found in project configuration. Set environment in project configuration to generate embeddings.');
    }
    const document = await client.objects.retrieve(objectId, "+text +parts +embeddings +tokens +properties");
    if (!document) {
        throw new NoDocumentFound('Document not found', [objectId]);
    }
    if (!document.content) {
        throw new NoDocumentFound('Document content not found', [objectId]);
    }
    switch (type) {
        // Text and properties embeddings share the same generator.
        case SupportedEmbeddingTypes.text:
        case SupportedEmbeddingTypes.properties:
            return generateTextEmbeddings({ client, config, document, type });
        case SupportedEmbeddingTypes.image:
            return generateImageEmbeddings({ client, config, document, type });
        default:
            return { id: objectId, status: "failed", message: `unsupported embedding type: ${type}` };
    }
}
|
68
|
+
/**
 * Generates and stores embeddings for a document's text or properties.
 *
 * When the text token count exceeds `config.max_tokens` (default 8000), the
 * text is split with `getContentParts` using the given part definitions, each
 * part is embedded separately, and the part vectors are combined with
 * `computeAttentionEmbedding`. Otherwise `document[type]` is embedded in a
 * single call.
 *
 * @param options `{ document, client, type, config }`; `config` provides
 *        `environment`, `model` and optionally `max_tokens`.
 * @param parts optional part definitions used for documents over the token limit
 * @returns a result object with `status` "completed", "failed" or "error".
 */
async function generateTextEmbeddings({ document, client, type, config }, parts) {
    if (!document) {
        return { status: "error", message: "document is null or undefined" };
    }
    if (type !== SupportedEmbeddingTypes.text && type !== SupportedEmbeddingTypes.properties) {
        return { id: document.id, status: "failed", message: `unsupported embedding type: ${type}` };
    }
    if (type === SupportedEmbeddingTypes.text && !document.text) {
        return { id: document.id, status: "failed", message: "no text found" };
    }
    if (type === SupportedEmbeddingTypes.properties && !document?.properties) {
        return { id: document.id, status: "failed", message: "no properties found" };
    }

    const { environment, model } = config;
    const partDefinitions = parts ?? [];

    // Count tokens if not already done
    if (!document.tokens?.count && type === SupportedEmbeddingTypes.text) {
        log.debug('Updating token count for document: ' + document.id);
        const tokens = {
            ...countTokens(document.text),
            etag: document.text_etag ?? md5(document.text),
        };
        await client.objects.update(document.id, { tokens });
        document.tokens = tokens;
    }

    const maxTokens = config.max_tokens ?? 8000;
    log.info(`Generating ${type} embeddings for document ${document.id}`);

    const isTooLarge = type === SupportedEmbeddingTypes.text
        && Boolean(document.tokens?.count)
        && document.tokens.count > maxTokens;

    if (!isTooLarge) {
        // Small enough: embed the whole field in one call.
        log.info(`Generating ${type} embeddings for document`);
        // Pass the configured model, as the per-part path does.
        const res = await generateEmbeddingsFromStudio(JSON.stringify(document[type]), environment, client, model);
        if (!res || !res.values) {
            return { id: document.id, status: "failed", message: "no embeddings generated" };
        }
        log.info(`${type} embeddings generated for document ${document.id}`, { len: res.values.length });
        await client.objects.setEmbedding(document.id, type, {
            values: res.values,
            model: res.model,
            etag: document.text_etag
        });
        return { id: document.id, type, status: "completed", len: res.values.length };
    }

    // Too large: embed each part, then combine the part vectors.
    log.info('Document too large, generating embeddings for parts');
    if (partDefinitions.length === 0) {
        log.info('No parts found for document, skipping embeddings generation');
        return { id: document.id, status: "failed", message: "no parts found" };
    }
    log.info('Generating embeddings for parts', { parts: partDefinitions, max_tokens: maxTokens });
    const docParts = getContentParts(document.text, partDefinitions);
    log.info(`Retrieved ${docParts.length} parts`);
    const start = Date.now();

    // Embeds one part; never rejects, failures are reported with `result: null`.
    const generatePartEmbeddings = async (partContent, i) => {
        const localStart = Date.now();
        try {
            if (!partContent) {
                return { id: i, number: i, result: null, status: "skipped", message: "no text found" };
            }
            log.info(`Generating embeddings for part ${i}`, { text_len: partContent.length });
            const e = await generateEmbeddingsFromStudio(partContent, environment, client, model).catch(err => {
                log.error('Error generating embeddings for part ' + i, { text_length: partContent.length, error: err });
                return null;
            });
            if (!e || !e.values || e.values.length === 0) {
                return { id: i, number: i, result: null, message: "no embeddings generated" };
            }
            log.info(`Generated embeddings for part ${i}`, { len: e.values.length, duration: Date.now() - localStart });
            return { number: i, result: e };
        }
        catch (err) {
            log.info(`Error generating ${type} embeddings for part ${i} of ${document.id}`, { error: err });
            return { number: i, result: null, message: "error generating embeddings", error: err.message };
        }
    };

    const partEmbeddings = await Promise.all(docParts.map((part, i) => generatePartEmbeddings(part, i)));
    const validPartEmbeddings = partEmbeddings.filter(e => e.result !== null).map(e => e.result);

    // Every part failed or was skipped: nothing to combine or store.
    if (validPartEmbeddings.length === 0) {
        log.warn(`No part embeddings generated for document ${document.id}`);
        return { id: document.id, type, status: "failed", message: "no embeddings generated" };
    }

    const averagedEmbedding = computeAttentionEmbedding(validPartEmbeddings.map(e => e.values));
    log.info(`Averaged embeddings for document ${document.id} in ${(Date.now() - start) / 1000} seconds`, { len: averagedEmbedding.length, count: validPartEmbeddings.length, max_tokens: maxTokens });
    await client.objects.setEmbedding(document.id, type, {
        values: averagedEmbedding,
        model: validPartEmbeddings[0].model,
        etag: document.text_etag
    });
    log.info(`Object ${document.id} embedding set`, { type, len: averagedEmbedding.length });
    // Report completion like the single-call path does.
    return { id: document.id, type, status: "completed", len: averagedEmbedding.length };
}
|
170
|
+
/**
 * Generates and stores an image embedding for a document whose content is an
 * image or a PDF, using a PNG rendition (max 1024 px) of the object.
 *
 * @param options `{ document, client, type, config }`; `config` provides
 *        `environment` and `model`.
 * @returns `{ id, type, status: "completed", len }` on success, or a
 *          `status: "failed"` result when the content is not supported or no
 *          embedding values come back.
 * @throws Error while the rendition is still being generated.
 * @throws NoDocumentFound when the rendition failed or has no source.
 */
async function generateImageEmbeddings({ document, client, type, config }) {
    log.info('Generating image embeddings for document ' + document.id, { content: document.content });

    const mimeType = document.content?.type;
    const isImage = mimeType?.startsWith('image/');
    const isPdf = mimeType?.includes('pdf');
    if (!isImage && !isPdf) {
        return { id: document.id, type, status: "failed", message: "content is not an image" };
    }

    const { environment, model } = config;
    const renditionResponse = await client.store.objects.getRendition(document.id, {
        format: "image/png",
        max_hw: 1024,
        generate_if_missing: true
    });

    // Rendition not ready yet: fail the attempt so it can be tried again.
    if (renditionResponse.status === 'generating') {
        throw new Error("Rendition is generating, will retry later");
    }
    if (renditionResponse.status === "failed" || !renditionResponse.rendition) {
        throw new NoDocumentFound("Rendition retrieval failed", [document.id]);
    }

    const renditionSource = renditionResponse.rendition.content.source;
    if (!renditionSource) {
        throw new NoDocumentFound("No source found in rendition", [document.id]);
    }

    const image = await fetchBlobAsBase64(client, renditionSource);

    let embedding;
    try {
        embedding = await client.environments.embeddings(environment, { image, model });
    }
    catch (e) {
        log.error('Error generating embeddings for image', { error: e });
        throw e;
    }

    if (!embedding || !embedding.values) {
        return { id: document.id, status: "failed", message: "no embeddings generated" };
    }

    await client.objects.setEmbedding(document.id, SupportedEmbeddingTypes.image, {
        values: embedding.values,
        model: embedding.model,
        etag: document.text_etag
    });
    return { id: document.id, type, status: "completed", len: embedding.values.length };
}
|
208
|
+
/**
 * Requests text embeddings from the given environment.
 *
 * @param text the text to embed
 * @param env environment identifier passed to `client.environments.embeddings`
 * @param client API client
 * @param model optional model to use
 * @returns the embeddings response from the client
 * @throws rethrows any error from the embeddings call after logging it
 */
async function generateEmbeddingsFromStudio(text, env, client, model) {
    log.info(`Generating embeddings for text of ${text.length} chars with environment ${env}`);
    try {
        const response = await client.environments.embeddings(env, { text, model });
        return response;
    }
    catch (e) {
        log.error('Error generating embeddings for text', { error: e });
        throw e;
    }
}
|
218
|
+
// Combines chunk embeddings into a single document embedding.
// This is a naive implementation (a plain element-wise mean) and should be
// replaced with a more sophisticated one using tensorflow in a specific package.
// The weights are uniform on purpose: random weights would make the stored
// embedding differ between runs of the same document.
/**
 * @param chunkEmbeddings array of equally sized numeric vectors
 * @returns the element-wise mean vector, or [] when no chunks are given
 */
function computeAttentionEmbedding(chunkEmbeddings) {
    if (chunkEmbeddings.length === 0)
        return [];
    const start = new Date().getTime();
    // The first chunk defines the embedding dimension.
    const embeddingDim = chunkEmbeddings[0].length;
    const documentEmbedding = new Array(embeddingDim).fill(0);
    // Sum all chunks per dimension...
    for (const chunk of chunkEmbeddings) {
        for (let j = 0; j < embeddingDim; j++) {
            documentEmbedding[j] += chunk[j];
        }
    }
    // ...then divide by the number of chunks.
    const chunkCount = chunkEmbeddings.length;
    for (let j = 0; j < embeddingDim; j++) {
        documentEmbedding[j] /= chunkCount;
    }
    const duration = new Date().getTime() - start;
    console.log(`Computed document embedding in ${duration}ms for ${chunkEmbeddings.length} chunks`);
    return documentEmbedding;
}
|
245
|
+
//# sourceMappingURL=generateEmbeddings.js.map
|