@vertesia/workflow 0.51.0 → 0.54.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -6
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +20 -1
- package/src/activities/chunkDocument.ts +62 -42
- package/src/activities/createDocumentFromOther.ts +2 -2
- package/src/activities/executeInteraction.ts +92 -47
- package/src/activities/extractDocumentText.ts +91 -54
- package/src/activities/generateDocumentProperties.ts +37 -16
- package/src/activities/generateEmbeddings.ts +91 -79
- package/src/activities/generateImageRendition.ts +127 -59
- package/src/activities/generateOrAssignContentType.ts +52 -32
- package/src/activities/getObjectFromStore.ts +1 -1
- package/src/activities/handleError.ts +25 -0
- package/src/activities/index-dsl.ts +1 -0
- package/src/activities/index.ts +0 -1
- package/src/activities/media/processPdfWithTextract.ts +4 -4
- package/src/activities/media/transcribeMediaWithGladia.ts +1 -1
- package/src/activities/notifyWebhook.ts +2 -2
- package/src/activities/setDocumentStatus.ts +1 -1
- package/src/conversion/TextractProcessor.ts +9 -9
- package/src/conversion/image.test.ts +110 -18
- package/src/conversion/image.ts +96 -15
- package/src/conversion/markitdown.ts +41 -0
- package/src/conversion/mutool.ts +1 -1
- package/src/conversion/pandoc.test.ts +8 -6
- package/src/conversion/pandoc.ts +38 -42
- package/src/dsl/dsl-workflow.ts +80 -12
- package/src/dsl/setup/ActivityContext.ts +57 -16
- package/src/dsl/validation.test.ts +2 -2
- package/src/dsl/vars.test.ts +1 -1
- package/src/dsl/vars.ts +6 -6
- package/src/dsl/workflow-exec-child.test.ts +14 -4
- package/src/dsl/workflow-fetch.test.ts +1 -1
- package/src/dsl/workflow-import.test.ts +1 -1
- package/src/dsl/workflow.test.ts +12 -2
- package/src/dsl.ts +1 -1
- package/src/errors.ts +27 -6
- package/src/index.ts +1 -1
- package/src/iterative-generation/activities/extractToc.ts +1 -1
- package/src/iterative-generation/activities/generatePart.ts +2 -2
- package/src/iterative-generation/activities/generateToc.ts +1 -1
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +3 -2
- package/src/iterative-generation/types.ts +4 -4
- package/src/iterative-generation/utils.ts +4 -4
- package/src/system/notifyWebhookWorkflow.ts +2 -1
- package/src/system/recalculateEmbeddingsWorkflow.ts +2 -2
- package/src/utils/blobs.ts +11 -6
- package/src/utils/chunks.ts +17 -0
- package/src/utils/client.ts +4 -3
- package/src/utils/memory.ts +3 -8
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +0 -32
- package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +0 -1
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +0 -66
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +0 -1
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +0 -18
- package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +0 -1
- package/lib/cjs/activities/chunkDocument.js +0 -79
- package/lib/cjs/activities/chunkDocument.js.map +0 -1
- package/lib/cjs/activities/createDocumentFromOther.js +0 -64
- package/lib/cjs/activities/createDocumentFromOther.js.map +0 -1
- package/lib/cjs/activities/executeInteraction.js +0 -134
- package/lib/cjs/activities/executeInteraction.js.map +0 -1
- package/lib/cjs/activities/extractDocumentText.js +0 -135
- package/lib/cjs/activities/extractDocumentText.js.map +0 -1
- package/lib/cjs/activities/generateDocumentProperties.js +0 -59
- package/lib/cjs/activities/generateDocumentProperties.js.map +0 -1
- package/lib/cjs/activities/generateEmbeddings.js +0 -292
- package/lib/cjs/activities/generateEmbeddings.js.map +0 -1
- package/lib/cjs/activities/generateImageRendition.js +0 -104
- package/lib/cjs/activities/generateImageRendition.js.map +0 -1
- package/lib/cjs/activities/generateOrAssignContentType.js +0 -103
- package/lib/cjs/activities/generateOrAssignContentType.js.map +0 -1
- package/lib/cjs/activities/getObjectFromStore.js +0 -20
- package/lib/cjs/activities/getObjectFromStore.js.map +0 -1
- package/lib/cjs/activities/index-dsl.js +0 -37
- package/lib/cjs/activities/index-dsl.js.map +0 -1
- package/lib/cjs/activities/index.js +0 -22
- package/lib/cjs/activities/index.js.map +0 -1
- package/lib/cjs/activities/media/processPdfWithTextract.js +0 -102
- package/lib/cjs/activities/media/processPdfWithTextract.js.map +0 -1
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +0 -51
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +0 -1
- package/lib/cjs/activities/notifyWebhook.js +0 -34
- package/lib/cjs/activities/notifyWebhook.js.map +0 -1
- package/lib/cjs/activities/setDocumentStatus.js +0 -15
- package/lib/cjs/activities/setDocumentStatus.js.map +0 -1
- package/lib/cjs/conversion/TextractProcessor.js +0 -417
- package/lib/cjs/conversion/TextractProcessor.js.map +0 -1
- package/lib/cjs/conversion/image.js +0 -22
- package/lib/cjs/conversion/image.js.map +0 -1
- package/lib/cjs/conversion/mutool.js +0 -147
- package/lib/cjs/conversion/mutool.js.map +0 -1
- package/lib/cjs/conversion/pandoc.js +0 -39
- package/lib/cjs/conversion/pandoc.js.map +0 -1
- package/lib/cjs/dsl/conditions.js +0 -81
- package/lib/cjs/dsl/conditions.js.map +0 -1
- package/lib/cjs/dsl/dsl-workflow.js +0 -223
- package/lib/cjs/dsl/dsl-workflow.js.map +0 -1
- package/lib/cjs/dsl/dslProxyActivities.js +0 -23
- package/lib/cjs/dsl/dslProxyActivities.js.map +0 -1
- package/lib/cjs/dsl/projections.js +0 -59
- package/lib/cjs/dsl/projections.js.map +0 -1
- package/lib/cjs/dsl/setup/ActivityContext.js +0 -96
- package/lib/cjs/dsl/setup/ActivityContext.js.map +0 -1
- package/lib/cjs/dsl/setup/fetch/DataProvider.js +0 -51
- package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +0 -1
- package/lib/cjs/dsl/setup/fetch/index.js +0 -16
- package/lib/cjs/dsl/setup/fetch/index.js.map +0 -1
- package/lib/cjs/dsl/setup/fetch/providers.js +0 -67
- package/lib/cjs/dsl/setup/fetch/providers.js.map +0 -1
- package/lib/cjs/dsl/test/test-child-workflow.js +0 -10
- package/lib/cjs/dsl/test/test-child-workflow.js.map +0 -1
- package/lib/cjs/dsl/validation.js +0 -122
- package/lib/cjs/dsl/validation.js.map +0 -1
- package/lib/cjs/dsl/vars.js +0 -341
- package/lib/cjs/dsl/vars.js.map +0 -1
- package/lib/cjs/dsl/walk.js +0 -100
- package/lib/cjs/dsl/walk.js.map +0 -1
- package/lib/cjs/dsl.js +0 -20
- package/lib/cjs/dsl.js.map +0 -1
- package/lib/cjs/errors.js +0 -36
- package/lib/cjs/errors.js.map +0 -1
- package/lib/cjs/index.js +0 -50
- package/lib/cjs/index.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/extractToc.js +0 -47
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +0 -69
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/generatePart.js +0 -73
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/generateToc.js +0 -91
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +0 -1
- package/lib/cjs/iterative-generation/activities/index.js +0 -12
- package/lib/cjs/iterative-generation/activities/index.js.map +0 -1
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +0 -55
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +0 -1
- package/lib/cjs/iterative-generation/types.js +0 -5
- package/lib/cjs/iterative-generation/types.js.map +0 -1
- package/lib/cjs/iterative-generation/utils.js +0 -121
- package/lib/cjs/iterative-generation/utils.js.map +0 -1
- package/lib/cjs/package.json +0 -3
- package/lib/cjs/result-types.js +0 -10
- package/lib/cjs/result-types.js.map +0 -1
- package/lib/cjs/system/notifyWebhookWorkflow.js +0 -46
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +0 -1
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +0 -28
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +0 -1
- package/lib/cjs/utils/auth.js +0 -15
- package/lib/cjs/utils/auth.js.map +0 -1
- package/lib/cjs/utils/blobs.js +0 -63
- package/lib/cjs/utils/blobs.js.map +0 -1
- package/lib/cjs/utils/client.js +0 -25
- package/lib/cjs/utils/client.js.map +0 -1
- package/lib/cjs/utils/expand-vars.js +0 -33
- package/lib/cjs/utils/expand-vars.js.map +0 -1
- package/lib/cjs/utils/memory.js +0 -72
- package/lib/cjs/utils/memory.js.map +0 -1
- package/lib/cjs/utils/tokens.js +0 -38
- package/lib/cjs/utils/tokens.js.map +0 -1
- package/lib/cjs/vars.js +0 -20
- package/lib/cjs/vars.js.map +0 -1
- package/lib/cjs/workflows.js +0 -15
- package/lib/cjs/workflows.js.map +0 -1
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +0 -29
- package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +0 -1
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +0 -63
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +0 -1
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +0 -15
- package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +0 -1
- package/lib/esm/activities/chunkDocument.js +0 -76
- package/lib/esm/activities/chunkDocument.js.map +0 -1
- package/lib/esm/activities/createDocumentFromOther.js +0 -58
- package/lib/esm/activities/createDocumentFromOther.js.map +0 -1
- package/lib/esm/activities/executeInteraction.js +0 -130
- package/lib/esm/activities/executeInteraction.js.map +0 -1
- package/lib/esm/activities/extractDocumentText.js +0 -132
- package/lib/esm/activities/extractDocumentText.js.map +0 -1
- package/lib/esm/activities/generateDocumentProperties.js +0 -56
- package/lib/esm/activities/generateDocumentProperties.js.map +0 -1
- package/lib/esm/activities/generateEmbeddings.js +0 -256
- package/lib/esm/activities/generateEmbeddings.js.map +0 -1
- package/lib/esm/activities/generateImageRendition.js +0 -98
- package/lib/esm/activities/generateImageRendition.js.map +0 -1
- package/lib/esm/activities/generateOrAssignContentType.js +0 -100
- package/lib/esm/activities/generateOrAssignContentType.js.map +0 -1
- package/lib/esm/activities/getObjectFromStore.js +0 -17
- package/lib/esm/activities/getObjectFromStore.js.map +0 -1
- package/lib/esm/activities/index-dsl.js +0 -18
- package/lib/esm/activities/index-dsl.js.map +0 -1
- package/lib/esm/activities/index.js +0 -6
- package/lib/esm/activities/index.js.map +0 -1
- package/lib/esm/activities/media/processPdfWithTextract.js +0 -98
- package/lib/esm/activities/media/processPdfWithTextract.js.map +0 -1
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +0 -48
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +0 -1
- package/lib/esm/activities/notifyWebhook.js +0 -31
- package/lib/esm/activities/notifyWebhook.js.map +0 -1
- package/lib/esm/activities/setDocumentStatus.js +0 -12
- package/lib/esm/activities/setDocumentStatus.js.map +0 -1
- package/lib/esm/conversion/TextractProcessor.js +0 -410
- package/lib/esm/conversion/TextractProcessor.js.map +0 -1
- package/lib/esm/conversion/image.js +0 -16
- package/lib/esm/conversion/image.js.map +0 -1
- package/lib/esm/conversion/mutool.js +0 -139
- package/lib/esm/conversion/mutool.js.map +0 -1
- package/lib/esm/conversion/pandoc.js +0 -36
- package/lib/esm/conversion/pandoc.js.map +0 -1
- package/lib/esm/dsl/conditions.js +0 -75
- package/lib/esm/dsl/conditions.js.map +0 -1
- package/lib/esm/dsl/dsl-workflow.js +0 -216
- package/lib/esm/dsl/dsl-workflow.js.map +0 -1
- package/lib/esm/dsl/dslProxyActivities.js +0 -20
- package/lib/esm/dsl/dslProxyActivities.js.map +0 -1
- package/lib/esm/dsl/projections.js +0 -55
- package/lib/esm/dsl/projections.js.map +0 -1
- package/lib/esm/dsl/setup/ActivityContext.js +0 -91
- package/lib/esm/dsl/setup/ActivityContext.js.map +0 -1
- package/lib/esm/dsl/setup/fetch/DataProvider.js +0 -47
- package/lib/esm/dsl/setup/fetch/DataProvider.js.map +0 -1
- package/lib/esm/dsl/setup/fetch/index.js +0 -12
- package/lib/esm/dsl/setup/fetch/index.js.map +0 -1
- package/lib/esm/dsl/setup/fetch/providers.js +0 -61
- package/lib/esm/dsl/setup/fetch/providers.js.map +0 -1
- package/lib/esm/dsl/test/test-child-workflow.js +0 -5
- package/lib/esm/dsl/test/test-child-workflow.js.map +0 -1
- package/lib/esm/dsl/validation.js +0 -118
- package/lib/esm/dsl/validation.js.map +0 -1
- package/lib/esm/dsl/vars.js +0 -335
- package/lib/esm/dsl/vars.js.map +0 -1
- package/lib/esm/dsl/walk.js +0 -96
- package/lib/esm/dsl/walk.js.map +0 -1
- package/lib/esm/dsl.js +0 -4
- package/lib/esm/dsl.js.map +0 -1
- package/lib/esm/errors.js +0 -30
- package/lib/esm/errors.js.map +0 -1
- package/lib/esm/index.js +0 -32
- package/lib/esm/index.js.map +0 -1
- package/lib/esm/iterative-generation/activities/extractToc.js +0 -44
- package/lib/esm/iterative-generation/activities/extractToc.js.map +0 -1
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +0 -66
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +0 -1
- package/lib/esm/iterative-generation/activities/generatePart.js +0 -70
- package/lib/esm/iterative-generation/activities/generatePart.js.map +0 -1
- package/lib/esm/iterative-generation/activities/generateToc.js +0 -88
- package/lib/esm/iterative-generation/activities/generateToc.js.map +0 -1
- package/lib/esm/iterative-generation/activities/index.js +0 -5
- package/lib/esm/iterative-generation/activities/index.js.map +0 -1
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +0 -52
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +0 -1
- package/lib/esm/iterative-generation/types.js +0 -2
- package/lib/esm/iterative-generation/types.js.map +0 -1
- package/lib/esm/iterative-generation/utils.js +0 -112
- package/lib/esm/iterative-generation/utils.js.map +0 -1
- package/lib/esm/result-types.js +0 -7
- package/lib/esm/result-types.js.map +0 -1
- package/lib/esm/system/notifyWebhookWorkflow.js +0 -43
- package/lib/esm/system/notifyWebhookWorkflow.js.map +0 -1
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +0 -25
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +0 -1
- package/lib/esm/utils/auth.js +0 -8
- package/lib/esm/utils/auth.js.map +0 -1
- package/lib/esm/utils/blobs.js +0 -52
- package/lib/esm/utils/blobs.js.map +0 -1
- package/lib/esm/utils/client.js +0 -22
- package/lib/esm/utils/client.js.map +0 -1
- package/lib/esm/utils/expand-vars.js +0 -30
- package/lib/esm/utils/expand-vars.js.map +0 -1
- package/lib/esm/utils/memory.js +0 -60
- package/lib/esm/utils/memory.js.map +0 -1
- package/lib/esm/utils/tokens.js +0 -34
- package/lib/esm/utils/tokens.js.map +0 -1
- package/lib/esm/vars.js +0 -4
- package/lib/esm/vars.js.map +0 -1
- package/lib/esm/workflows.js +0 -8
- package/lib/esm/workflows.js.map +0 -1
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +0 -17
- package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +0 -1
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +0 -29
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +0 -1
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +0 -19
- package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +0 -1
- package/lib/types/activities/chunkDocument.d.ts +0 -18
- package/lib/types/activities/chunkDocument.d.ts.map +0 -1
- package/lib/types/activities/createDocumentFromOther.d.ts +0 -21
- package/lib/types/activities/createDocumentFromOther.d.ts.map +0 -1
- package/lib/types/activities/executeInteraction.d.ts +0 -44
- package/lib/types/activities/executeInteraction.d.ts.map +0 -1
- package/lib/types/activities/extractDocumentText.d.ts +0 -10
- package/lib/types/activities/extractDocumentText.d.ts.map +0 -1
- package/lib/types/activities/generateDocumentProperties.d.ts +0 -32
- package/lib/types/activities/generateDocumentProperties.d.ts.map +0 -1
- package/lib/types/activities/generateEmbeddings.d.ts +0 -49
- package/lib/types/activities/generateEmbeddings.d.ts.map +0 -1
- package/lib/types/activities/generateImageRendition.d.ts +0 -17
- package/lib/types/activities/generateImageRendition.d.ts.map +0 -1
- package/lib/types/activities/generateOrAssignContentType.d.ts +0 -44
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +0 -1
- package/lib/types/activities/getObjectFromStore.d.ts +0 -14
- package/lib/types/activities/getObjectFromStore.d.ts.map +0 -1
- package/lib/types/activities/index-dsl.d.ts +0 -17
- package/lib/types/activities/index-dsl.d.ts.map +0 -1
- package/lib/types/activities/index.d.ts +0 -6
- package/lib/types/activities/index.d.ts.map +0 -1
- package/lib/types/activities/media/processPdfWithTextract.d.ts +0 -26
- package/lib/types/activities/media/processPdfWithTextract.d.ts.map +0 -1
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +0 -14
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +0 -1
- package/lib/types/activities/notifyWebhook.d.ts +0 -17
- package/lib/types/activities/notifyWebhook.d.ts.map +0 -1
- package/lib/types/activities/setDocumentStatus.d.ts +0 -15
- package/lib/types/activities/setDocumentStatus.d.ts.map +0 -1
- package/lib/types/conversion/TextractProcessor.d.ts +0 -45
- package/lib/types/conversion/TextractProcessor.d.ts.map +0 -1
- package/lib/types/conversion/image.d.ts +0 -9
- package/lib/types/conversion/image.d.ts.map +0 -1
- package/lib/types/conversion/mutool.d.ts +0 -19
- package/lib/types/conversion/mutool.d.ts.map +0 -1
- package/lib/types/conversion/pandoc.d.ts +0 -2
- package/lib/types/conversion/pandoc.d.ts.map +0 -1
- package/lib/types/dsl/conditions.d.ts +0 -2
- package/lib/types/dsl/conditions.d.ts.map +0 -1
- package/lib/types/dsl/dsl-workflow.d.ts +0 -5
- package/lib/types/dsl/dsl-workflow.d.ts.map +0 -1
- package/lib/types/dsl/dslProxyActivities.d.ts +0 -10
- package/lib/types/dsl/dslProxyActivities.d.ts.map +0 -1
- package/lib/types/dsl/projections.d.ts +0 -4
- package/lib/types/dsl/projections.d.ts.map +0 -1
- package/lib/types/dsl/setup/ActivityContext.d.ts +0 -14
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +0 -1
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts +0 -9
- package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +0 -1
- package/lib/types/dsl/setup/fetch/index.d.ts +0 -6
- package/lib/types/dsl/setup/fetch/index.d.ts.map +0 -1
- package/lib/types/dsl/setup/fetch/providers.d.ts +0 -25
- package/lib/types/dsl/setup/fetch/providers.d.ts.map +0 -1
- package/lib/types/dsl/test/test-child-workflow.d.ts +0 -4
- package/lib/types/dsl/test/test-child-workflow.d.ts.map +0 -1
- package/lib/types/dsl/validation.d.ts +0 -4
- package/lib/types/dsl/validation.d.ts.map +0 -1
- package/lib/types/dsl/vars.d.ts +0 -48
- package/lib/types/dsl/vars.d.ts.map +0 -1
- package/lib/types/dsl/walk.d.ts +0 -18
- package/lib/types/dsl/walk.d.ts.map +0 -1
- package/lib/types/dsl.d.ts +0 -4
- package/lib/types/dsl.d.ts.map +0 -1
- package/lib/types/errors.d.ts +0 -16
- package/lib/types/errors.d.ts.map +0 -1
- package/lib/types/index.d.ts +0 -31
- package/lib/types/index.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/extractToc.d.ts +0 -10
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +0 -3
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/generatePart.d.ts +0 -3
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/generateToc.d.ts +0 -4
- package/lib/types/iterative-generation/activities/generateToc.d.ts.map +0 -1
- package/lib/types/iterative-generation/activities/index.d.ts +0 -5
- package/lib/types/iterative-generation/activities/index.d.ts.map +0 -1
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +0 -3
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +0 -1
- package/lib/types/iterative-generation/types.d.ts +0 -79
- package/lib/types/iterative-generation/types.d.ts.map +0 -1
- package/lib/types/iterative-generation/utils.d.ts +0 -27
- package/lib/types/iterative-generation/utils.d.ts.map +0 -1
- package/lib/types/result-types.d.ts +0 -22
- package/lib/types/result-types.d.ts.map +0 -1
- package/lib/types/system/notifyWebhookWorkflow.d.ts +0 -3
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +0 -1
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +0 -40
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +0 -1
- package/lib/types/utils/auth.d.ts +0 -4
- package/lib/types/utils/auth.d.ts.map +0 -1
- package/lib/types/utils/blobs.d.ts +0 -8
- package/lib/types/utils/blobs.d.ts.map +0 -1
- package/lib/types/utils/client.d.ts +0 -7
- package/lib/types/utils/client.d.ts.map +0 -1
- package/lib/types/utils/expand-vars.d.ts +0 -8
- package/lib/types/utils/expand-vars.d.ts.map +0 -1
- package/lib/types/utils/memory.d.ts +0 -12
- package/lib/types/utils/memory.d.ts.map +0 -1
- package/lib/types/utils/tokens.d.ts +0 -11
- package/lib/types/utils/tokens.d.ts.map +0 -1
- package/lib/types/vars.d.ts +0 -3
- package/lib/types/vars.d.ts.map +0 -1
- package/lib/types/workflows.d.ts +0 -8
- package/lib/types/workflows.d.ts.map +0 -1
- package/lib/workflows-bundle.js +0 -19897
@@ -1,39 +1,44 @@
|
|
1
|
-
import { DSLActivityExecutionPayload, DSLActivitySpec, RenditionProperties } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { NodeStreamSource } from "@vertesia/client/node";
|
3
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec, RenditionProperties } from "@vertesia/common";
|
4
|
+
import ffmpeg from 'fluent-ffmpeg';
|
3
5
|
import fs from 'fs';
|
4
|
-
import
|
6
|
+
import os from 'os';
|
7
|
+
import path from 'path';
|
5
8
|
import { imageResizer } from "../conversion/image.js";
|
6
|
-
import { pdfToImages } from "../conversion/mutool.js";
|
7
9
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
8
10
|
import { NoDocumentFound, WorkflowParamNotFound } from "../errors.js";
|
9
|
-
import {
|
10
|
-
|
11
|
+
import { saveBlobToTempFile } from "../utils/blobs.js";
|
12
|
+
|
11
13
|
interface GenerateImageRenditionParams {
|
12
|
-
max_hw: number; //maximum size of the
|
13
|
-
format:
|
14
|
-
multi_page?: boolean; //if true, generate a multi-page rendition
|
14
|
+
max_hw: number; //maximum size of the longest side of the image
|
15
|
+
format: string; //format of the output image
|
15
16
|
}
|
16
17
|
|
17
|
-
|
18
18
|
export interface GenerateImageRendition extends DSLActivitySpec<GenerateImageRenditionParams> {
|
19
|
-
|
20
|
-
name: 'generateImageRendition';
|
21
|
-
|
19
|
+
name: "generateImageRendition";
|
22
20
|
}
|
23
21
|
|
24
|
-
|
25
22
|
export async function generateImageRendition(payload: DSLActivityExecutionPayload<GenerateImageRenditionParams>) {
|
26
|
-
const { client, objectId, params } = await setupActivity<GenerateImageRenditionParams>(payload);
|
23
|
+
const { client, objectId, params: originParams } = await setupActivity<GenerateImageRenditionParams>(payload);
|
24
|
+
|
25
|
+
// Fix: Use maxHeightWidth if max_hw is not provided
|
26
|
+
const params = {
|
27
|
+
...originParams,
|
28
|
+
max_hw: originParams.max_hw || (originParams as any).maxHeightWidth || 1024, // Default to 1024 if both are missing
|
29
|
+
format: originParams.format || (originParams as any).format_output || 'png' // Default to png if format is missing
|
30
|
+
};
|
31
|
+
|
32
|
+
log.info(`Generating image rendition for ${objectId}`, { originParams, params });
|
27
33
|
|
28
|
-
const supportedNonImageInputTypes = ['application/pdf']
|
29
34
|
const inputObject = await client.objects.retrieve(objectId).catch((err) => {
|
30
|
-
log.error(`Failed to retrieve document ${objectId}`, err);
|
31
|
-
if (err.
|
35
|
+
log.error(`Failed to retrieve document ${objectId}`, { err });
|
36
|
+
if (err.message.includes("not found")) {
|
32
37
|
throw new NoDocumentFound(`Document ${objectId} not found`, [objectId]);
|
33
38
|
}
|
34
39
|
throw err;
|
35
40
|
});
|
36
|
-
const renditionType = await client.types.getTypeByName(
|
41
|
+
const renditionType = await client.types.getTypeByName("Rendition");
|
37
42
|
|
38
43
|
if (!params.format) {
|
39
44
|
log.error(`Format not found`);
|
@@ -50,85 +55,148 @@ export async function generateImageRendition(payload: DSLActivityExecutionPayloa
|
|
50
55
|
throw new NoDocumentFound(`Document ${objectId} has no source`, [objectId]);
|
51
56
|
}
|
52
57
|
|
53
|
-
if (!inputObject.content.type || (!inputObject.content.type?.startsWith(
|
54
|
-
log.error(`Document ${objectId} is not an image`);
|
55
|
-
throw new NoDocumentFound(`Document ${objectId} is not an image or
|
58
|
+
if (!inputObject.content.type || (!inputObject.content.type?.startsWith("image/") && !inputObject.content.type?.startsWith("video/"))) {
|
59
|
+
log.error(`Document ${objectId} is not an image or a video: ${inputObject.content.type}`);
|
60
|
+
throw new NoDocumentFound(`Document ${objectId} is not an image or a video: ${inputObject.content.type}`, [objectId]);
|
56
61
|
}
|
57
62
|
|
58
63
|
//array of rendition files to upload
|
59
64
|
let renditionPages: string[] = [];
|
60
65
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
66
|
+
if (inputObject.content.type.startsWith('image/')) {
|
67
|
+
const imageFile = await saveBlobToTempFile(client, inputObject.content.source);
|
68
|
+
log.info(`Image ${objectId} copied to ${imageFile}`);
|
69
|
+
renditionPages.push(imageFile);
|
70
|
+
} else if (inputObject.content.type.startsWith('video/')) {
|
71
|
+
const videoFile = await saveBlobToTempFile(client, inputObject.content.source);
|
72
|
+
const tempOutputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'video-rendition-'));
|
73
|
+
const thumbnailPath = path.join(tempOutputDir, 'thumbnail.png');
|
74
|
+
|
75
|
+
try {
|
76
|
+
// Extract a frame at 10% of the video duration
|
77
|
+
await new Promise<void>((resolve, reject) => {
|
78
|
+
ffmpeg.ffprobe(videoFile, (err, metadata) => {
|
79
|
+
if (err) {
|
80
|
+
log.error(`Failed to probe video metadata: ${err.message}`);
|
81
|
+
return reject(err);
|
82
|
+
}
|
83
|
+
|
84
|
+
const duration = metadata.format.duration || 0;
|
85
|
+
const timestamp = Math.max(0.1 * duration, 1);
|
86
|
+
|
87
|
+
ffmpeg(videoFile)
|
88
|
+
.screenshots({
|
89
|
+
timestamps: [timestamp],
|
90
|
+
filename: 'thumbnail.png',
|
91
|
+
folder: tempOutputDir,
|
92
|
+
size: `${params.max_hw}x?`
|
93
|
+
})
|
94
|
+
.on('end', () => {
|
95
|
+
log.info(`Video frame extraction complete for ${objectId}`);
|
96
|
+
resolve();
|
97
|
+
})
|
98
|
+
.on('error', (err) => {
|
99
|
+
log.error(`Error extracting frame from video: ${err.message}`);
|
100
|
+
reject(err);
|
101
|
+
});
|
102
|
+
});
|
103
|
+
});
|
104
|
+
|
105
|
+
if (fs.existsSync(thumbnailPath)) {
|
106
|
+
renditionPages.push(thumbnailPath);
|
107
|
+
} else {
|
108
|
+
throw new Error(`Failed to generate thumbnail for video ${objectId}`);
|
109
|
+
}
|
110
|
+
} catch (error) {
|
111
|
+
log.error(`Error generating image rendition for video: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
112
|
+
throw new Error(`Failed to generate image rendition for video: ${objectId}`);
|
68
113
|
}
|
69
|
-
renditionPages = [...pages];
|
70
|
-
} else if (inputObject.content.type.startsWith('image/')) {
|
71
|
-
const tmpFile = await saveBlobToTempFile(client, inputObject.content.source);
|
72
|
-
const filestats = fs.statSync(tmpFile);
|
73
|
-
log.info(`Image ${objectId} copied to ${tmpFile}`, { filestats });
|
74
|
-
renditionPages.push(tmpFile);
|
75
114
|
}
|
76
115
|
|
77
116
|
//generate rendition name, pass an index for multi parts
|
78
117
|
const getRenditionName = (index: number = 0) => {
|
79
118
|
const name = `renditions/${objectId}/${params.max_hw}/${index}.${params.format}`;
|
80
119
|
return name;
|
81
|
-
}
|
120
|
+
};
|
82
121
|
|
83
122
|
if (!renditionPages || !renditionPages.length) {
|
84
123
|
log.error(`Failed to generate rendition for ${objectId}`);
|
85
124
|
throw new Error(`Failed to generate rendition for ${objectId}`);
|
86
125
|
}
|
87
126
|
|
88
|
-
log.info(
|
127
|
+
log.info(
|
128
|
+
`Uploading rendition for ${objectId} with ${renditionPages.length} pages (max_hw: ${params.max_hw}, format: ${params.format})`,
|
129
|
+
{ renditionPages },
|
130
|
+
);
|
89
131
|
const uploads = renditionPages.map(async (page, i) => {
|
90
132
|
const pageId = getRenditionName(i);
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
133
|
+
let resizedImagePath = null;
|
134
|
+
|
135
|
+
try {
|
136
|
+
log.info(`Resizing image for ${objectId} page ${i}`, { page, params });
|
137
|
+
// Resize the image using ImageMagick
|
138
|
+
resizedImagePath = await imageResizer(page, params.max_hw, params.format);
|
139
|
+
|
140
|
+
// Create a read stream from the resized image file
|
141
|
+
const fileStream = fs.createReadStream(resizedImagePath);
|
142
|
+
const format = "image/" + params.format;
|
143
|
+
const fileId = pageId.split("/").pop() ?? "0." + params.format;
|
144
|
+
const source = new NodeStreamSource(
|
145
|
+
fileStream,
|
146
|
+
fileId,
|
147
|
+
format,
|
148
|
+
pageId,
|
149
|
+
);
|
150
|
+
|
151
|
+
log.info(
|
152
|
+
`Uploading rendition for ${objectId} page ${i} with max_hw: ${params.max_hw} and format: ${params.format}`, {
|
153
|
+
resizedImagePath,
|
154
|
+
fileId,
|
155
|
+
format,
|
156
|
+
pageId,
|
157
|
+
}
|
158
|
+
);
|
159
|
+
|
160
|
+
const result = await client.objects.upload(source).catch((err) => {
|
161
|
+
log.error(`Failed to upload rendition for ${objectId} page ${i}`, {
|
162
|
+
error: err,
|
163
|
+
errorMessage: err.message,
|
164
|
+
stack: err.stack
|
165
|
+
});
|
166
|
+
return Promise.reject(`Upload failed: ${err.message}`);
|
167
|
+
});
|
168
|
+
log.info(`Rendition uploaded for ${objectId} page ${i}`, { result });
|
169
|
+
|
170
|
+
return result;
|
171
|
+
} catch (error) {
|
172
|
+
log.error(`Failed to process rendition for ${objectId} page ${i}`, { error });
|
173
|
+
return Promise.reject(error instanceof Error ? error.message : null);
|
174
|
+
}
|
105
175
|
});
|
106
176
|
|
107
177
|
const uploaded = await Promise.all(uploads);
|
108
178
|
if (!uploaded || !uploaded.length || !uploaded[0]) {
|
109
|
-
log.error(`Failed to upload rendition for ${objectId}
|
110
|
-
throw new Error(`Failed to upload rendition for ${objectId}`);
|
179
|
+
log.error(`Failed to upload rendition for ${objectId}`, { uploaded });
|
180
|
+
throw new Error(`Failed to upload rendition for ${objectId} - upload object is empty`);
|
111
181
|
}
|
112
182
|
|
113
|
-
|
114
|
-
|
183
|
+
log.info(`Creating rendition for ${objectId} with max_hw: ${params.max_hw} and format: ${params.format}`, {
|
184
|
+
uploaded,
|
185
|
+
});
|
115
186
|
const rendition = await client.objects.create({
|
116
187
|
name: inputObject.name + ` [Rendition ${params.max_hw}]`,
|
117
188
|
type: renditionType.id,
|
118
189
|
parent: inputObject.id,
|
119
190
|
content: uploaded[0],
|
120
191
|
properties: {
|
121
|
-
mime_type:
|
192
|
+
mime_type: "image/" + params.format,
|
122
193
|
source_etag: inputObject.content.source,
|
123
194
|
height: params.max_hw,
|
124
195
|
width: params.max_hw,
|
125
|
-
|
126
|
-
total_parts: uploaded.length
|
127
|
-
} satisfies RenditionProperties
|
196
|
+
} satisfies RenditionProperties,
|
128
197
|
});
|
129
198
|
|
130
199
|
log.info(`Rendition ${rendition.id} created for ${objectId}`, { rendition });
|
131
200
|
|
132
201
|
return { id: rendition.id, format: params.format, status: "success" };
|
133
|
-
|
134
202
|
}
|
@@ -1,11 +1,16 @@
|
|
1
1
|
import { log } from "@temporalio/activity";
|
2
|
-
import {
|
2
|
+
import {
|
3
|
+
ContentObjectTypeItem,
|
4
|
+
CreateContentObjectTypePayload,
|
5
|
+
DSLActivityExecutionPayload,
|
6
|
+
DSLActivitySpec,
|
7
|
+
} from "@vertesia/common";
|
3
8
|
import { ActivityContext, setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
9
|
import { TruncateSpec, truncByMaxTokens } from "../utils/tokens.js";
|
5
10
|
import { InteractionExecutionParams, executeInteractionFromActivity } from "./executeInteraction.js";
|
6
11
|
|
7
|
-
const INT_SELECT_DOCUMENT_TYPE = "sys:SelectDocumentType"
|
8
|
-
const INT_GENERATE_METADATA_MODEL = "sys:GenerateMetadataModel"
|
12
|
+
const INT_SELECT_DOCUMENT_TYPE = "sys:SelectDocumentType";
|
13
|
+
const INT_GENERATE_METADATA_MODEL = "sys:GenerateMetadataModel";
|
9
14
|
|
10
15
|
export interface GenerateOrAssignContentTypeParams extends InteractionExecutionParams {
|
11
16
|
typesHint?: string[];
|
@@ -21,20 +26,21 @@ export interface GenerateOrAssignContentTypeParams extends InteractionExecutionP
|
|
21
26
|
interactionNames?: {
|
22
27
|
selectDocumentType?: string;
|
23
28
|
generateMetadataModel?: string;
|
24
|
-
}
|
29
|
+
};
|
25
30
|
}
|
26
31
|
|
27
32
|
export interface GenerateOrAssignContentType extends DSLActivitySpec<GenerateOrAssignContentTypeParams> {
|
28
|
-
name:
|
33
|
+
name: "generateOrAssignContentType";
|
29
34
|
}
|
30
35
|
|
31
|
-
export async function generateOrAssignContentType(
|
36
|
+
export async function generateOrAssignContentType(
|
37
|
+
payload: DSLActivityExecutionPayload<GenerateOrAssignContentTypeParams>,
|
38
|
+
) {
|
32
39
|
const context = await setupActivity<GenerateOrAssignContentTypeParams>(payload);
|
33
40
|
const { params, client, objectId } = context;
|
34
41
|
|
35
42
|
const interactionName = params.interactionNames?.selectDocumentType ?? INT_SELECT_DOCUMENT_TYPE;
|
36
43
|
|
37
|
-
|
38
44
|
log.info("SelectDocumentType for object: " + objectId, { payload });
|
39
45
|
|
40
46
|
const object = await client.objects.retrieve(objectId, "+text");
|
@@ -48,57 +54,67 @@ export async function generateOrAssignContentType(payload: DSLActivityExecutionP
|
|
48
54
|
return { status: "skipped", message: "Object already has a type: " + object.type.name };
|
49
55
|
}
|
50
56
|
|
51
|
-
if (
|
57
|
+
if (
|
58
|
+
!object ||
|
59
|
+
(!object.text &&
|
60
|
+
!object.content?.type?.startsWith("image/") &&
|
61
|
+
!object.content?.type?.startsWith("application/pdf"))
|
62
|
+
) {
|
52
63
|
log.info(`Object ${objectId} not found or text is empty and not an image`, { object });
|
53
64
|
return { status: "failed", error: "no-text" };
|
54
65
|
}
|
55
66
|
|
56
|
-
const types = await client.types.list(
|
67
|
+
const types = await client.types.list(undefined, {
|
68
|
+
schema: true,
|
69
|
+
});
|
57
70
|
|
58
71
|
//make a list of all existing types, and add hints if any
|
59
|
-
const existing_types = types.
|
60
|
-
|
61
|
-
const newHints = params.typesHint.filter((t: string) => !existing_types.includes(t));
|
62
|
-
existing_types.push(...newHints);
|
63
|
-
}
|
64
|
-
|
65
|
-
const content = object.text ? truncByMaxTokens(object.text, params.truncate || 4000) : undefined;
|
72
|
+
const existing_types = types.filter((t) => !["DocumentPart", "Rendition"].includes(t.name));
|
73
|
+
const content = object.text ? truncByMaxTokens(object.text, params.truncate || 30000) : undefined;
|
66
74
|
|
67
75
|
const getImage = async () => {
|
68
76
|
if (object.content?.type?.includes("pdf") && object.text?.length && object.text?.length < 100) {
|
69
|
-
return "store:" + objectId
|
77
|
+
return "store:" + objectId;
|
70
78
|
}
|
71
79
|
if (!object.content?.type?.startsWith("image/")) {
|
72
80
|
return undefined;
|
73
81
|
}
|
74
|
-
const res = await client.objects.getRendition(objectId, {
|
82
|
+
const res = await client.objects.getRendition(objectId, {
|
83
|
+
max_hw: 1024,
|
84
|
+
format: "image/png",
|
85
|
+
generate_if_missing: true,
|
86
|
+
});
|
75
87
|
if (!res.rendition && res.status === "generating") {
|
76
88
|
//throw to try again
|
77
89
|
throw new Error(`Rendition for object ${objectId} is in progress`);
|
78
90
|
} else if (res.rendition) {
|
79
91
|
return "store:" + objectId;
|
80
92
|
}
|
81
|
-
}
|
93
|
+
};
|
82
94
|
|
83
95
|
const fileRef = await getImage();
|
84
96
|
|
85
|
-
log.info(
|
97
|
+
log.info(
|
98
|
+
"Execute SelectDocumentType interaction on content with \nexisting types - passing full types: " +
|
99
|
+
existing_types.filter((t) => !t.tags?.includes("system")),
|
100
|
+
);
|
86
101
|
|
87
102
|
const res = await executeInteractionFromActivity(client, interactionName, params, {
|
88
|
-
existing_types,
|
103
|
+
existing_types,
|
104
|
+
content,
|
105
|
+
image: fileRef,
|
89
106
|
});
|
90
107
|
|
91
108
|
log.info("Selected Content Type Result: " + JSON.stringify(res.result));
|
92
109
|
|
93
110
|
//if type is not identified or not present in the database, generate a new type
|
94
|
-
let selectedType: { id: string
|
111
|
+
let selectedType: { id: string; name: string } | undefined = undefined;
|
95
112
|
|
96
|
-
selectedType = types.find(t => t.name === res.result.document_type);
|
113
|
+
selectedType = types.find((t) => t.name === res.result.document_type);
|
97
114
|
|
98
115
|
if (!selectedType) {
|
99
|
-
log.warn("Document type not
|
116
|
+
log.warn("Document type not identified: starting type generation");
|
100
117
|
const newType = await generateNewType(context, existing_types, content, fileRef);
|
101
|
-
|
102
118
|
selectedType = { id: newType.id, name: newType.name };
|
103
119
|
}
|
104
120
|
|
@@ -115,24 +131,28 @@ export async function generateOrAssignContentType(payload: DSLActivityExecutionP
|
|
115
131
|
return {
|
116
132
|
id: selectedType.id,
|
117
133
|
name: selectedType.name,
|
118
|
-
isNew: !types.find(t => t.name === selectedType.name)
|
134
|
+
isNew: !types.find((t) => t.name === selectedType.name),
|
119
135
|
};
|
120
136
|
}
|
121
137
|
|
122
|
-
async function generateNewType(
|
138
|
+
async function generateNewType(
|
139
|
+
context: ActivityContext<GenerateOrAssignContentTypeParams>,
|
140
|
+
existing_types: ContentObjectTypeItem[],
|
141
|
+
content?: string,
|
142
|
+
fileRef?: string,
|
143
|
+
) {
|
123
144
|
const { client, params } = context;
|
124
145
|
|
125
146
|
const project = await context.fetchProject();
|
126
147
|
const interactionName = params.interactionNames?.generateMetadataModel ?? INT_GENERATE_METADATA_MODEL;
|
127
148
|
|
128
149
|
const genTypeRes = await executeInteractionFromActivity(client, interactionName, params, {
|
129
|
-
existing_types
|
150
|
+
existing_types,
|
130
151
|
content: content,
|
131
152
|
human_context: project?.configuration?.human_context ?? undefined,
|
132
|
-
image: fileRef ? fileRef : undefined
|
153
|
+
image: fileRef ? fileRef : undefined,
|
133
154
|
});
|
134
155
|
|
135
|
-
|
136
156
|
if (!genTypeRes.result.document_type) {
|
137
157
|
log.error("No name generated for type", genTypeRes);
|
138
158
|
throw new Error("No name generated for type");
|
@@ -143,10 +163,10 @@ async function generateNewType(context: ActivityContext<GenerateOrAssignContentT
|
|
143
163
|
name: genTypeRes.result.document_type,
|
144
164
|
object_schema: genTypeRes.result.metadata_schema,
|
145
165
|
is_chunkable: genTypeRes.result.is_chunkable,
|
146
|
-
|
166
|
+
table_layout: genTypeRes.result.table_layout,
|
167
|
+
};
|
147
168
|
|
148
169
|
const type = await client.types.create(typeData);
|
149
170
|
|
150
171
|
return type;
|
151
|
-
|
152
172
|
}
|
@@ -12,7 +12,7 @@ export interface GetObject extends DSLActivitySpec<GetObjectParams> {
|
|
12
12
|
}
|
13
13
|
|
14
14
|
/**
|
15
|
-
* We are using a union type for the status parameter since typescript
|
15
|
+
* We are using a union type for the status parameter since typescript enums breaks the workflow code generation
|
16
16
|
* @param objectId
|
17
17
|
* @param status
|
18
18
|
*/
|
@@ -0,0 +1,25 @@
|
|
1
|
+
import { ContentObjectStatus, DSLActivityExecutionPayload } from "@vertesia/common";
|
2
|
+
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
3
|
+
import { log } from "@temporalio/activity"
|
4
|
+
|
5
|
+
export interface HandleDslErrorParams {
|
6
|
+
errorMessage: string;
|
7
|
+
}
|
8
|
+
|
9
|
+
export async function handleDslError(payload: DSLActivityExecutionPayload<HandleDslErrorParams>): Promise<void> {
|
10
|
+
const { client, params, objectId } = await setupActivity<HandleDslErrorParams>(payload);
|
11
|
+
const isIntake = payload.workflow_name === "StandardDocumentIntake" || payload.workflow_name === "StandardImageIntake";
|
12
|
+
if (!isIntake) {
|
13
|
+
log.warn(`Workflow execution failed, but no error handler registered for this workflow: ${payload.workflow_name}`,
|
14
|
+
{ error: params.errorMessage },
|
15
|
+
);
|
16
|
+
return;
|
17
|
+
}
|
18
|
+
|
19
|
+
try {
|
20
|
+
await client.objects.update(objectId, { status: ContentObjectStatus.failed });
|
21
|
+
} catch (e) {
|
22
|
+
log.error("Failed to handle error", { error: e });
|
23
|
+
}
|
24
|
+
return;
|
25
|
+
}
|
@@ -11,6 +11,7 @@ export { generateEmbeddings } from "./generateEmbeddings.js";
|
|
11
11
|
export { generateImageRendition } from "./generateImageRendition.js";
|
12
12
|
export { generateOrAssignContentType } from "./generateOrAssignContentType.js";
|
13
13
|
export { getObjectFromStore } from "./getObjectFromStore.js";
|
14
|
+
export { handleDslError } from "./handleError.js";
|
14
15
|
export { convertPdfToStructuredText } from "./media/processPdfWithTextract.js";
|
15
16
|
export { transcribeMedia } from "./media/transcribeMediaWithGladia.js";
|
16
17
|
export { notifyWebhook } from "./notifyWebhook.js";
|
package/src/activities/index.ts
CHANGED
@@ -91,11 +91,11 @@ export async function convertPdfToStructuredText(payload: DSLActivityExecutionPa
|
|
91
91
|
|
92
92
|
if (jobStatus === "SUCCEEDED") {
|
93
93
|
log.info(`Job ${jobId} succeeded, saving results`, { jobId });
|
94
|
-
const
|
95
|
-
const tokensData = countTokens(
|
96
|
-
const etag = object.content.etag ?? md5(
|
94
|
+
const fText = await processor.processResults(jobId);
|
95
|
+
const tokensData = countTokens(fText);
|
96
|
+
const etag = object.content.etag ?? md5(fText);
|
97
97
|
const updateData: CreateContentObjectPayload = {
|
98
|
-
text:
|
98
|
+
text: fText,
|
99
99
|
text_etag: etag,
|
100
100
|
tokens: {
|
101
101
|
...tokensData,
|
@@ -74,7 +74,7 @@ export async function transcribeMedia(payload: DSLActivityExecutionPayload<Trans
|
|
74
74
|
|
75
75
|
|
76
76
|
function generateCallbackUrlForGladia(baseUrl: string, authToken: string, taskToken: string, objectId: string) {
|
77
|
-
return `${baseUrl}/api/v1/webhooks/gladia/${objectId}?
|
77
|
+
return `${baseUrl}/api/v1/webhooks/gladia/${objectId}?access_token=${authToken}&task_token=${taskToken}`;
|
78
78
|
}
|
79
79
|
|
80
80
|
interface GladiaTranscriptRequestResponse {
|
@@ -1,9 +1,9 @@
|
|
1
|
-
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
3
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
4
|
import { WorkflowParamNotFound } from "../errors.js";
|
5
5
|
|
6
|
-
interface NotifyWebhookParams {
|
6
|
+
export interface NotifyWebhookParams {
|
7
7
|
target_url: string; //URL to send the notification to
|
8
8
|
method: 'GET' | 'POST'; //HTTP method to use
|
9
9
|
payload: Record<string, any>; //payload to send (if POST then as JSON body, if GET then as query string)
|
@@ -11,7 +11,7 @@ export interface SetDocumentStatus extends DSLActivitySpec<SetDocumentStatusPara
|
|
11
11
|
}
|
12
12
|
|
13
13
|
/**
|
14
|
-
* We are using a union type for the status parameter since typescript
|
14
|
+
* We are using a union type for the status parameter since typescript enums breaks the workflow code generation
|
15
15
|
* @param objectId
|
16
16
|
* @param status
|
17
17
|
*/
|
@@ -472,21 +472,21 @@ export class TextractProcessor {
|
|
472
472
|
}
|
473
473
|
|
474
474
|
// Build final output
|
475
|
-
let
|
475
|
+
let fullText = '';
|
476
476
|
let imgNumber = 1;
|
477
477
|
let tableNumber = 1;
|
478
478
|
for (const page of pageContents) {
|
479
|
-
|
479
|
+
fullText += `<page number="${page.pageNumber}">\n`;
|
480
480
|
for (const block of page.blocks) {
|
481
481
|
if (block.type === 'text') {
|
482
|
-
|
482
|
+
fullText += `<text>\n${block.content}\n</text>\n\n`;
|
483
483
|
} else if (block.type === 'table') {
|
484
484
|
const confidenceAttr = block.confidence !== undefined && this.includeConfidenceInTables
|
485
485
|
? ` confidence="${block.confidence.toFixed(2)}"`
|
486
486
|
: '';
|
487
|
-
|
488
|
-
|
489
|
-
|
487
|
+
fullText += `<table number=${tableNumber++} type="csv" ${confidenceAttr}>\n`;
|
488
|
+
fullText += `${block.content}\n`;
|
489
|
+
fullText += `</table>\n\n`;
|
490
490
|
} else if (block.type === 'image') {
|
491
491
|
// Include geometry if you like
|
492
492
|
const leftAttr = block.left ? ` left="${block.left.toFixed(4)}"` : '';
|
@@ -494,13 +494,13 @@ export class TextractProcessor {
|
|
494
494
|
const widthAttr = block.width ? ` width="${block.width.toFixed(4)}"` : '';
|
495
495
|
const heightAttr = block.height ? ` height="${block.height.toFixed(4)}"` : '';
|
496
496
|
|
497
|
-
|
497
|
+
fullText += `<image id="${imgNumber++}" ${leftAttr}${topAttr}${widthAttr}${heightAttr}>\n${block.content.trim()}\n</image>\n\n`;
|
498
498
|
}
|
499
499
|
}
|
500
|
-
|
500
|
+
fullText += `</page>\n\n`;
|
501
501
|
}
|
502
502
|
|
503
|
-
return
|
503
|
+
return fullText;
|
504
504
|
}
|
505
505
|
|
506
506
|
}
|