@vertesia/workflow 0.50.1 → 0.52.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +7 -1
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -1
- package/lib/cjs/activities/chunkDocument.js +39 -34
- package/lib/cjs/activities/chunkDocument.js.map +1 -1
- package/lib/cjs/activities/createDocumentFromOther.js +2 -2
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -1
- package/lib/cjs/activities/executeInteraction.js +12 -7
- package/lib/cjs/activities/executeInteraction.js.map +1 -1
- package/lib/cjs/activities/extractDocumentText.js +25 -13
- package/lib/cjs/activities/extractDocumentText.js.map +1 -1
- package/lib/cjs/activities/generateDocumentProperties.js +22 -4
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -1
- package/lib/cjs/activities/generateEmbeddings.js +58 -102
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -1
- package/lib/cjs/activities/generateImageRendition.js +77 -34
- package/lib/cjs/activities/generateImageRendition.js.map +1 -1
- package/lib/cjs/activities/generateOrAssignContentType.js +3 -7
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -1
- package/lib/cjs/activities/notifyWebhook.js.map +1 -1
- package/lib/cjs/conversion/image.js +80 -12
- package/lib/cjs/conversion/image.js.map +1 -1
- package/lib/cjs/dsl/setup/ActivityContext.js +32 -8
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -1
- package/lib/cjs/dsl.js +20 -0
- package/lib/cjs/dsl.js.map +1 -0
- package/lib/cjs/errors.js +13 -1
- package/lib/cjs/errors.js.map +1 -1
- package/lib/cjs/index.js +8 -2
- package/lib/cjs/index.js.map +1 -1
- package/lib/cjs/iterative-generation/activities/extractToc.js +2 -2
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -1
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +1 -1
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -1
- package/lib/cjs/iterative-generation/activities/generatePart.js +1 -1
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -1
- package/lib/cjs/iterative-generation/activities/generateToc.js +1 -1
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -1
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +2 -1
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -1
- package/lib/cjs/iterative-generation/utils.js +7 -4
- package/lib/cjs/iterative-generation/utils.js.map +1 -1
- package/lib/cjs/system/notifyWebhookWorkflow.js +2 -1
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -1
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +1 -1
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -1
- package/lib/cjs/utils/blobs.js +13 -7
- package/lib/cjs/utils/blobs.js.map +1 -1
- package/lib/cjs/utils/chunks.js +14 -0
- package/lib/cjs/utils/chunks.js.map +1 -0
- package/lib/cjs/utils/client.js +6 -5
- package/lib/cjs/utils/client.js.map +1 -1
- package/lib/cjs/utils/memory.js +2 -9
- package/lib/cjs/utils/memory.js.map +1 -1
- package/lib/cjs/workflows.js +1 -3
- package/lib/cjs/workflows.js.map +1 -1
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +7 -1
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -1
- package/lib/esm/activities/chunkDocument.js +39 -34
- package/lib/esm/activities/chunkDocument.js.map +1 -1
- package/lib/esm/activities/createDocumentFromOther.js +1 -1
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -1
- package/lib/esm/activities/executeInteraction.js +12 -7
- package/lib/esm/activities/executeInteraction.js.map +1 -1
- package/lib/esm/activities/extractDocumentText.js +25 -13
- package/lib/esm/activities/extractDocumentText.js.map +1 -1
- package/lib/esm/activities/generateDocumentProperties.js +22 -4
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -1
- package/lib/esm/activities/generateEmbeddings.js +58 -69
- package/lib/esm/activities/generateEmbeddings.js.map +1 -1
- package/lib/esm/activities/generateImageRendition.js +78 -35
- package/lib/esm/activities/generateImageRendition.js.map +1 -1
- package/lib/esm/activities/generateOrAssignContentType.js +3 -7
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -1
- package/lib/esm/activities/notifyWebhook.js.map +1 -1
- package/lib/esm/conversion/image.js +80 -12
- package/lib/esm/conversion/image.js.map +1 -1
- package/lib/esm/dsl/setup/ActivityContext.js +34 -10
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -1
- package/lib/esm/dsl.js +4 -0
- package/lib/esm/dsl.js.map +1 -0
- package/lib/esm/errors.js +11 -0
- package/lib/esm/errors.js.map +1 -1
- package/lib/esm/index.js +8 -2
- package/lib/esm/index.js.map +1 -1
- package/lib/esm/iterative-generation/activities/extractToc.js +3 -3
- package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -1
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +2 -2
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -1
- package/lib/esm/iterative-generation/activities/generatePart.js +2 -2
- package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -1
- package/lib/esm/iterative-generation/activities/generateToc.js +2 -2
- package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -1
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +2 -1
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -1
- package/lib/esm/iterative-generation/utils.js +7 -4
- package/lib/esm/iterative-generation/utils.js.map +1 -1
- package/lib/esm/system/notifyWebhookWorkflow.js +2 -1
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -1
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +2 -2
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -1
- package/lib/esm/utils/blobs.js +13 -7
- package/lib/esm/utils/blobs.js.map +1 -1
- package/lib/esm/utils/chunks.js +9 -0
- package/lib/esm/utils/chunks.js.map +1 -0
- package/lib/esm/utils/client.js +5 -4
- package/lib/esm/utils/client.js.map +1 -1
- package/lib/esm/utils/memory.js +2 -7
- package/lib/esm/utils/memory.js.map +1 -1
- package/lib/esm/workflows.js +0 -1
- package/lib/esm/workflows.js.map +1 -1
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +10 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -1
- package/lib/types/activities/chunkDocument.d.ts +15 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -1
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -1
- package/lib/types/activities/executeInteraction.d.ts +19 -4
- package/lib/types/activities/executeInteraction.d.ts.map +1 -1
- package/lib/types/activities/extractDocumentText.d.ts.map +1 -1
- package/lib/types/activities/generateDocumentProperties.d.ts +1 -1
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -1
- package/lib/types/activities/generateEmbeddings.d.ts +21 -17
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -1
- package/lib/types/activities/generateImageRendition.d.ts +3 -5
- package/lib/types/activities/generateImageRendition.d.ts.map +1 -1
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -1
- package/lib/types/activities/notifyWebhook.d.ts +1 -2
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -1
- package/lib/types/conversion/image.d.ts +8 -6
- package/lib/types/conversion/image.d.ts.map +1 -1
- package/lib/types/dsl/dslProxyActivities.d.ts +2 -2
- package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -1
- package/lib/types/dsl/setup/ActivityContext.d.ts +3 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -1
- package/lib/types/dsl.d.ts +4 -0
- package/lib/types/dsl.d.ts.map +1 -0
- package/lib/types/errors.d.ts +6 -0
- package/lib/types/errors.d.ts.map +1 -1
- package/lib/types/index.d.ts +8 -2
- package/lib/types/index.d.ts.map +1 -1
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -1
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -1
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -1
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -1
- package/lib/types/iterative-generation/utils.d.ts +2 -2
- package/lib/types/iterative-generation/utils.d.ts.map +1 -1
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -1
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +2 -17
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -1
- package/lib/types/utils/blobs.d.ts.map +1 -1
- package/lib/types/utils/chunks.d.ts +9 -0
- package/lib/types/utils/chunks.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts +2 -2
- package/lib/types/utils/client.d.ts.map +1 -1
- package/lib/types/utils/memory.d.ts +1 -5
- package/lib/types/utils/memory.d.ts.map +1 -1
- package/lib/types/workflows.d.ts +0 -1
- package/lib/types/workflows.d.ts.map +1 -1
- package/lib/workflows-bundle.js +8311 -5790
- package/package.json +28 -10
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +20 -1
- package/src/activities/chunkDocument.ts +62 -42
- package/src/activities/createDocumentFromOther.ts +2 -2
- package/src/activities/executeInteraction.ts +33 -12
- package/src/activities/extractDocumentText.ts +30 -14
- package/src/activities/generateDocumentProperties.ts +37 -16
- package/src/activities/generateEmbeddings.ts +91 -79
- package/src/activities/generateImageRendition.ts +100 -53
- package/src/activities/generateOrAssignContentType.ts +5 -11
- package/src/activities/notifyWebhook.ts +2 -2
- package/src/conversion/image.test.ts +110 -18
- package/src/conversion/image.ts +90 -15
- package/src/conversion/pandoc.test.ts +7 -5
- package/src/dsl/dslProxyActivities.ts +2 -2
- package/src/dsl/setup/ActivityContext.ts +60 -19
- package/src/dsl.ts +3 -0
- package/src/errors.ts +27 -6
- package/src/index.ts +9 -2
- package/src/iterative-generation/activities/extractToc.ts +3 -3
- package/src/iterative-generation/activities/finalizeOutput.ts +3 -3
- package/src/iterative-generation/activities/generatePart.ts +3 -3
- package/src/iterative-generation/activities/generateToc.ts +2 -2
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +2 -1
- package/src/iterative-generation/utils.ts +10 -6
- package/src/system/notifyWebhookWorkflow.ts +3 -2
- package/src/system/recalculateEmbeddingsWorkflow.ts +2 -2
- package/src/utils/blobs.ts +12 -7
- package/src/utils/chunks.ts +17 -0
- package/src/utils/client.ts +6 -5
- package/src/utils/memory.ts +3 -8
- package/src/workflows.ts +0 -2
- package/lib/cjs/conversion/pdf.js +0 -13
- package/lib/cjs/conversion/pdf.js.map +0 -1
- package/lib/cjs/system/generateObjectText.js +0 -76
- package/lib/cjs/system/generateObjectText.js.map +0 -1
- package/lib/esm/conversion/pdf.js +0 -7
- package/lib/esm/conversion/pdf.js.map +0 -1
- package/lib/esm/system/generateObjectText.js +0 -73
- package/lib/esm/system/generateObjectText.js.map +0 -1
- package/lib/types/conversion/pdf.d.ts +0 -2
- package/lib/types/conversion/pdf.d.ts.map +0 -1
- package/lib/types/system/generateObjectText.d.ts +0 -4
- package/lib/types/system/generateObjectText.d.ts.map +0 -1
- package/src/conversion/pdf.test.ts +0 -35
- package/src/conversion/pdf.ts +0 -8
- package/src/system/generateObjectText.ts +0 -95
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@vertesia/workflow",
|
3
|
-
"version": "0.
|
3
|
+
"version": "0.52.0",
|
4
4
|
"type": "module",
|
5
5
|
"description": "Composable prompts workflow dsl",
|
6
6
|
"main": "./lib/esm/index.js",
|
@@ -18,28 +18,28 @@
|
|
18
18
|
},
|
19
19
|
"license": "Apache-2.0",
|
20
20
|
"devDependencies": {
|
21
|
+
"@smithy/types": "^3.7.2",
|
21
22
|
"@temporalio/testing": "^1.11.5",
|
22
23
|
"@temporalio/worker": "^1.11.5",
|
24
|
+
"@types/fluent-ffmpeg": "^2.1.27",
|
23
25
|
"@types/jsonwebtoken": "^9.0.7",
|
24
26
|
"@types/node": "^22.5.1",
|
25
27
|
"@types/papaparse": "^5.3.15",
|
26
28
|
"@types/tmp": "^0.2.6",
|
27
29
|
"ts-dual-module": "^0.6.3",
|
28
|
-
"vitest": "^
|
29
|
-
"@smithy/types": "^3.7.2"
|
30
|
+
"vitest": "^3.0.9"
|
30
31
|
},
|
31
32
|
"dependencies": {
|
32
33
|
"@aws-sdk/client-s3": "^3.693.0",
|
33
34
|
"@aws-sdk/client-textract": "^3.693.0",
|
34
35
|
"@aws-sdk/credential-providers": "^3.693.0",
|
35
|
-
"@vertesia/memory": "^0.43.0",
|
36
|
-
"@opendocsg/pdf2md": "0.2.0",
|
37
36
|
"@temporalio/activity": "^1.11.5",
|
38
37
|
"@temporalio/workflow": "^1.11.5",
|
39
|
-
"@tensorflow/tfjs-node": "^4.19.0",
|
40
38
|
"@types/json-schema": "^7.0.15",
|
39
|
+
"@vertesia/memory": "^0.43.0",
|
41
40
|
"api-fetch-client": "^0.13.0",
|
42
41
|
"fast-deep-equal": "^3.1.3",
|
42
|
+
"fluent-ffmpeg": "^2.1.3",
|
43
43
|
"jsonwebtoken": "^9.0.2",
|
44
44
|
"ms": "3.0.0-canary.1",
|
45
45
|
"node-web-stream-adapters": "^0.2.1",
|
@@ -48,15 +48,17 @@
|
|
48
48
|
"sharp": "^0.33.4",
|
49
49
|
"tiktoken": "^1.0.15",
|
50
50
|
"tmp": "^0.2.3",
|
51
|
+
"tmp-promise": "^3.0.3",
|
51
52
|
"yaml": "^2.6.0",
|
52
|
-
"@
|
53
|
-
"@vertesia/client": "0.
|
54
|
-
"@
|
53
|
+
"@vertesia/common": "0.52.0",
|
54
|
+
"@vertesia/client": "0.53.0",
|
55
|
+
"@llumiverse/core": "0.17.0"
|
55
56
|
},
|
56
57
|
"ts_dual_module": {
|
57
58
|
"outDir": "lib",
|
58
59
|
"exports": {
|
59
|
-
"activities": "./activities",
|
60
|
+
"activities": "./activities/index.js",
|
61
|
+
"dsl-activities": "./activities/index-dsl.js",
|
60
62
|
"workflows": "./workflows",
|
61
63
|
"workflows-bundle": "./workflows-bundle.js",
|
62
64
|
"vars": "./vars"
|
@@ -78,11 +80,21 @@
|
|
78
80
|
"import": "./lib/esm/activities/index.js",
|
79
81
|
"require": "./lib/cjs/activities/index.js"
|
80
82
|
},
|
83
|
+
"./dsl-activities": {
|
84
|
+
"types": "./lib/types/activities/index-dsl.d.ts",
|
85
|
+
"import": "./lib/esm/activities/index-dsl.js",
|
86
|
+
"require": "./lib/cjs/activities/index-dsl.js"
|
87
|
+
},
|
81
88
|
"./workflows": {
|
82
89
|
"types": "./lib/types/workflows.d.ts",
|
83
90
|
"import": "./lib/esm/workflows.js",
|
84
91
|
"require": "./lib/cjs/workflows.js"
|
85
92
|
},
|
93
|
+
"./dsl": {
|
94
|
+
"types": "./lib/types/dsl.d.ts",
|
95
|
+
"import": "./lib/esm/dsl.js",
|
96
|
+
"require": "./lib/cjs/dsl.js"
|
97
|
+
},
|
86
98
|
"./workflows-bundle": {
|
87
99
|
"import": "./lib/workflows-bundle.js"
|
88
100
|
}
|
@@ -93,9 +105,15 @@
|
|
93
105
|
"activities": [
|
94
106
|
"./lib/types/activities/index.d.ts"
|
95
107
|
],
|
108
|
+
"dsl-activities": [
|
109
|
+
"./lib/types/activities/index-dsl.d.ts"
|
110
|
+
],
|
96
111
|
"workflows": [
|
97
112
|
"./lib/types/workflows.d.ts"
|
98
113
|
],
|
114
|
+
"dsl": [
|
115
|
+
"./lib/types/dsl.d.ts"
|
116
|
+
],
|
99
117
|
"vars": [
|
100
118
|
"./lib/types/dsl/vars.d.ts"
|
101
119
|
]
|
@@ -1,5 +1,5 @@
|
|
1
|
-
import { ContentObjectStatus, DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { ContentObjectStatus, DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
3
|
import { setupActivity } from "../../dsl/setup/ActivityContext.js";
|
4
4
|
import { ActivityParamNotFound, NoDocumentFound } from "../../errors.js";
|
5
5
|
interface CreateOrUpdateObjectFromInteractionRunParams {
|
@@ -19,9 +19,21 @@ interface CreateOrUpdateObjectFromInteractionRunParams {
|
|
19
19
|
*/
|
20
20
|
update_existing_id?: string,
|
21
21
|
|
22
|
+
/**
|
23
|
+
* The name of the object to use. If not provided, the name will be generated from the interaction result
|
24
|
+
*/
|
22
25
|
fallback_name?: string, // a name to use if no one was generated by the interaction
|
23
26
|
|
27
|
+
/**
|
28
|
+
* The name of the parent object to use. If not provided, the document will be created at the root level
|
29
|
+
*/
|
24
30
|
parent?: string, // the parent object id
|
31
|
+
|
32
|
+
/**
|
33
|
+
* The name of the property to use for the text. If not provided, the text will be set to the result of the interaction
|
34
|
+
*/
|
35
|
+
update_text_from_property?: string,
|
36
|
+
|
25
37
|
}
|
26
38
|
|
27
39
|
export interface CreateOrUpdateObjectFromInteractionRun extends DSLActivitySpec<CreateOrUpdateObjectFromInteractionRunParams> {
|
@@ -80,6 +92,13 @@ export async function createOrUpdateDocumentFromInteractionRun(payload: DSLActiv
|
|
80
92
|
}
|
81
93
|
};
|
82
94
|
|
95
|
+
if (params.update_text_from_property) {
|
96
|
+
const text = docPayload.properties[params.update_text_from_property];
|
97
|
+
if (text) {
|
98
|
+
docPayload.text = text;
|
99
|
+
}
|
100
|
+
}
|
101
|
+
|
83
102
|
//create or update the document
|
84
103
|
let newDoc: boolean = false;
|
85
104
|
let doc = undefined;
|
@@ -1,18 +1,12 @@
|
|
1
|
-
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
3
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
|
+
import { DocPart } from "../utils/chunks.js";
|
4
5
|
import { InteractionExecutionParams, executeInteractionFromActivity } from "./executeInteraction.js";
|
5
6
|
|
6
7
|
const INT_CHUNK_DOCUMENT = "sys:ChunkDocument"
|
7
8
|
|
8
|
-
interface DocPart {
|
9
9
|
|
10
|
-
line_number_start: number
|
11
|
-
line_number_end: number
|
12
|
-
name: string
|
13
|
-
type: string
|
14
|
-
|
15
|
-
}
|
16
10
|
|
17
11
|
export interface ChunkDocumentResult {
|
18
12
|
id: string
|
@@ -22,9 +16,28 @@ export interface ChunkDocumentResult {
|
|
22
16
|
}
|
23
17
|
|
24
18
|
export interface ChunkDocumentParams extends InteractionExecutionParams {
|
19
|
+
|
20
|
+
/**
|
21
|
+
* If true, force chunking even if the document is already chunked
|
22
|
+
*/
|
25
23
|
force?: boolean;
|
24
|
+
|
25
|
+
/**
|
26
|
+
* The interaction name to use for chunking
|
27
|
+
* If not set, the default interaction will be used
|
28
|
+
*/
|
26
29
|
interactionName?: string;
|
30
|
+
|
31
|
+
/**
|
32
|
+
* The object type to use for the document parts
|
33
|
+
* If not set, the type of the document will be used
|
34
|
+
*/
|
27
35
|
docPartType?: string;
|
36
|
+
|
37
|
+
/**
|
38
|
+
* If true, create parts as document objects
|
39
|
+
*/
|
40
|
+
createParts?: boolean;
|
28
41
|
}
|
29
42
|
|
30
43
|
export interface ChunkDocument extends DSLActivitySpec<ChunkDocumentParams> {
|
@@ -75,48 +88,55 @@ export async function chunkDocument(payload: DSLActivityExecutionPayload<ChunkDo
|
|
75
88
|
return { id: objectId, status: "failed", parts: [], message: "no parts found" }
|
76
89
|
}
|
77
90
|
|
78
|
-
const partDocs = await Promise.all(parts.map(async (part, i) => {
|
79
91
|
|
80
|
-
|
92
|
+
/**
|
93
|
+
* Only create parts as document if the flag is set
|
94
|
+
*/
|
95
|
+
if (params.createParts) {
|
96
|
+
|
97
|
+
const partDocs = await Promise.all(parts.map(async (part, i) => {
|
81
98
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
location
|
99
|
+
const text = lines.filter((_l, i) => i >= part.line_number_start && i <= part.line_number_end).join('\n');
|
100
|
+
|
101
|
+
const location = () => {
|
102
|
+
let location = document.location;
|
103
|
+
if (location.endsWith('/')) {
|
104
|
+
location += document.name + "/" + part.type
|
105
|
+
}
|
106
|
+
location += '/' + document.name + "/" + part.type;
|
107
|
+
return location;
|
86
108
|
}
|
87
|
-
|
88
|
-
|
109
|
+
|
110
|
+
const docPart = await client.objects.create({
|
111
|
+
name: part.name,
|
112
|
+
parent: objectId,
|
113
|
+
text: text,
|
114
|
+
location: location(),
|
115
|
+
properties: {
|
116
|
+
part_number: i + 1,
|
117
|
+
etag: document.text_etag,
|
118
|
+
source_line_start: part.line_number_start,
|
119
|
+
source_line_end: part.line_number_end,
|
120
|
+
title: part.name
|
121
|
+
}
|
122
|
+
});
|
123
|
+
return docPart;
|
124
|
+
}));
|
125
|
+
|
126
|
+
//delete previous parts
|
127
|
+
if (document.parts && document.parts.length > 0) {
|
128
|
+
log.info('Deleting previous parts for object ID: ' + objectId, { parts: document.parts });
|
129
|
+
await Promise.all(document.parts.map(async (partId) => {
|
130
|
+
await client.objects.delete(partId);
|
131
|
+
}));
|
89
132
|
}
|
90
133
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
text: text,
|
95
|
-
location: location(),
|
96
|
-
properties: {
|
97
|
-
part_number: i + 1,
|
98
|
-
etag: document.text_etag,
|
99
|
-
source_line_start: part.line_number_start,
|
100
|
-
source_line_end: part.line_number_end,
|
101
|
-
title: part.name
|
102
|
-
}
|
134
|
+
await client.objects.update(objectId, {
|
135
|
+
parts: partDocs.map(p => p.id),
|
136
|
+
parts_etag: document.text_etag
|
103
137
|
});
|
104
|
-
return docPart;
|
105
|
-
}));
|
106
|
-
|
107
|
-
//delete previous parts
|
108
|
-
if (document.parts && document.parts.length > 0) {
|
109
|
-
log.info('Deleting previous parts for object ID: ' + objectId, { parts: document.parts });
|
110
|
-
await Promise.all(document.parts.map(async (partId) => {
|
111
|
-
await client.objects.delete(partId);
|
112
|
-
}));
|
113
138
|
}
|
114
139
|
|
115
|
-
await client.objects.update(objectId, {
|
116
|
-
parts: partDocs.map(p => p.id),
|
117
|
-
parts_etag: document.text_etag
|
118
|
-
});
|
119
|
-
|
120
140
|
log.info(`Object ${objectId} chunking completed`, { parts: document.parts });
|
121
141
|
|
122
142
|
return { id: objectId, status: "completed", parts: document.parts }
|
@@ -1,11 +1,11 @@
|
|
1
|
-
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { NodeStreamSource } from "@vertesia/client/node";
|
3
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
4
|
import fs from 'fs';
|
4
5
|
import { pdfExtractPages } from "../conversion/mutool.js";
|
5
6
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
6
7
|
import { NoDocumentFound } from "../errors.js";
|
7
8
|
import { saveBlobToTempFile } from "../utils/blobs.js";
|
8
|
-
import { NodeStreamSource } from "../utils/memory.js";
|
9
9
|
|
10
10
|
interface CreatePdfDocumentFromSourceParams {
|
11
11
|
|
@@ -1,10 +1,11 @@
|
|
1
|
+
import { ModelOptions } from "@llumiverse/core";
|
2
|
+
import { activityInfo, log } from "@temporalio/activity";
|
1
3
|
import { VertesiaClient } from "@vertesia/client";
|
2
4
|
import { DSLActivityExecutionPayload, DSLActivitySpec, ExecutionRun, ExecutionRunStatus, InteractionExecutionConfiguration, RunSearchPayload } from "@vertesia/common";
|
3
|
-
import { activityInfo, log } from "@temporalio/activity";
|
4
5
|
import { projectResult } from "../dsl/projections.js";
|
5
6
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
7
|
+
import { ActivityParamNotFound } from "../errors.js";
|
6
8
|
import { TruncateSpec, truncByMaxTokens } from "../utils/tokens.js";
|
7
|
-
import { ModelOptions } from "@llumiverse/core";
|
8
9
|
|
9
10
|
//Example:
|
10
11
|
//@ts-ignore
|
@@ -52,8 +53,7 @@ const JSON: DSLActivitySpec = {
|
|
52
53
|
}
|
53
54
|
}
|
54
55
|
}
|
55
|
-
|
56
|
-
export interface InteractionExecutionParams extends ModelOptions {
|
56
|
+
export interface InteractionExecutionParams {
|
57
57
|
/**
|
58
58
|
* The environment to use. If not specified the project default environment will be used.
|
59
59
|
* If the latter is not specified an exception will be thrown.
|
@@ -80,13 +80,29 @@ export interface InteractionExecutionParams extends ModelOptions {
|
|
80
80
|
* Whether or not to include the previous error in the interaction prompt data
|
81
81
|
*/
|
82
82
|
include_previous_error?: boolean;
|
83
|
-
}
|
84
83
|
|
84
|
+
/**
|
85
|
+
* Options to control generation
|
86
|
+
*/
|
87
|
+
model_options?: ModelOptions;
|
88
|
+
}
|
85
89
|
|
90
|
+
/**
|
91
|
+
* TODO: must be kept in sync with InteractionAsyncExecutionPayload from @vertesia/common
|
92
|
+
* Also see the executeInteractionAsync endpoint on the server for how the client payload is sent to the workflow.
|
93
|
+
* (interaction is translated to interactionName)
|
94
|
+
*/
|
86
95
|
export interface ExecuteInteractionParams extends InteractionExecutionParams {
|
96
|
+
//TODO rename to interaction as in InteractionAsyncExecutionPayload
|
87
97
|
interactionName: string;
|
88
98
|
prompt_data: Record<string, any>;
|
89
|
-
|
99
|
+
/**
|
100
|
+
* Additional prompt data passed by the workflow configuration. This will be merged with prompt_data if any.
|
101
|
+
* You should use `import: ["static_prompt_data"]` to import the workflow prompt data as static_prompt_data param.
|
102
|
+
* Otherwise the workflow prompt data will be ignored.
|
103
|
+
*/
|
104
|
+
static_prompt_data?: Record<string, any>;
|
105
|
+
truncate?: Record<string, TruncateSpec>;
|
90
106
|
}
|
91
107
|
|
92
108
|
export interface ExecuteInteraction extends DSLActivitySpec<ExecuteInteractionParams> {
|
@@ -98,7 +114,15 @@ export async function executeInteraction(payload: DSLActivityExecutionPayload<Ex
|
|
98
114
|
client, params
|
99
115
|
} = await setupActivity<ExecuteInteractionParams>(payload);
|
100
116
|
|
101
|
-
const { interactionName, prompt_data } = params;
|
117
|
+
const { interactionName, prompt_data, static_prompt_data: wf_prompt_data } = params;
|
118
|
+
if (wf_prompt_data) {
|
119
|
+
Object.assign(prompt_data, wf_prompt_data);
|
120
|
+
}
|
121
|
+
|
122
|
+
if (!interactionName) {
|
123
|
+
log.error("Missing interactionName", { params });
|
124
|
+
throw new ActivityParamNotFound("interactionName", payload.activity);
|
125
|
+
}
|
102
126
|
|
103
127
|
if (params.truncate) {
|
104
128
|
const truncate = params.truncate;
|
@@ -153,8 +177,7 @@ export async function executeInteractionFromActivity(client: VertesiaClient, int
|
|
153
177
|
const config: InteractionExecutionConfiguration = {
|
154
178
|
environment: params.environment,
|
155
179
|
model: params.model,
|
156
|
-
|
157
|
-
temperature: params.temperature
|
180
|
+
model_options: params.model_options,
|
158
181
|
}
|
159
182
|
const data = {
|
160
183
|
...prompt_data,
|
@@ -163,9 +186,7 @@ export async function executeInteractionFromActivity(client: VertesiaClient, int
|
|
163
186
|
|
164
187
|
const result_schema = params.result_schema;
|
165
188
|
|
166
|
-
|
167
|
-
log.info(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags });
|
168
|
-
}
|
189
|
+
log.debug(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags });
|
169
190
|
|
170
191
|
const res = await client.interactions.executeByName(interactionName, {
|
171
192
|
config,
|
@@ -1,8 +1,7 @@
|
|
1
|
-
import { ContentObject, CreateContentObjectPayload, DSLActivityExecutionPayload, DSLActivitySpec } from '@vertesia/common';
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { ContentObject, CreateContentObjectPayload, DSLActivityExecutionPayload, DSLActivitySpec } from '@vertesia/common';
|
3
3
|
import { mutoolPdfToText } from '../conversion/mutool.js';
|
4
4
|
import { manyToMarkdown } from '../conversion/pandoc.js';
|
5
|
-
import { trasformPdfToMarkdown } from '../conversion/pdf.js';
|
6
5
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
7
6
|
import { NoDocumentFound } from '../errors.js';
|
8
7
|
import { TextExtractionResult, TextExtractionStatus } from '../result-types.js';
|
@@ -66,11 +65,7 @@ export async function extractDocumentText(payload: DSLActivityExecutionPayload<E
|
|
66
65
|
|
67
66
|
case 'application/pdf':
|
68
67
|
//if pdf is more than 2MB, use mutool
|
69
|
-
|
70
|
-
txt = await mutoolPdfToText(fileBuffer);
|
71
|
-
} else {
|
72
|
-
txt = await trasformPdfToMarkdown(fileBuffer);
|
73
|
-
}
|
68
|
+
txt = await mutoolPdfToText(fileBuffer);
|
74
69
|
break;
|
75
70
|
|
76
71
|
case 'text/plain':
|
@@ -161,13 +156,34 @@ function createResponse(doc: ContentObject, text: string, status: TextExtraction
|
|
161
156
|
}
|
162
157
|
|
163
158
|
|
164
|
-
//if file is less than 100KB, check if it looks like text
|
165
159
|
function sniffIfText(buf: Buffer) {
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
160
|
+
// If file is too large, don't even try
|
161
|
+
if (buf.length > 500 * 1024) {
|
162
|
+
return false;
|
163
|
+
}
|
164
|
+
|
165
|
+
// Count binary/control characters
|
166
|
+
let binaryCount = 0;
|
167
|
+
const sampleSize = Math.min(buf.length, 1000); // Check first 1000 bytes
|
168
|
+
|
169
|
+
for (let i = 0; i < sampleSize; i++) {
|
170
|
+
// Count control characters (except common whitespace)
|
171
|
+
const byte = buf[i];
|
172
|
+
if ((byte < 32 && ![9, 10, 13].includes(byte)) || byte === 0) {
|
173
|
+
binaryCount++;
|
170
174
|
}
|
171
175
|
}
|
172
|
-
|
173
|
-
|
176
|
+
|
177
|
+
// If more than 10% binary/control chars, probably not text
|
178
|
+
if (binaryCount / sampleSize > 0.1) {
|
179
|
+
return false;
|
180
|
+
}
|
181
|
+
|
182
|
+
// Additional check for valid UTF-8 encoding
|
183
|
+
try {
|
184
|
+
const s = buf.toString('utf8');
|
185
|
+
return s.length > 0 && !s.includes('\uFFFD'); // Replacement character
|
186
|
+
} catch (e) {
|
187
|
+
return false;
|
188
|
+
}
|
189
|
+
}
|
@@ -1,10 +1,10 @@
|
|
1
|
-
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
3
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
4
|
import { TruncateSpec } from "../utils/tokens.js";
|
5
5
|
import { InteractionExecutionParams, executeInteractionFromActivity } from "./executeInteraction.js";
|
6
6
|
|
7
|
-
const INT_EXTRACT_INFORMATION = "sys:ExtractInformation"
|
7
|
+
const INT_EXTRACT_INFORMATION = "sys:ExtractInformation";
|
8
8
|
export interface GenerateDocumentPropertiesParams extends InteractionExecutionParams {
|
9
9
|
typesHint?: string[];
|
10
10
|
/**
|
@@ -17,10 +17,12 @@ export interface GenerateDocumentPropertiesParams extends InteractionExecutionPa
|
|
17
17
|
use_vision?: boolean;
|
18
18
|
}
|
19
19
|
export interface GenerateDocumentProperties extends DSLActivitySpec<GenerateDocumentPropertiesParams> {
|
20
|
-
name:
|
20
|
+
name: "generateDocumentProperties";
|
21
21
|
}
|
22
22
|
|
23
|
-
export async function generateDocumentProperties(
|
23
|
+
export async function generateDocumentProperties(
|
24
|
+
payload: DSLActivityExecutionPayload<GenerateDocumentPropertiesParams>,
|
25
|
+
) {
|
24
26
|
const context = await setupActivity<GenerateDocumentPropertiesParams>(payload);
|
25
27
|
const { params, client, objectId } = context;
|
26
28
|
const interactionName = params.interactionName ?? INT_EXTRACT_INFORMATION;
|
@@ -32,7 +34,7 @@ export async function generateDocumentProperties(payload: DSLActivityExecutionPa
|
|
32
34
|
|
33
35
|
if (!doc?.text && !params.use_vision && !doc?.content?.type?.startsWith("image/")) {
|
34
36
|
log.warn(`Object ${objectId} not found or text is empty`);
|
35
|
-
return { status: "failed", error: "no-text" }
|
37
|
+
return { status: "failed", error: "no-text" };
|
36
38
|
}
|
37
39
|
|
38
40
|
if (!type || !type.object_schema) {
|
@@ -50,16 +52,19 @@ export async function generateDocumentProperties(payload: DSLActivityExecutionPa
|
|
50
52
|
}
|
51
53
|
|
52
54
|
log.info(`Object ${objectId} is not an image or pdf`);
|
53
|
-
return undefined
|
54
|
-
}
|
55
|
+
return undefined;
|
56
|
+
};
|
55
57
|
|
56
58
|
const promptData = {
|
57
59
|
content: doc.text ?? undefined,
|
58
60
|
image: getImageRef() ?? undefined,
|
59
61
|
human_context: project?.configuration?.human_context ?? undefined,
|
60
|
-
}
|
62
|
+
};
|
61
63
|
|
62
|
-
log.info(
|
64
|
+
log.info(
|
65
|
+
` Extracting information from object ${objectId} with type ${type.name}`,
|
66
|
+
payload.debug_mode ? { params } : undefined,
|
67
|
+
);
|
63
68
|
|
64
69
|
const infoRes = await executeInteractionFromActivity(
|
65
70
|
client,
|
@@ -70,24 +75,40 @@ export async function generateDocumentProperties(payload: DSLActivityExecutionPa
|
|
70
75
|
result_schema: type.object_schema,
|
71
76
|
},
|
72
77
|
promptData,
|
73
|
-
payload.debug_mode ?? false
|
78
|
+
payload.debug_mode ?? false,
|
74
79
|
);
|
75
80
|
|
81
|
+
const getText = () => {
|
82
|
+
if (doc.text) {
|
83
|
+
return undefined;
|
84
|
+
}
|
85
|
+
let text = "";
|
86
|
+
if (infoRes.result.title) {
|
87
|
+
text += infoRes.result.title + "\n";
|
88
|
+
}
|
89
|
+
if (infoRes.result.description) {
|
90
|
+
text += infoRes.result.description;
|
91
|
+
}
|
92
|
+
if (text) {
|
93
|
+
return text;
|
94
|
+
} else {
|
95
|
+
return undefined;
|
96
|
+
}
|
97
|
+
};
|
98
|
+
|
76
99
|
log.info(`Extracted information from object ${objectId} with type ${type.name}`, { runId: infoRes.id });
|
77
100
|
await client.objects.update(doc.id, {
|
78
101
|
properties: {
|
79
102
|
...infoRes.result,
|
80
|
-
etag: doc.text_etag
|
103
|
+
etag: doc.text_etag,
|
81
104
|
},
|
82
|
-
text:
|
105
|
+
text: getText(),
|
83
106
|
generation_run_info: {
|
84
107
|
id: infoRes.id,
|
85
108
|
date: new Date().toISOString(),
|
86
109
|
model: infoRes.modelId,
|
87
|
-
}
|
110
|
+
},
|
88
111
|
});
|
89
112
|
|
90
|
-
|
91
113
|
return { status: "completed" };
|
92
|
-
|
93
|
-
}
|
114
|
+
}
|