@vertesia/workflow 0.51.0 → 0.52.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +7 -1
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -1
- package/lib/cjs/activities/chunkDocument.js +39 -34
- package/lib/cjs/activities/chunkDocument.js.map +1 -1
- package/lib/cjs/activities/createDocumentFromOther.js +2 -2
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -1
- package/lib/cjs/activities/executeInteraction.js +11 -5
- package/lib/cjs/activities/executeInteraction.js.map +1 -1
- package/lib/cjs/activities/extractDocumentText.js +24 -6
- package/lib/cjs/activities/extractDocumentText.js.map +1 -1
- package/lib/cjs/activities/generateDocumentProperties.js +22 -4
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -1
- package/lib/cjs/activities/generateEmbeddings.js +58 -102
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -1
- package/lib/cjs/activities/generateImageRendition.js +77 -34
- package/lib/cjs/activities/generateImageRendition.js.map +1 -1
- package/lib/cjs/activities/generateOrAssignContentType.js +3 -7
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -1
- package/lib/cjs/activities/notifyWebhook.js.map +1 -1
- package/lib/cjs/conversion/image.js +80 -12
- package/lib/cjs/conversion/image.js.map +1 -1
- package/lib/cjs/dsl/setup/ActivityContext.js +30 -6
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -1
- package/lib/cjs/dsl.js +1 -1
- package/lib/cjs/dsl.js.map +1 -1
- package/lib/cjs/errors.js +13 -1
- package/lib/cjs/errors.js.map +1 -1
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +2 -1
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -1
- package/lib/cjs/system/notifyWebhookWorkflow.js +2 -1
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -1
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +1 -1
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -1
- package/lib/cjs/utils/blobs.js +12 -6
- package/lib/cjs/utils/blobs.js.map +1 -1
- package/lib/cjs/utils/chunks.js +14 -0
- package/lib/cjs/utils/chunks.js.map +1 -0
- package/lib/cjs/utils/client.js +4 -3
- package/lib/cjs/utils/client.js.map +1 -1
- package/lib/cjs/utils/memory.js +2 -9
- package/lib/cjs/utils/memory.js.map +1 -1
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +7 -1
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -1
- package/lib/esm/activities/chunkDocument.js +39 -34
- package/lib/esm/activities/chunkDocument.js.map +1 -1
- package/lib/esm/activities/createDocumentFromOther.js +1 -1
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -1
- package/lib/esm/activities/executeInteraction.js +11 -5
- package/lib/esm/activities/executeInteraction.js.map +1 -1
- package/lib/esm/activities/extractDocumentText.js +24 -6
- package/lib/esm/activities/extractDocumentText.js.map +1 -1
- package/lib/esm/activities/generateDocumentProperties.js +22 -4
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -1
- package/lib/esm/activities/generateEmbeddings.js +58 -69
- package/lib/esm/activities/generateEmbeddings.js.map +1 -1
- package/lib/esm/activities/generateImageRendition.js +78 -35
- package/lib/esm/activities/generateImageRendition.js.map +1 -1
- package/lib/esm/activities/generateOrAssignContentType.js +3 -7
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -1
- package/lib/esm/activities/notifyWebhook.js.map +1 -1
- package/lib/esm/conversion/image.js +80 -12
- package/lib/esm/conversion/image.js.map +1 -1
- package/lib/esm/dsl/setup/ActivityContext.js +31 -7
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -1
- package/lib/esm/dsl.js +1 -1
- package/lib/esm/dsl.js.map +1 -1
- package/lib/esm/errors.js +11 -0
- package/lib/esm/errors.js.map +1 -1
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +2 -1
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -1
- package/lib/esm/system/notifyWebhookWorkflow.js +2 -1
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -1
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +2 -2
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -1
- package/lib/esm/utils/blobs.js +12 -6
- package/lib/esm/utils/blobs.js.map +1 -1
- package/lib/esm/utils/chunks.js +9 -0
- package/lib/esm/utils/chunks.js.map +1 -0
- package/lib/esm/utils/client.js +4 -3
- package/lib/esm/utils/client.js.map +1 -1
- package/lib/esm/utils/memory.js +2 -7
- package/lib/esm/utils/memory.js.map +1 -1
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +10 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -1
- package/lib/types/activities/chunkDocument.d.ts +15 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -1
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -1
- package/lib/types/activities/executeInteraction.d.ts +14 -3
- package/lib/types/activities/executeInteraction.d.ts.map +1 -1
- package/lib/types/activities/generateDocumentProperties.d.ts +1 -1
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -1
- package/lib/types/activities/generateEmbeddings.d.ts +21 -17
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -1
- package/lib/types/activities/generateImageRendition.d.ts +3 -5
- package/lib/types/activities/generateImageRendition.d.ts.map +1 -1
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -1
- package/lib/types/activities/notifyWebhook.d.ts +1 -2
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -1
- package/lib/types/conversion/image.d.ts +8 -6
- package/lib/types/conversion/image.d.ts.map +1 -1
- package/lib/types/dsl/setup/ActivityContext.d.ts +3 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -1
- package/lib/types/dsl.d.ts +1 -1
- package/lib/types/dsl.d.ts.map +1 -1
- package/lib/types/errors.d.ts +6 -0
- package/lib/types/errors.d.ts.map +1 -1
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -1
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -1
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +2 -17
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -1
- package/lib/types/utils/blobs.d.ts.map +1 -1
- package/lib/types/utils/chunks.d.ts +9 -0
- package/lib/types/utils/chunks.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts.map +1 -1
- package/lib/types/utils/memory.d.ts +1 -5
- package/lib/types/utils/memory.d.ts.map +1 -1
- package/lib/workflows-bundle.js +15394 -14602
- package/package.json +8 -6
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +20 -1
- package/src/activities/chunkDocument.ts +62 -42
- package/src/activities/createDocumentFromOther.ts +1 -1
- package/src/activities/executeInteraction.ts +27 -9
- package/src/activities/extractDocumentText.ts +28 -7
- package/src/activities/generateDocumentProperties.ts +37 -16
- package/src/activities/generateEmbeddings.ts +91 -79
- package/src/activities/generateImageRendition.ts +100 -53
- package/src/activities/generateOrAssignContentType.ts +5 -11
- package/src/activities/notifyWebhook.ts +2 -2
- package/src/conversion/image.test.ts +110 -18
- package/src/conversion/image.ts +90 -15
- package/src/conversion/pandoc.test.ts +7 -5
- package/src/dsl/setup/ActivityContext.ts +57 -16
- package/src/dsl.ts +1 -1
- package/src/errors.ts +27 -6
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +2 -1
- package/src/system/notifyWebhookWorkflow.ts +2 -1
- package/src/system/recalculateEmbeddingsWorkflow.ts +2 -2
- package/src/utils/blobs.ts +11 -6
- package/src/utils/chunks.ts +17 -0
- package/src/utils/client.ts +4 -3
- package/src/utils/memory.ts +3 -8
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@vertesia/workflow",
|
3
|
-
"version": "0.
|
3
|
+
"version": "0.52.0",
|
4
4
|
"type": "module",
|
5
5
|
"description": "Composable prompts workflow dsl",
|
6
6
|
"main": "./lib/esm/index.js",
|
@@ -21,12 +21,13 @@
|
|
21
21
|
"@smithy/types": "^3.7.2",
|
22
22
|
"@temporalio/testing": "^1.11.5",
|
23
23
|
"@temporalio/worker": "^1.11.5",
|
24
|
+
"@types/fluent-ffmpeg": "^2.1.27",
|
24
25
|
"@types/jsonwebtoken": "^9.0.7",
|
25
26
|
"@types/node": "^22.5.1",
|
26
27
|
"@types/papaparse": "^5.3.15",
|
27
28
|
"@types/tmp": "^0.2.6",
|
28
29
|
"ts-dual-module": "^0.6.3",
|
29
|
-
"vitest": "^
|
30
|
+
"vitest": "^3.0.9"
|
30
31
|
},
|
31
32
|
"dependencies": {
|
32
33
|
"@aws-sdk/client-s3": "^3.693.0",
|
@@ -34,11 +35,11 @@
|
|
34
35
|
"@aws-sdk/credential-providers": "^3.693.0",
|
35
36
|
"@temporalio/activity": "^1.11.5",
|
36
37
|
"@temporalio/workflow": "^1.11.5",
|
37
|
-
"@tensorflow/tfjs-node": "^4.19.0",
|
38
38
|
"@types/json-schema": "^7.0.15",
|
39
39
|
"@vertesia/memory": "^0.43.0",
|
40
40
|
"api-fetch-client": "^0.13.0",
|
41
41
|
"fast-deep-equal": "^3.1.3",
|
42
|
+
"fluent-ffmpeg": "^2.1.3",
|
42
43
|
"jsonwebtoken": "^9.0.2",
|
43
44
|
"ms": "3.0.0-canary.1",
|
44
45
|
"node-web-stream-adapters": "^0.2.1",
|
@@ -47,10 +48,11 @@
|
|
47
48
|
"sharp": "^0.33.4",
|
48
49
|
"tiktoken": "^1.0.15",
|
49
50
|
"tmp": "^0.2.3",
|
51
|
+
"tmp-promise": "^3.0.3",
|
50
52
|
"yaml": "^2.6.0",
|
51
|
-
"@
|
52
|
-
"@vertesia/client": "0.
|
53
|
-
"@
|
53
|
+
"@vertesia/common": "0.52.0",
|
54
|
+
"@vertesia/client": "0.53.0",
|
55
|
+
"@llumiverse/core": "0.17.0"
|
54
56
|
},
|
55
57
|
"ts_dual_module": {
|
56
58
|
"outDir": "lib",
|
@@ -1,5 +1,5 @@
|
|
1
|
-
import { ContentObjectStatus, DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { ContentObjectStatus, DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
3
|
import { setupActivity } from "../../dsl/setup/ActivityContext.js";
|
4
4
|
import { ActivityParamNotFound, NoDocumentFound } from "../../errors.js";
|
5
5
|
interface CreateOrUpdateObjectFromInteractionRunParams {
|
@@ -19,9 +19,21 @@ interface CreateOrUpdateObjectFromInteractionRunParams {
|
|
19
19
|
*/
|
20
20
|
update_existing_id?: string,
|
21
21
|
|
22
|
+
/**
|
23
|
+
* The name to fall back on when the interaction result does not provide one
|
24
|
+
*/
|
22
25
|
fallback_name?: string, // a name to use if no one was generated by the interaction
|
23
26
|
|
27
|
+
/**
|
28
|
+
* The id of the parent object to use. If not provided, the document will be created at the root level
|
29
|
+
*/
|
24
30
|
parent?: string, // the parent object id
|
31
|
+
|
32
|
+
/**
|
33
|
+
* The name of the property to use for the text. If not provided, the text will be set to the result of the interaction
|
34
|
+
*/
|
35
|
+
update_text_from_property?: string,
|
36
|
+
|
25
37
|
}
|
26
38
|
|
27
39
|
export interface CreateOrUpdateObjectFromInteractionRun extends DSLActivitySpec<CreateOrUpdateObjectFromInteractionRunParams> {
|
@@ -80,6 +92,13 @@ export async function createOrUpdateDocumentFromInteractionRun(payload: DSLActiv
|
|
80
92
|
}
|
81
93
|
};
|
82
94
|
|
95
|
+
if (params.update_text_from_property) {
|
96
|
+
const text = docPayload.properties[params.update_text_from_property];
|
97
|
+
if (text) {
|
98
|
+
docPayload.text = text;
|
99
|
+
}
|
100
|
+
}
|
101
|
+
|
83
102
|
//create or update the document
|
84
103
|
let newDoc: boolean = false;
|
85
104
|
let doc = undefined;
|
@@ -1,18 +1,12 @@
|
|
1
|
-
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
3
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
|
+
import { DocPart } from "../utils/chunks.js";
|
4
5
|
import { InteractionExecutionParams, executeInteractionFromActivity } from "./executeInteraction.js";
|
5
6
|
|
6
7
|
const INT_CHUNK_DOCUMENT = "sys:ChunkDocument"
|
7
8
|
|
8
|
-
interface DocPart {
|
9
9
|
|
10
|
-
line_number_start: number
|
11
|
-
line_number_end: number
|
12
|
-
name: string
|
13
|
-
type: string
|
14
|
-
|
15
|
-
}
|
16
10
|
|
17
11
|
export interface ChunkDocumentResult {
|
18
12
|
id: string
|
@@ -22,9 +16,28 @@ export interface ChunkDocumentResult {
|
|
22
16
|
}
|
23
17
|
|
24
18
|
export interface ChunkDocumentParams extends InteractionExecutionParams {
|
19
|
+
|
20
|
+
/**
|
21
|
+
* If true, force chunking even if the document is already chunked
|
22
|
+
*/
|
25
23
|
force?: boolean;
|
24
|
+
|
25
|
+
/**
|
26
|
+
* The interaction name to use for chunking
|
27
|
+
* If not set, the default interaction will be used
|
28
|
+
*/
|
26
29
|
interactionName?: string;
|
30
|
+
|
31
|
+
/**
|
32
|
+
* The object type to use for the document parts
|
33
|
+
* If not set, the type of the document will be used
|
34
|
+
*/
|
27
35
|
docPartType?: string;
|
36
|
+
|
37
|
+
/**
|
38
|
+
* If true, create parts as document objects
|
39
|
+
*/
|
40
|
+
createParts?: boolean;
|
28
41
|
}
|
29
42
|
|
30
43
|
export interface ChunkDocument extends DSLActivitySpec<ChunkDocumentParams> {
|
@@ -75,48 +88,55 @@ export async function chunkDocument(payload: DSLActivityExecutionPayload<ChunkDo
|
|
75
88
|
return { id: objectId, status: "failed", parts: [], message: "no parts found" }
|
76
89
|
}
|
77
90
|
|
78
|
-
const partDocs = await Promise.all(parts.map(async (part, i) => {
|
79
91
|
|
80
|
-
|
92
|
+
/**
|
93
|
+
* Only create parts as document if the flag is set
|
94
|
+
*/
|
95
|
+
if (params.createParts) {
|
96
|
+
|
97
|
+
const partDocs = await Promise.all(parts.map(async (part, i) => {
|
81
98
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
location
|
99
|
+
const text = lines.filter((_l, i) => i >= part.line_number_start && i <= part.line_number_end).join('\n');
|
100
|
+
|
101
|
+
const location = () => {
|
102
|
+
let location = document.location;
|
103
|
+
if (location.endsWith('/')) {
|
104
|
+
location += document.name + "/" + part.type
|
105
|
+
}
|
106
|
+
location += '/' + document.name + "/" + part.type;
|
107
|
+
return location;
|
86
108
|
}
|
87
|
-
|
88
|
-
|
109
|
+
|
110
|
+
const docPart = await client.objects.create({
|
111
|
+
name: part.name,
|
112
|
+
parent: objectId,
|
113
|
+
text: text,
|
114
|
+
location: location(),
|
115
|
+
properties: {
|
116
|
+
part_number: i + 1,
|
117
|
+
etag: document.text_etag,
|
118
|
+
source_line_start: part.line_number_start,
|
119
|
+
source_line_end: part.line_number_end,
|
120
|
+
title: part.name
|
121
|
+
}
|
122
|
+
});
|
123
|
+
return docPart;
|
124
|
+
}));
|
125
|
+
|
126
|
+
//delete previous parts
|
127
|
+
if (document.parts && document.parts.length > 0) {
|
128
|
+
log.info('Deleting previous parts for object ID: ' + objectId, { parts: document.parts });
|
129
|
+
await Promise.all(document.parts.map(async (partId) => {
|
130
|
+
await client.objects.delete(partId);
|
131
|
+
}));
|
89
132
|
}
|
90
133
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
text: text,
|
95
|
-
location: location(),
|
96
|
-
properties: {
|
97
|
-
part_number: i + 1,
|
98
|
-
etag: document.text_etag,
|
99
|
-
source_line_start: part.line_number_start,
|
100
|
-
source_line_end: part.line_number_end,
|
101
|
-
title: part.name
|
102
|
-
}
|
134
|
+
await client.objects.update(objectId, {
|
135
|
+
parts: partDocs.map(p => p.id),
|
136
|
+
parts_etag: document.text_etag
|
103
137
|
});
|
104
|
-
return docPart;
|
105
|
-
}));
|
106
|
-
|
107
|
-
//delete previous parts
|
108
|
-
if (document.parts && document.parts.length > 0) {
|
109
|
-
log.info('Deleting previous parts for object ID: ' + objectId, { parts: document.parts });
|
110
|
-
await Promise.all(document.parts.map(async (partId) => {
|
111
|
-
await client.objects.delete(partId);
|
112
|
-
}));
|
113
138
|
}
|
114
139
|
|
115
|
-
await client.objects.update(objectId, {
|
116
|
-
parts: partDocs.map(p => p.id),
|
117
|
-
parts_etag: document.text_etag
|
118
|
-
});
|
119
|
-
|
120
140
|
log.info(`Object ${objectId} chunking completed`, { parts: document.parts });
|
121
141
|
|
122
142
|
return { id: objectId, status: "completed", parts: document.parts }
|
@@ -1,11 +1,11 @@
|
|
1
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { NodeStreamSource } from "@vertesia/client/node";
|
2
3
|
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
4
|
import fs from 'fs';
|
4
5
|
import { pdfExtractPages } from "../conversion/mutool.js";
|
5
6
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
6
7
|
import { NoDocumentFound } from "../errors.js";
|
7
8
|
import { saveBlobToTempFile } from "../utils/blobs.js";
|
8
|
-
import { NodeStreamSource } from "../utils/memory.js";
|
9
9
|
|
10
10
|
interface CreatePdfDocumentFromSourceParams {
|
11
11
|
|
@@ -1,10 +1,11 @@
|
|
1
|
+
import { ModelOptions } from "@llumiverse/core";
|
2
|
+
import { activityInfo, log } from "@temporalio/activity";
|
1
3
|
import { VertesiaClient } from "@vertesia/client";
|
2
4
|
import { DSLActivityExecutionPayload, DSLActivitySpec, ExecutionRun, ExecutionRunStatus, InteractionExecutionConfiguration, RunSearchPayload } from "@vertesia/common";
|
3
|
-
import { activityInfo, log } from "@temporalio/activity";
|
4
5
|
import { projectResult } from "../dsl/projections.js";
|
5
6
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
7
|
+
import { ActivityParamNotFound } from "../errors.js";
|
6
8
|
import { TruncateSpec, truncByMaxTokens } from "../utils/tokens.js";
|
7
|
-
import { ModelOptions } from "@llumiverse/core";
|
8
9
|
|
9
10
|
//Example:
|
10
11
|
//@ts-ignore
|
@@ -83,14 +84,25 @@ export interface InteractionExecutionParams {
|
|
83
84
|
/**
|
84
85
|
* Options to control generation
|
85
86
|
*/
|
86
|
-
model_options?: ModelOptions;
|
87
|
+
model_options?: ModelOptions;
|
87
88
|
}
|
88
89
|
|
89
|
-
|
90
|
+
/**
|
91
|
+
* TODO: must be kept in sync with InteractionAsyncExecutionPayload from @vertesia/common
|
92
|
+
* Also see the executeInteractionAsync endpoint on the server for how the client payload is sent to the workflow.
|
93
|
+
* (interaction is translated to interactionName)
|
94
|
+
*/
|
90
95
|
export interface ExecuteInteractionParams extends InteractionExecutionParams {
|
96
|
+
//TODO rename to interaction as in InteractionAsyncExecutionPayload
|
91
97
|
interactionName: string;
|
92
98
|
prompt_data: Record<string, any>;
|
93
|
-
|
99
|
+
/**
|
100
|
+
* Additional prompt data passed by the workflow configuration. This will be merged with prompt_data if any.
|
101
|
+
* You should use `import: ["static_prompt_data"]` to import the workflow prompt data as static_prompt_data param.
|
102
|
+
* Otherwise the workflow prompt data will be ignored.
|
103
|
+
*/
|
104
|
+
static_prompt_data?: Record<string, any>;
|
105
|
+
truncate?: Record<string, TruncateSpec>;
|
94
106
|
}
|
95
107
|
|
96
108
|
export interface ExecuteInteraction extends DSLActivitySpec<ExecuteInteractionParams> {
|
@@ -102,7 +114,15 @@ export async function executeInteraction(payload: DSLActivityExecutionPayload<Ex
|
|
102
114
|
client, params
|
103
115
|
} = await setupActivity<ExecuteInteractionParams>(payload);
|
104
116
|
|
105
|
-
const { interactionName, prompt_data } = params;
|
117
|
+
const { interactionName, prompt_data, static_prompt_data: wf_prompt_data } = params;
|
118
|
+
if (wf_prompt_data) {
|
119
|
+
Object.assign(prompt_data, wf_prompt_data);
|
120
|
+
}
|
121
|
+
|
122
|
+
if (!interactionName) {
|
123
|
+
log.error("Missing interactionName", { params });
|
124
|
+
throw new ActivityParamNotFound("interactionName", payload.activity);
|
125
|
+
}
|
106
126
|
|
107
127
|
if (params.truncate) {
|
108
128
|
const truncate = params.truncate;
|
@@ -166,9 +186,7 @@ export async function executeInteractionFromActivity(client: VertesiaClient, int
|
|
166
186
|
|
167
187
|
const result_schema = params.result_schema;
|
168
188
|
|
169
|
-
|
170
|
-
log.info(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags });
|
171
|
-
}
|
189
|
+
log.debug(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags });
|
172
190
|
|
173
191
|
const res = await client.interactions.executeByName(interactionName, {
|
174
192
|
config,
|
@@ -156,13 +156,34 @@ function createResponse(doc: ContentObject, text: string, status: TextExtraction
|
|
156
156
|
}
|
157
157
|
|
158
158
|
|
159
|
-
//if file is less than 100KB, check if it looks like text
|
160
159
|
function sniffIfText(buf: Buffer) {
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
160
|
+
// If file is too large, don't even try
|
161
|
+
if (buf.length > 500 * 1024) {
|
162
|
+
return false;
|
163
|
+
}
|
164
|
+
|
165
|
+
// Count binary/control characters
|
166
|
+
let binaryCount = 0;
|
167
|
+
const sampleSize = Math.min(buf.length, 1000); // Check first 1000 bytes
|
168
|
+
|
169
|
+
for (let i = 0; i < sampleSize; i++) {
|
170
|
+
// Count control characters (except common whitespace)
|
171
|
+
const byte = buf[i];
|
172
|
+
if ((byte < 32 && ![9, 10, 13].includes(byte)) || byte === 0) {
|
173
|
+
binaryCount++;
|
165
174
|
}
|
166
175
|
}
|
167
|
-
|
168
|
-
|
176
|
+
|
177
|
+
// If more than 10% binary/control chars, probably not text
|
178
|
+
if (binaryCount / sampleSize > 0.1) {
|
179
|
+
return false;
|
180
|
+
}
|
181
|
+
|
182
|
+
// Additional check for valid UTF-8 encoding
|
183
|
+
try {
|
184
|
+
const s = buf.toString('utf8');
|
185
|
+
return s.length > 0 && !s.includes('\uFFFD'); // Replacement character
|
186
|
+
} catch (e) {
|
187
|
+
return false;
|
188
|
+
}
|
189
|
+
}
|
@@ -1,10 +1,10 @@
|
|
1
|
-
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
2
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
3
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
4
4
|
import { TruncateSpec } from "../utils/tokens.js";
|
5
5
|
import { InteractionExecutionParams, executeInteractionFromActivity } from "./executeInteraction.js";
|
6
6
|
|
7
|
-
const INT_EXTRACT_INFORMATION = "sys:ExtractInformation"
|
7
|
+
const INT_EXTRACT_INFORMATION = "sys:ExtractInformation";
|
8
8
|
export interface GenerateDocumentPropertiesParams extends InteractionExecutionParams {
|
9
9
|
typesHint?: string[];
|
10
10
|
/**
|
@@ -17,10 +17,12 @@ export interface GenerateDocumentPropertiesParams extends InteractionExecutionPa
|
|
17
17
|
use_vision?: boolean;
|
18
18
|
}
|
19
19
|
export interface GenerateDocumentProperties extends DSLActivitySpec<GenerateDocumentPropertiesParams> {
|
20
|
-
name:
|
20
|
+
name: "generateDocumentProperties";
|
21
21
|
}
|
22
22
|
|
23
|
-
export async function generateDocumentProperties(
|
23
|
+
export async function generateDocumentProperties(
|
24
|
+
payload: DSLActivityExecutionPayload<GenerateDocumentPropertiesParams>,
|
25
|
+
) {
|
24
26
|
const context = await setupActivity<GenerateDocumentPropertiesParams>(payload);
|
25
27
|
const { params, client, objectId } = context;
|
26
28
|
const interactionName = params.interactionName ?? INT_EXTRACT_INFORMATION;
|
@@ -32,7 +34,7 @@ export async function generateDocumentProperties(payload: DSLActivityExecutionPa
|
|
32
34
|
|
33
35
|
if (!doc?.text && !params.use_vision && !doc?.content?.type?.startsWith("image/")) {
|
34
36
|
log.warn(`Object ${objectId} not found or text is empty`);
|
35
|
-
return { status: "failed", error: "no-text" }
|
37
|
+
return { status: "failed", error: "no-text" };
|
36
38
|
}
|
37
39
|
|
38
40
|
if (!type || !type.object_schema) {
|
@@ -50,16 +52,19 @@ export async function generateDocumentProperties(payload: DSLActivityExecutionPa
|
|
50
52
|
}
|
51
53
|
|
52
54
|
log.info(`Object ${objectId} is not an image or pdf`);
|
53
|
-
return undefined
|
54
|
-
}
|
55
|
+
return undefined;
|
56
|
+
};
|
55
57
|
|
56
58
|
const promptData = {
|
57
59
|
content: doc.text ?? undefined,
|
58
60
|
image: getImageRef() ?? undefined,
|
59
61
|
human_context: project?.configuration?.human_context ?? undefined,
|
60
|
-
}
|
62
|
+
};
|
61
63
|
|
62
|
-
log.info(
|
64
|
+
log.info(
|
65
|
+
` Extracting information from object ${objectId} with type ${type.name}`,
|
66
|
+
payload.debug_mode ? { params } : undefined,
|
67
|
+
);
|
63
68
|
|
64
69
|
const infoRes = await executeInteractionFromActivity(
|
65
70
|
client,
|
@@ -70,24 +75,40 @@ export async function generateDocumentProperties(payload: DSLActivityExecutionPa
|
|
70
75
|
result_schema: type.object_schema,
|
71
76
|
},
|
72
77
|
promptData,
|
73
|
-
payload.debug_mode ?? false
|
78
|
+
payload.debug_mode ?? false,
|
74
79
|
);
|
75
80
|
|
81
|
+
const getText = () => {
|
82
|
+
if (doc.text) {
|
83
|
+
return undefined;
|
84
|
+
}
|
85
|
+
let text = "";
|
86
|
+
if (infoRes.result.title) {
|
87
|
+
text += infoRes.result.title + "\n";
|
88
|
+
}
|
89
|
+
if (infoRes.result.description) {
|
90
|
+
text += infoRes.result.description;
|
91
|
+
}
|
92
|
+
if (text) {
|
93
|
+
return text;
|
94
|
+
} else {
|
95
|
+
return undefined;
|
96
|
+
}
|
97
|
+
};
|
98
|
+
|
76
99
|
log.info(`Extracted information from object ${objectId} with type ${type.name}`, { runId: infoRes.id });
|
77
100
|
await client.objects.update(doc.id, {
|
78
101
|
properties: {
|
79
102
|
...infoRes.result,
|
80
|
-
etag: doc.text_etag
|
103
|
+
etag: doc.text_etag,
|
81
104
|
},
|
82
|
-
text:
|
105
|
+
text: getText(),
|
83
106
|
generation_run_info: {
|
84
107
|
id: infoRes.id,
|
85
108
|
date: new Date().toISOString(),
|
86
109
|
model: infoRes.modelId,
|
87
|
-
}
|
110
|
+
},
|
88
111
|
});
|
89
112
|
|
90
|
-
|
91
113
|
return { status: "completed" };
|
92
|
-
|
93
|
-
}
|
114
|
+
}
|