@vertesia/workflow 0.56.0 → 0.58.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/activities/generateEmbeddings.js +158 -61
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -1
- package/lib/cjs/activities/generateOrAssignContentType.js +19 -8
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -1
- package/lib/cjs/activities/index-dsl.js +4 -2
- package/lib/cjs/activities/index-dsl.js.map +1 -1
- package/lib/cjs/activities/renditions/generateImageRendition.js +57 -0
- package/lib/cjs/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js +196 -0
- package/lib/cjs/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/cjs/dsl/dsl-workflow.js +8 -0
- package/lib/cjs/dsl/dsl-workflow.js.map +1 -1
- package/lib/cjs/index.js +3 -1
- package/lib/cjs/index.js.map +1 -1
- package/lib/cjs/utils/renditions.js +88 -0
- package/lib/cjs/utils/renditions.js.map +1 -0
- package/lib/esm/activities/generateEmbeddings.js +160 -63
- package/lib/esm/activities/generateEmbeddings.js.map +1 -1
- package/lib/esm/activities/generateOrAssignContentType.js +21 -10
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -1
- package/lib/esm/activities/index-dsl.js +2 -1
- package/lib/esm/activities/index-dsl.js.map +1 -1
- package/lib/esm/activities/renditions/generateImageRendition.js +54 -0
- package/lib/esm/activities/renditions/generateImageRendition.js.map +1 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js +190 -0
- package/lib/esm/activities/renditions/generateVideoRendition.js.map +1 -0
- package/lib/esm/dsl/dsl-workflow.js +8 -0
- package/lib/esm/dsl/dsl-workflow.js.map +1 -1
- package/lib/esm/index.js +3 -1
- package/lib/esm/index.js.map +1 -1
- package/lib/esm/utils/renditions.js +80 -0
- package/lib/esm/utils/renditions.js.map +1 -0
- package/lib/types/activities/generateEmbeddings.d.ts +1 -1
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -1
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -1
- package/lib/types/activities/index-dsl.d.ts +2 -1
- package/lib/types/activities/index-dsl.d.ts.map +1 -1
- package/lib/types/activities/{generateImageRendition.d.ts → renditions/generateImageRendition.d.ts} +4 -5
- package/lib/types/activities/renditions/generateImageRendition.d.ts.map +1 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts +15 -0
- package/lib/types/activities/renditions/generateVideoRendition.d.ts.map +1 -0
- package/lib/types/dsl/dsl-workflow.d.ts.map +1 -1
- package/lib/types/index.d.ts +3 -1
- package/lib/types/index.d.ts.map +1 -1
- package/lib/types/utils/renditions.d.ts +23 -0
- package/lib/types/utils/renditions.d.ts.map +1 -0
- package/lib/workflows-bundle.js +99 -34
- package/package.json +3 -3
- package/src/activities/generateEmbeddings.ts +440 -296
- package/src/activities/generateOrAssignContentType.ts +185 -144
- package/src/activities/index-dsl.ts +2 -1
- package/src/activities/renditions/generateImageRendition.ts +99 -0
- package/src/activities/renditions/generateVideoRendition.ts +288 -0
- package/src/dsl/dsl-workflow.ts +8 -0
- package/src/dsl/workflow-exec-child.test.ts +1 -0
- package/src/dsl/workflow.test.ts +1 -0
- package/src/index.ts +3 -1
- package/src/utils/renditions.ts +124 -0
- package/lib/cjs/activities/generateImageRendition.js +0 -167
- package/lib/cjs/activities/generateImageRendition.js.map +0 -1
- package/lib/esm/activities/generateImageRendition.js +0 -161
- package/lib/esm/activities/generateImageRendition.js.map +0 -1
- package/lib/types/activities/generateImageRendition.d.ts.map +0 -1
- package/src/activities/generateImageRendition.ts +0 -202
@@ -1,172 +1,213 @@
|
|
1
1
|
import { log } from "@temporalio/activity";
|
2
2
|
import {
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
ContentObjectTypeItem,
|
4
|
+
CreateContentObjectTypePayload,
|
5
|
+
DSLActivityExecutionPayload,
|
6
|
+
DSLActivitySpec,
|
7
|
+
ImageRenditionFormat,
|
7
8
|
} from "@vertesia/common";
|
8
|
-
import {
|
9
|
+
import {
|
10
|
+
ActivityContext,
|
11
|
+
setupActivity,
|
12
|
+
} from "../dsl/setup/ActivityContext.js";
|
9
13
|
import { TruncateSpec, truncByMaxTokens } from "../utils/tokens.js";
|
10
|
-
import {
|
14
|
+
import {
|
15
|
+
InteractionExecutionParams,
|
16
|
+
executeInteractionFromActivity,
|
17
|
+
} from "./executeInteraction.js";
|
11
18
|
|
12
19
|
const INT_SELECT_DOCUMENT_TYPE = "sys:SelectDocumentType";
|
13
20
|
const INT_GENERATE_METADATA_MODEL = "sys:GenerateMetadataModel";
|
14
21
|
|
15
|
-
export interface GenerateOrAssignContentTypeParams
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
22
|
+
export interface GenerateOrAssignContentTypeParams
|
23
|
+
extends InteractionExecutionParams {
|
24
|
+
typesHint?: string[];
|
25
|
+
/**
|
26
|
+
* truncate the input doc text to the specified max_tokens
|
27
|
+
*/
|
28
|
+
truncate?: TruncateSpec;
|
29
|
+
|
30
|
+
/**
|
31
|
+
* The name of the interaction to execute
|
32
|
+
* @default SelectDocumentType
|
33
|
+
*/
|
34
|
+
interactionNames?: {
|
35
|
+
selectDocumentType?: string;
|
36
|
+
generateMetadataModel?: string;
|
37
|
+
};
|
30
38
|
}
|
31
39
|
|
32
|
-
export interface GenerateOrAssignContentType
|
33
|
-
|
40
|
+
export interface GenerateOrAssignContentType
|
41
|
+
extends DSLActivitySpec<GenerateOrAssignContentTypeParams> {
|
42
|
+
name: "generateOrAssignContentType";
|
34
43
|
}
|
35
44
|
|
36
45
|
export async function generateOrAssignContentType(
|
37
|
-
|
46
|
+
payload: DSLActivityExecutionPayload<GenerateOrAssignContentTypeParams>,
|
38
47
|
) {
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
const interactionName = params.interactionNames?.selectDocumentType ?? INT_SELECT_DOCUMENT_TYPE;
|
43
|
-
|
44
|
-
log.info("SelectDocumentType for object: " + objectId, { payload });
|
48
|
+
const context =
|
49
|
+
await setupActivity<GenerateOrAssignContentTypeParams>(payload);
|
50
|
+
const { params, client, objectId } = context;
|
45
51
|
|
46
|
-
|
52
|
+
const interactionName =
|
53
|
+
params.interactionNames?.selectDocumentType ?? INT_SELECT_DOCUMENT_TYPE;
|
47
54
|
|
48
|
-
|
49
|
-
//User initiated Content Type change via the Composable UI,
|
50
|
-
//sets object.type to null when they let Composable choose for them.
|
51
|
-
//sets object.type to chosen type (thus non-null) when user picks a type.
|
52
|
-
if (object.type) {
|
53
|
-
log.warn(`Object ${objectId} has already a type. Skipping type creation.`);
|
54
|
-
return { status: "skipped", message: "Object already has a type: " + object.type.name };
|
55
|
-
}
|
55
|
+
log.info("SelectDocumentType for object: " + objectId, { payload });
|
56
56
|
|
57
|
-
|
58
|
-
!object ||
|
59
|
-
(!object.text &&
|
60
|
-
!object.content?.type?.startsWith("image/") &&
|
61
|
-
!object.content?.type?.startsWith("application/pdf"))
|
62
|
-
) {
|
63
|
-
log.info(`Object ${objectId} not found or text is empty and not an image`, { object });
|
64
|
-
return { status: "failed", error: "no-text" };
|
65
|
-
}
|
57
|
+
const object = await client.objects.retrieve(objectId, "+text");
|
66
58
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
if (object.content?.type?.includes("pdf") && object.text?.length && object.text?.length < 100) {
|
77
|
-
return "store:" + objectId;
|
78
|
-
}
|
79
|
-
if (!object.content?.type?.startsWith("image/")) {
|
80
|
-
return undefined;
|
81
|
-
}
|
82
|
-
const res = await client.objects.getRendition(objectId, {
|
83
|
-
max_hw: 1024,
|
84
|
-
format: "image/png",
|
85
|
-
generate_if_missing: true,
|
86
|
-
});
|
87
|
-
if (!res.rendition && res.status === "generating") {
|
88
|
-
//throw to try again
|
89
|
-
throw new Error(`Rendition for object ${objectId} is in progress`);
|
90
|
-
} else if (res.rendition) {
|
91
|
-
return "store:" + objectId;
|
92
|
-
}
|
59
|
+
//Expects object.type to be null on first ingestion of content
|
60
|
+
//User initiated Content Type change via the Composable UI,
|
61
|
+
//sets object.type to null when they let Composable choose for them.
|
62
|
+
//sets object.type to chosen type (thus non-null) when user picks a type.
|
63
|
+
if (object.type) {
|
64
|
+
log.warn(`Object ${objectId} has already a type. Skipping type creation.`);
|
65
|
+
return {
|
66
|
+
status: "skipped",
|
67
|
+
message: "Object already has a type: " + object.type.name,
|
93
68
|
};
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
content,
|
105
|
-
image: fileRef,
|
69
|
+
}
|
70
|
+
|
71
|
+
if (
|
72
|
+
!object ||
|
73
|
+
(!object.text &&
|
74
|
+
!object.content?.type?.startsWith("image/") &&
|
75
|
+
!object.content?.type?.startsWith("application/pdf"))
|
76
|
+
) {
|
77
|
+
log.info(`Object ${objectId} not found or text is empty and not an image`, {
|
78
|
+
object,
|
106
79
|
});
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
80
|
+
return { status: "failed", error: "no-text" };
|
81
|
+
}
|
82
|
+
|
83
|
+
const types = await client.types.list(undefined, {
|
84
|
+
schema: true,
|
85
|
+
});
|
86
|
+
|
87
|
+
//make a list of all existing types, and add hints if any
|
88
|
+
const existing_types = types.filter(
|
89
|
+
(t) => !["DocumentPart", "Rendition"].includes(t.name),
|
90
|
+
);
|
91
|
+
const content = object.text
|
92
|
+
? truncByMaxTokens(object.text, params.truncate || 30000)
|
93
|
+
: undefined;
|
94
|
+
|
95
|
+
const getImage = async () => {
|
96
|
+
if (
|
97
|
+
object.content?.type?.includes("pdf") &&
|
98
|
+
object.text?.length &&
|
99
|
+
object.text?.length < 100
|
100
|
+
) {
|
101
|
+
return "store:" + objectId;
|
119
102
|
}
|
120
|
-
|
121
|
-
|
122
|
-
log.error("Type not found: ", res.result);
|
123
|
-
throw new Error("Type not found: " + res.result.document_type);
|
103
|
+
if (!object.content?.type?.startsWith("image/")) {
|
104
|
+
return undefined;
|
124
105
|
}
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
106
|
+
const res = await client.objects.getRendition(objectId, {
|
107
|
+
max_hw: 1024,
|
108
|
+
format: ImageRenditionFormat.jpeg,
|
109
|
+
generate_if_missing: true,
|
129
110
|
});
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
}
|
111
|
+
if (!res.renditions?.length && res.status === "generating") {
|
112
|
+
//throw to try again
|
113
|
+
throw new Error(`Rendition for object ${objectId} is in progress`);
|
114
|
+
} else if (res.renditions) {
|
115
|
+
return "store:" + objectId;
|
116
|
+
}
|
117
|
+
};
|
118
|
+
|
119
|
+
const fileRef = await getImage();
|
120
|
+
|
121
|
+
log.info(
|
122
|
+
"Execute SelectDocumentType interaction on content with \nexisting types - passing full types: " +
|
123
|
+
existing_types.filter((t) => !t.tags?.includes("system")),
|
124
|
+
);
|
125
|
+
|
126
|
+
const res = await executeInteractionFromActivity(
|
127
|
+
client,
|
128
|
+
interactionName,
|
129
|
+
params,
|
130
|
+
{
|
131
|
+
existing_types,
|
132
|
+
content,
|
133
|
+
image: fileRef,
|
134
|
+
},
|
135
|
+
);
|
136
|
+
|
137
|
+
log.info("Selected Content Type Result: " + JSON.stringify(res.result));
|
138
|
+
|
139
|
+
//if type is not identified or not present in the database, generate a new type
|
140
|
+
let selectedType: { id: string; name: string } | undefined = undefined;
|
141
|
+
|
142
|
+
selectedType = types.find((t) => t.name === res.result.document_type);
|
143
|
+
|
144
|
+
if (!selectedType) {
|
145
|
+
log.warn("Document type not identified: starting type generation");
|
146
|
+
const newType = await generateNewType(
|
147
|
+
context,
|
148
|
+
existing_types,
|
149
|
+
content,
|
150
|
+
fileRef,
|
151
|
+
);
|
152
|
+
selectedType = { id: newType.id, name: newType.name };
|
153
|
+
}
|
154
|
+
|
155
|
+
if (!selectedType) {
|
156
|
+
log.error("Type not found: ", res.result);
|
157
|
+
throw new Error("Type not found: " + res.result.document_type);
|
158
|
+
}
|
159
|
+
|
160
|
+
//update object with selected type
|
161
|
+
await client.objects.update(objectId, {
|
162
|
+
type: selectedType.id,
|
163
|
+
});
|
164
|
+
|
165
|
+
return {
|
166
|
+
id: selectedType.id,
|
167
|
+
name: selectedType.name,
|
168
|
+
isNew: !types.find((t) => t.name === selectedType.name),
|
169
|
+
};
|
136
170
|
}
|
137
171
|
|
138
172
|
async function generateNewType(
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
173
|
+
context: ActivityContext<GenerateOrAssignContentTypeParams>,
|
174
|
+
existing_types: ContentObjectTypeItem[],
|
175
|
+
content?: string,
|
176
|
+
fileRef?: string,
|
143
177
|
) {
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
178
|
+
const { client, params } = context;
|
179
|
+
|
180
|
+
const project = await context.fetchProject();
|
181
|
+
const interactionName =
|
182
|
+
params.interactionNames?.generateMetadataModel ??
|
183
|
+
INT_GENERATE_METADATA_MODEL;
|
184
|
+
|
185
|
+
const genTypeRes = await executeInteractionFromActivity(
|
186
|
+
client,
|
187
|
+
interactionName,
|
188
|
+
params,
|
189
|
+
{
|
190
|
+
existing_types,
|
191
|
+
content: content,
|
192
|
+
human_context: project?.configuration?.human_context ?? undefined,
|
193
|
+
image: fileRef ? fileRef : undefined,
|
194
|
+
},
|
195
|
+
);
|
196
|
+
|
197
|
+
if (!genTypeRes.result.document_type) {
|
198
|
+
log.error("No name generated for type", genTypeRes);
|
199
|
+
throw new Error("No name generated for type");
|
200
|
+
}
|
201
|
+
|
202
|
+
log.info("Generated schema for type", genTypeRes.result.metadata_schema);
|
203
|
+
const typeData: CreateContentObjectTypePayload = {
|
204
|
+
name: genTypeRes.result.document_type,
|
205
|
+
object_schema: genTypeRes.result.metadata_schema,
|
206
|
+
is_chunkable: genTypeRes.result.is_chunkable,
|
207
|
+
table_layout: genTypeRes.result.table_layout,
|
208
|
+
};
|
209
|
+
|
210
|
+
const type = await client.types.create(typeData);
|
211
|
+
|
212
|
+
return type;
|
172
213
|
}
|
@@ -8,7 +8,8 @@ export { executeInteraction } from "./executeInteraction.js";
|
|
8
8
|
export { extractDocumentText } from "./extractDocumentText.js";
|
9
9
|
export { generateDocumentProperties } from "./generateDocumentProperties.js";
|
10
10
|
export { generateEmbeddings } from "./generateEmbeddings.js";
|
11
|
-
export { generateImageRendition } from "./generateImageRendition.js";
|
11
|
+
export { generateImageRendition } from "./renditions/generateImageRendition.js";
|
12
|
+
export { generateVideoRendition } from "./renditions/generateVideoRendition.js";
|
12
13
|
export { generateOrAssignContentType } from "./generateOrAssignContentType.js";
|
13
14
|
export { getObjectFromStore } from "./getObjectFromStore.js";
|
14
15
|
export { handleDslError } from "./handleError.js";
|
@@ -0,0 +1,99 @@
|
|
1
|
+
import { log } from "@temporalio/activity";
|
2
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
|
3
|
+
import { setupActivity } from "../../dsl/setup/ActivityContext.js";
|
4
|
+
import { NoDocumentFound, WorkflowParamNotFound } from "../../errors.js";
|
5
|
+
import { saveBlobToTempFile } from "../../utils/blobs.js";
|
6
|
+
import {
|
7
|
+
ImageRenditionParams,
|
8
|
+
uploadRenditionPages,
|
9
|
+
} from "../../utils/renditions.js";
|
10
|
+
|
11
|
+
interface GenerateImageRenditionParams extends ImageRenditionParams {}
|
12
|
+
|
13
|
+
export interface GenerateImageRendition
|
14
|
+
extends DSLActivitySpec<GenerateImageRenditionParams> {
|
15
|
+
name: "generateImageRendition";
|
16
|
+
}
|
17
|
+
|
18
|
+
export async function generateImageRendition(
|
19
|
+
payload: DSLActivityExecutionPayload<GenerateImageRenditionParams>,
|
20
|
+
) {
|
21
|
+
const {
|
22
|
+
client,
|
23
|
+
objectId,
|
24
|
+
params: originParams,
|
25
|
+
} = await setupActivity<GenerateImageRenditionParams>(payload);
|
26
|
+
|
27
|
+
// Fix: Use maxHeightWidth if max_hw is not provided
|
28
|
+
const params = {
|
29
|
+
...originParams,
|
30
|
+
max_hw: originParams.max_hw || (originParams as any).maxHeightWidth || 1024, // Default to 1024 if both are missing
|
31
|
+
format: originParams.format || (originParams as any).format_output || "png", // Default to png if format is missing
|
32
|
+
};
|
33
|
+
|
34
|
+
log.info(`Generating image rendition for ${objectId}`, {
|
35
|
+
originParams,
|
36
|
+
params,
|
37
|
+
});
|
38
|
+
|
39
|
+
const inputObject = await client.objects.retrieve(objectId).catch((err) => {
|
40
|
+
log.error(`Failed to retrieve document ${objectId}`, { err });
|
41
|
+
if (err.message.includes("not found")) {
|
42
|
+
throw new NoDocumentFound(`Document ${objectId} not found`, [objectId]);
|
43
|
+
}
|
44
|
+
throw err;
|
45
|
+
});
|
46
|
+
|
47
|
+
if (!params.format) {
|
48
|
+
log.error(`Format not found`);
|
49
|
+
throw new WorkflowParamNotFound(`format`);
|
50
|
+
}
|
51
|
+
|
52
|
+
if (!inputObject.content?.source) {
|
53
|
+
log.error(`Document ${objectId} has no source`);
|
54
|
+
throw new NoDocumentFound(`Document ${objectId} has no source`, [objectId]);
|
55
|
+
}
|
56
|
+
|
57
|
+
if (
|
58
|
+
!inputObject.content.type ||
|
59
|
+
!inputObject.content.type?.startsWith("image/")
|
60
|
+
) {
|
61
|
+
log.error(
|
62
|
+
`Document ${objectId} is not an image or a video: ${inputObject.content.type}`,
|
63
|
+
);
|
64
|
+
throw new NoDocumentFound(
|
65
|
+
`Document ${objectId} is not an image or a video: ${inputObject.content.type}`,
|
66
|
+
[objectId],
|
67
|
+
);
|
68
|
+
}
|
69
|
+
|
70
|
+
//array of rendition files to upload
|
71
|
+
let renditionPages: string[] = [];
|
72
|
+
|
73
|
+
const imageFile = await saveBlobToTempFile(
|
74
|
+
client,
|
75
|
+
inputObject.content.source,
|
76
|
+
);
|
77
|
+
log.info(`Image ${objectId} copied to ${imageFile}`);
|
78
|
+
renditionPages.push(imageFile);
|
79
|
+
|
80
|
+
const uploaded = await uploadRenditionPages(
|
81
|
+
client,
|
82
|
+
objectId,
|
83
|
+
[imageFile],
|
84
|
+
params,
|
85
|
+
);
|
86
|
+
|
87
|
+
if (!uploaded || !uploaded.length || !uploaded[0]) {
|
88
|
+
log.error(`Failed to upload rendition for ${objectId}`, { uploaded });
|
89
|
+
throw new Error(
|
90
|
+
`Failed to upload rendition for ${objectId} - upload object is empty`,
|
91
|
+
);
|
92
|
+
}
|
93
|
+
|
94
|
+
return {
|
95
|
+
uploads: uploaded.map((u) => u),
|
96
|
+
format: params.format,
|
97
|
+
status: "success",
|
98
|
+
};
|
99
|
+
}
|