@vertesia/workflow 0.56.0 → 0.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/lib/cjs/activities/generateEmbeddings.js +158 -61
  2. package/lib/cjs/activities/generateEmbeddings.js.map +1 -1
  3. package/lib/cjs/activities/generateOrAssignContentType.js +19 -8
  4. package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -1
  5. package/lib/cjs/activities/index-dsl.js +4 -2
  6. package/lib/cjs/activities/index-dsl.js.map +1 -1
  7. package/lib/cjs/activities/renditions/generateImageRendition.js +57 -0
  8. package/lib/cjs/activities/renditions/generateImageRendition.js.map +1 -0
  9. package/lib/cjs/activities/renditions/generateVideoRendition.js +196 -0
  10. package/lib/cjs/activities/renditions/generateVideoRendition.js.map +1 -0
  11. package/lib/cjs/dsl/dsl-workflow.js +8 -0
  12. package/lib/cjs/dsl/dsl-workflow.js.map +1 -1
  13. package/lib/cjs/index.js +3 -1
  14. package/lib/cjs/index.js.map +1 -1
  15. package/lib/cjs/utils/renditions.js +88 -0
  16. package/lib/cjs/utils/renditions.js.map +1 -0
  17. package/lib/esm/activities/generateEmbeddings.js +160 -63
  18. package/lib/esm/activities/generateEmbeddings.js.map +1 -1
  19. package/lib/esm/activities/generateOrAssignContentType.js +21 -10
  20. package/lib/esm/activities/generateOrAssignContentType.js.map +1 -1
  21. package/lib/esm/activities/index-dsl.js +2 -1
  22. package/lib/esm/activities/index-dsl.js.map +1 -1
  23. package/lib/esm/activities/renditions/generateImageRendition.js +54 -0
  24. package/lib/esm/activities/renditions/generateImageRendition.js.map +1 -0
  25. package/lib/esm/activities/renditions/generateVideoRendition.js +190 -0
  26. package/lib/esm/activities/renditions/generateVideoRendition.js.map +1 -0
  27. package/lib/esm/dsl/dsl-workflow.js +8 -0
  28. package/lib/esm/dsl/dsl-workflow.js.map +1 -1
  29. package/lib/esm/index.js +3 -1
  30. package/lib/esm/index.js.map +1 -1
  31. package/lib/esm/utils/renditions.js +80 -0
  32. package/lib/esm/utils/renditions.js.map +1 -0
  33. package/lib/types/activities/generateEmbeddings.d.ts +1 -1
  34. package/lib/types/activities/generateEmbeddings.d.ts.map +1 -1
  35. package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -1
  36. package/lib/types/activities/index-dsl.d.ts +2 -1
  37. package/lib/types/activities/index-dsl.d.ts.map +1 -1
  38. package/lib/types/activities/{generateImageRendition.d.ts → renditions/generateImageRendition.d.ts} +4 -5
  39. package/lib/types/activities/renditions/generateImageRendition.d.ts.map +1 -0
  40. package/lib/types/activities/renditions/generateVideoRendition.d.ts +15 -0
  41. package/lib/types/activities/renditions/generateVideoRendition.d.ts.map +1 -0
  42. package/lib/types/dsl/dsl-workflow.d.ts.map +1 -1
  43. package/lib/types/index.d.ts +3 -1
  44. package/lib/types/index.d.ts.map +1 -1
  45. package/lib/types/utils/renditions.d.ts +23 -0
  46. package/lib/types/utils/renditions.d.ts.map +1 -0
  47. package/lib/workflows-bundle.js +99 -34
  48. package/package.json +3 -3
  49. package/src/activities/generateEmbeddings.ts +440 -296
  50. package/src/activities/generateOrAssignContentType.ts +185 -144
  51. package/src/activities/index-dsl.ts +2 -1
  52. package/src/activities/renditions/generateImageRendition.ts +99 -0
  53. package/src/activities/renditions/generateVideoRendition.ts +288 -0
  54. package/src/dsl/dsl-workflow.ts +8 -0
  55. package/src/dsl/workflow-exec-child.test.ts +1 -0
  56. package/src/dsl/workflow.test.ts +1 -0
  57. package/src/index.ts +3 -1
  58. package/src/utils/renditions.ts +124 -0
  59. package/lib/cjs/activities/generateImageRendition.js +0 -167
  60. package/lib/cjs/activities/generateImageRendition.js.map +0 -1
  61. package/lib/esm/activities/generateImageRendition.js +0 -161
  62. package/lib/esm/activities/generateImageRendition.js.map +0 -1
  63. package/lib/types/activities/generateImageRendition.d.ts.map +0 -1
  64. package/src/activities/generateImageRendition.ts +0 -202
@@ -1,357 +1,501 @@
1
1
  import { EmbeddingsResult } from "@llumiverse/common";
2
2
  import { log } from "@temporalio/activity";
3
3
  import { VertesiaClient } from "@vertesia/client";
4
- import { ContentObject, DSLActivityExecutionPayload, DSLActivitySpec, ProjectConfigurationEmbeddings, SupportedEmbeddingTypes } from "@vertesia/common";
4
+ import {
5
+ ContentObject,
6
+ DSLActivityExecutionPayload,
7
+ DSLActivitySpec,
8
+ ImageRenditionFormat,
9
+ ProjectConfigurationEmbeddings,
10
+ SupportedEmbeddingTypes,
11
+ } from "@vertesia/common";
5
12
  import { setupActivity } from "../dsl/setup/ActivityContext.js";
6
- import { NoDocumentFound } from '../errors.js';
13
+ import { NoDocumentFound } from "../errors.js";
7
14
  import { fetchBlobAsBase64, md5 } from "../utils/blobs.js";
8
15
  import { DocPart, getContentParts } from "../utils/chunks.js";
9
16
  import { countTokens } from "../utils/tokens.js";
10
17
 
11
-
12
18
  export interface GenerateEmbeddingsParams {
13
-
14
- /**
15
- * The model to use for embedding generation
16
- * If not set, the default model for the project will be used
17
- */
18
- model?: string;
19
-
20
- /**
21
- * The environment to use for embedding generation
22
- * If not set, the default environment for the project will be used
23
- */
24
- environment?: string;
25
-
26
- /**
27
- * If true, force embedding generation even if the document already has embeddings
28
- */
29
- force?: boolean;
30
-
31
- /**
32
- * The embedding type to generate
33
- */
34
- type: SupportedEmbeddingTypes;
35
-
36
- /**
37
- * The DocParts to use for long documents
38
- */
39
- parts?: DocPart[];
19
+ /**
20
+ * The model to use for embedding generation
21
+ * If not set, the default model for the project will be used
22
+ */
23
+ model?: string;
24
+
25
+ /**
26
+ * The environment to use for embedding generation
27
+ * If not set, the default environment for the project will be used
28
+ */
29
+ environment?: string;
30
+
31
+ /**
32
+ * If true, force embedding generation even if the document already has embeddings
33
+ */
34
+ force?: boolean;
35
+
36
+ /**
37
+ * The embedding type to generate
38
+ */
39
+ type: SupportedEmbeddingTypes;
40
+
41
+ /**
42
+ * The DocParts to use for long documents
43
+ */
44
+ parts?: DocPart[];
40
45
  }
41
46
 
42
- export interface GenerateEmbeddings extends DSLActivitySpec<GenerateEmbeddingsParams> {
43
- name: 'generateEmbeddings';
47
+ export interface GenerateEmbeddings
48
+ extends DSLActivitySpec<GenerateEmbeddingsParams> {
49
+ name: "generateEmbeddings";
44
50
  }
45
51
 
46
- export async function generateEmbeddings(payload: DSLActivityExecutionPayload<GenerateEmbeddingsParams>) {
47
- const { params, client, objectId, fetchProject } = await setupActivity<GenerateEmbeddingsParams>(payload);
48
- const { force, type } = params;
49
-
50
- const projectData = await fetchProject();
51
- const config = projectData?.configuration.embeddings[type];
52
- if (!projectData) {
53
- throw new NoDocumentFound('Project not found', [payload.project_id]);
54
- }
55
- if (!config) {
56
- throw new NoDocumentFound('Embeddings configuration not found', [objectId])
57
- }
58
-
59
- if (!projectData) {
60
- throw new NoDocumentFound('Project not found', [payload.project_id]);
61
- }
62
-
63
- if (!projectData?.configuration.embeddings[type]?.enabled) {
64
- log.info(`Embeddings generation disabled for type ${type} on project: ${projectData.name} (${projectData.namespace})`, { config });
65
- return { id: objectId, status: "skipped", message: `Embeddings generation disabled for type ${type}` }
66
- }
67
-
68
- log.info(`${type} embedding generation starting for object ${objectId}`, { force, config });
69
-
70
- if (!config.environment) {
71
- throw new Error('No environment found in project configuration. Set environment in project configuration to generate embeddings.');
72
- }
73
-
74
- const document = await client.objects.retrieve(objectId, "+text +parts +embeddings +tokens +properties");
75
-
76
- if (!document) {
77
- throw new NoDocumentFound('Document not found', [objectId]);
78
- }
79
-
80
- if (!document.content) {
81
- throw new NoDocumentFound('Document content not found', [objectId]);
82
- }
83
-
84
- let res;
85
-
86
- switch (type) {
87
- case SupportedEmbeddingTypes.text:
88
- res = await generateTextEmbeddings({
89
- client,
90
- config,
91
- document,
92
- type
93
- })
94
- break;
95
- case SupportedEmbeddingTypes.properties:
96
- res = await generateTextEmbeddings({
97
- client,
98
- config,
99
- document,
100
- type,
101
- });
102
- break;
103
- case SupportedEmbeddingTypes.image:
104
- res = await generateImageEmbeddings({
105
- client,
106
- config,
107
- document,
108
- type
109
- });
110
- break;
111
- default:
112
- res = { id: objectId, status: "failed", message: `unsupported embedding type: ${type}` }
113
- }
114
-
115
- return res;
116
-
52
+ export async function generateEmbeddings(
53
+ payload: DSLActivityExecutionPayload<GenerateEmbeddingsParams>,
54
+ ) {
55
+ const { params, client, objectId, fetchProject } =
56
+ await setupActivity<GenerateEmbeddingsParams>(payload);
57
+ const { force, type } = params;
58
+
59
+ const projectData = await fetchProject();
60
+ const config = projectData?.configuration.embeddings[type];
61
+ if (!projectData) {
62
+ throw new NoDocumentFound("Project not found", [payload.project_id]);
63
+ }
64
+ if (!config) {
65
+ throw new NoDocumentFound("Embeddings configuration not found", [objectId]);
66
+ }
67
+
68
+ if (!projectData) {
69
+ throw new NoDocumentFound("Project not found", [payload.project_id]);
70
+ }
71
+
72
+ if (!projectData?.configuration.embeddings[type]?.enabled) {
73
+ log.info(
74
+ `Embeddings generation disabled for type ${type} on project: ${projectData.name} (${projectData.namespace})`,
75
+ { config },
76
+ );
77
+ return {
78
+ id: objectId,
79
+ status: "skipped",
80
+ message: `Embeddings generation disabled for type ${type}`,
81
+ };
82
+ }
83
+
84
+ log.info(`${type} embedding generation starting for object ${objectId}`, {
85
+ force,
86
+ config,
87
+ });
88
+
89
+ if (!config.environment) {
90
+ throw new Error(
91
+ "No environment found in project configuration. Set environment in project configuration to generate embeddings.",
92
+ );
93
+ }
94
+
95
+ const document = await client.objects.retrieve(
96
+ objectId,
97
+ "+text +parts +embeddings +tokens +properties",
98
+ );
99
+
100
+ if (!document) {
101
+ throw new NoDocumentFound("Document not found", [objectId]);
102
+ }
103
+
104
+ if (!document.content) {
105
+ throw new NoDocumentFound("Document content not found", [objectId]);
106
+ }
107
+
108
+ let res;
109
+
110
+ switch (type) {
111
+ case SupportedEmbeddingTypes.text:
112
+ res = await generateTextEmbeddings({
113
+ client,
114
+ config,
115
+ document,
116
+ type,
117
+ });
118
+ break;
119
+ case SupportedEmbeddingTypes.properties:
120
+ res = await generateTextEmbeddings({
121
+ client,
122
+ config,
123
+ document,
124
+ type,
125
+ });
126
+ break;
127
+ case SupportedEmbeddingTypes.image:
128
+ res = await generateImageEmbeddings({
129
+ client,
130
+ config,
131
+ document,
132
+ type,
133
+ });
134
+ break;
135
+ default:
136
+ res = {
137
+ id: objectId,
138
+ status: "failed",
139
+ message: `unsupported embedding type: ${type}`,
140
+ };
141
+ }
142
+
143
+ return res;
117
144
  }
118
145
 
119
-
120
146
  interface ExecuteGenerateEmbeddingsParams {
121
- document: ContentObject;
122
- client: VertesiaClient;
123
- type: SupportedEmbeddingTypes;
124
- config: ProjectConfigurationEmbeddings;
125
- property?: string;
126
- force?: boolean;
147
+ document: ContentObject;
148
+ client: VertesiaClient;
149
+ type: SupportedEmbeddingTypes;
150
+ config: ProjectConfigurationEmbeddings;
151
+ property?: string;
152
+ force?: boolean;
127
153
  }
128
154
 
129
- async function generateTextEmbeddings({ document, client, type, config }: ExecuteGenerateEmbeddingsParams, parts?: DocPart[],) {
130
- // if (!force && document.embeddings[type]?.etag === (document.text_etag ?? md5(document.text))) {
131
- // return { id: objectId, status: "skipped", message: "embeddings already generated" }
132
- // }
133
-
134
- if (!document) {
135
- return { status: "error", message: "document is null or undefined" }
136
- }
137
-
138
- if (type !== SupportedEmbeddingTypes.text && type !== SupportedEmbeddingTypes.properties) {
139
- return { id: document.id, status: "failed", message: `unsupported embedding type: ${type}` }
155
+ async function generateTextEmbeddings(
156
+ { document, client, type, config }: ExecuteGenerateEmbeddingsParams,
157
+ parts?: DocPart[],
158
+ ) {
159
+ // if (!force && document.embeddings[type]?.etag === (document.text_etag ?? md5(document.text))) {
160
+ // return { id: objectId, status: "skipped", message: "embeddings already generated" }
161
+ // }
162
+
163
+ if (!document) {
164
+ return { status: "error", message: "document is null or undefined" };
165
+ }
166
+
167
+ if (
168
+ type !== SupportedEmbeddingTypes.text &&
169
+ type !== SupportedEmbeddingTypes.properties
170
+ ) {
171
+ return {
172
+ id: document.id,
173
+ status: "failed",
174
+ message: `unsupported embedding type: ${type}`,
175
+ };
176
+ }
177
+
178
+ if (type === SupportedEmbeddingTypes.text && !document.text) {
179
+ return { id: document.id, status: "failed", message: "no text found" };
180
+ }
181
+ if (type === SupportedEmbeddingTypes.properties && !document?.properties) {
182
+ return {
183
+ id: document.id,
184
+ status: "failed",
185
+ message: "no properties found",
186
+ };
187
+ }
188
+
189
+ const { environment, model } = config;
190
+
191
+ const partDefinitions = parts ?? [];
192
+
193
+ // Count tokens if not already done
194
+ if (!document.tokens?.count && type === SupportedEmbeddingTypes.text) {
195
+ log.debug("Updating token count for document: " + document.id);
196
+ const tokensData = countTokens(document.text!);
197
+ await client.objects.update(document.id, {
198
+ tokens: {
199
+ ...tokensData,
200
+ etag: document.text_etag ?? md5(document.text!),
201
+ },
202
+ });
203
+ document.tokens = {
204
+ ...tokensData,
205
+ etag: document.text_etag ?? md5(document.text!),
206
+ };
207
+ }
208
+
209
+ const maxTokens = config.max_tokens ?? 8000;
210
+
211
+ //generate embeddings for the main doc if document isn't too large
212
+ //if too large, we'll just generate embeddings for the parts
213
+ //then we can generate embeddings for the main document by averaging the tensors
214
+ log.info(`Generating ${type} embeddings for document ${document.id}`);
215
+ if (
216
+ type === SupportedEmbeddingTypes.text &&
217
+ document.tokens?.count &&
218
+ document.tokens?.count > maxTokens
219
+ ) {
220
+ log.info("Document too large, generating embeddings for parts");
221
+
222
+ if (!document.text) {
223
+ return { id: document.id, status: "failed", message: "no text found" };
140
224
  }
141
225
 
142
- if (type === SupportedEmbeddingTypes.text && !document.text) {
143
- return { id: document.id, status: "failed", message: "no text found" }
226
+ if (!partDefinitions || partDefinitions.length === 0) {
227
+ log.info("No parts found for document, skipping embeddings generation");
228
+ return { id: document.id, status: "failed", message: "no parts found" };
144
229
  }
145
- if (type === SupportedEmbeddingTypes.properties && !document?.properties) {
146
- return { id: document.id, status: "failed", message: "no properties found" }
147
- }
148
-
149
- const { environment, model } = config;
150
230
 
151
- const partDefinitions = parts ?? [];
231
+ log.info("Generating embeddings for parts", {
232
+ parts: partDefinitions,
233
+ max_tokens: maxTokens,
234
+ });
235
+ const docParts = getContentParts(document.text, partDefinitions);
152
236
 
153
- // Count tokens if not already done
154
- if (!document.tokens?.count && type === SupportedEmbeddingTypes.text) {
155
- log.debug('Updating token count for document: ' + document.id);
156
- const tokensData = countTokens(document.text!);
157
- await client.objects.update(document.id, {
158
- tokens: {
159
- ...tokensData,
160
- etag: document.text_etag ?? md5(document.text!)
161
- }
237
+ log.info(`Retrieved ${docParts.length} parts`);
238
+ const start = new Date().getTime();
239
+ const generatePartEmbeddings = async (partContent: string, i: number) => {
240
+ const localStart = new Date().getTime();
241
+ try {
242
+ log.info(`Generating embeddings for part ${i}`, {
243
+ text_len: partContent.length,
162
244
  });
163
- document.tokens = {
164
- ...tokensData,
165
- etag: document.text_etag ?? md5(document.text!)
166
- };
167
- }
168
-
169
- const maxTokens = config.max_tokens ?? 8000;
170
-
171
- //generate embeddings for the main doc if document isn't too large
172
- //if too large, we'll just generate embeddings for the parts
173
- //then we can generate embeddings for the main document by averaging the tensors
174
- log.info(`Generating ${type} embeddings for document ${document.id}`);
175
- if (type === SupportedEmbeddingTypes.text && document.tokens?.count && document.tokens?.count > maxTokens) {
176
- log.info('Document too large, generating embeddings for parts');
177
-
178
-
179
- if (!document.text) {
180
- return { id: document.id, status: "failed", message: "no text found" }
181
- }
182
-
183
- if (!partDefinitions || partDefinitions.length === 0) {
184
- log.info('No parts found for document, skipping embeddings generation');
185
- return { id: document.id, status: "failed", message: "no parts found" }
245
+ if (!partContent) {
246
+ return {
247
+ id: i,
248
+ number: i,
249
+ result: null,
250
+ status: "skipped",
251
+ message: "no text found",
252
+ };
186
253
  }
187
254
 
255
+ const e = await generateEmbeddingsFromStudio(
256
+ partContent,
257
+ environment,
258
+ client,
259
+ model,
260
+ ).catch((e) => {
261
+ log.error("Error generating embeddings for part " + i, {
262
+ text_length: partContent.length,
263
+ error: e,
264
+ });
265
+ return null;
266
+ });
188
267
 
189
- log.info('Generating embeddings for parts', { parts: partDefinitions, max_tokens: maxTokens });
190
- const docParts = getContentParts(document.text, partDefinitions);
191
-
192
-
193
- log.info(`Retrieved ${docParts.length} parts`)
194
- const start = new Date().getTime();
195
- const generatePartEmbeddings = async (partContent: string, i: number) => {
196
- const localStart = new Date().getTime();
197
- try {
198
- log.info(`Generating embeddings for part ${i}`, { text_len: partContent.length })
199
- if (!partContent) {
200
- return { id: i, number: i, result: null, status: "skipped", message: "no text found" }
201
- }
202
-
203
- const e = await generateEmbeddingsFromStudio(partContent, environment, client, model).catch(e => {
204
- log.error('Error generating embeddings for part ' + i, { text_length: partContent.length, error: e });
205
- return null;
206
- });
207
-
208
- if (!e || !e.values) {
209
- return { id: i, number: i, result: null, message: "no embeddings generated" }
210
- }
211
-
212
- if (e.values.length === 0) {
213
- return { id: i, number: i, result: null, message: "no embeddings generated" }
214
- }
215
- log.info(`Generated embeddings for part ${i}`, { len: e.values.length, duration: new Date().getTime() - localStart });
216
-
217
- return { number: i, result: e }
218
- } catch (err: any) {
219
- log.info(`Error generating ${type} embeddings for part ${i} of ${document.id}`, { error: err });
220
- return { number: i, result: null, message: "error generating embeddings", error: err.message }
221
- }
268
+ if (!e || !e.values) {
269
+ return {
270
+ id: i,
271
+ number: i,
272
+ result: null,
273
+ message: "no embeddings generated",
274
+ };
222
275
  }
223
276
 
224
- const partEmbeddings = await Promise.all(docParts.map((part, i) => generatePartEmbeddings(part, i)));
225
- const validPartEmbeddings = partEmbeddings.filter(e => e.result !== null).map(e => e.result);
226
- const averagedEmbedding = computeAttentionEmbedding(validPartEmbeddings.map(e => e.values));
227
- log.info(`Averaged embeddings for document ${document.id} in ${(new Date().getTime() - start) / 1000} seconds`, { len: averagedEmbedding.length, count: validPartEmbeddings.length, max_tokens: maxTokens });
228
- await client.objects.setEmbedding(document.id, type,
229
- {
230
- values: averagedEmbedding,
231
- model: validPartEmbeddings[0].model,
232
- etag: document.text_etag
233
- }
234
- );
235
- log.info(`Object ${document.id} embedding set`, { type, len: averagedEmbedding.length });
236
-
237
- } else {
238
- log.info(`Generating ${type} embeddings for document`);
239
-
240
- const res = await generateEmbeddingsFromStudio(JSON.stringify(document[type]), environment, client);
241
- if (!res || !res.values) {
242
- return { id: document.id, status: "failed", message: "no embeddings generated" }
277
+ if (e.values.length === 0) {
278
+ return {
279
+ id: i,
280
+ number: i,
281
+ result: null,
282
+ message: "no embeddings generated",
283
+ };
243
284
  }
285
+ log.info(`Generated embeddings for part ${i}`, {
286
+ len: e.values.length,
287
+ duration: new Date().getTime() - localStart,
288
+ });
244
289
 
245
- log.info(`${type} embeddings generated for document ${document.id}`, { len: res.values.length });
246
- await client.objects.setEmbedding(document.id, type,
247
- {
248
- values: res.values,
249
- model: res.model,
250
- etag: document.text_etag
251
- }
290
+ return { number: i, result: e };
291
+ } catch (err: any) {
292
+ log.info(
293
+ `Error generating ${type} embeddings for part ${i} of ${document.id}`,
294
+ { error: err },
252
295
  );
296
+ return {
297
+ number: i,
298
+ result: null,
299
+ message: "error generating embeddings",
300
+ error: err.message,
301
+ };
302
+ }
303
+ };
253
304
 
254
- return { id: document.id, type, status: "completed", len: res.values.length }
255
-
256
- }
257
-
258
- }
259
-
260
- async function generateImageEmbeddings({ document, client, type, config }: ExecuteGenerateEmbeddingsParams) {
261
-
262
- log.info('Generating image embeddings for document ' + document.id, { content: document.content });
263
- if (!document.content?.type?.startsWith('image/') && !document.content?.type?.includes('pdf')) {
264
- return { id: document.id, type, status: "failed", message: "content is not an image" }
265
- }
266
- const { environment, model } = config
267
-
268
- const resRnd = await client.store.objects.getRendition(document.id, {
269
- format: "image/png",
270
- max_hw: 1024,
271
- generate_if_missing: true
305
+ const partEmbeddings = await Promise.all(
306
+ docParts.map((part, i) => generatePartEmbeddings(part, i)),
307
+ );
308
+ const validPartEmbeddings = partEmbeddings
309
+ .filter((e) => e.result !== null)
310
+ .map((e) => e.result);
311
+ const averagedEmbedding = computeAttentionEmbedding(
312
+ validPartEmbeddings.map((e) => e.values),
313
+ );
314
+ log.info(
315
+ `Averaged embeddings for document ${document.id} in ${(new Date().getTime() - start) / 1000} seconds`,
316
+ {
317
+ len: averagedEmbedding.length,
318
+ count: validPartEmbeddings.length,
319
+ max_tokens: maxTokens,
320
+ },
321
+ );
322
+ await client.objects.setEmbedding(document.id, type, {
323
+ values: averagedEmbedding,
324
+ model: validPartEmbeddings[0].model,
325
+ etag: document.text_etag,
272
326
  });
273
-
274
- if (resRnd.status === 'generating') {
275
- throw new Error("Rendition is generating, will retry later")
276
- } else if (resRnd.status === "failed" || !resRnd.rendition) {
277
- throw new NoDocumentFound("Rendition retrieval failed", [document.id])
278
- }
279
-
280
- if (!resRnd.rendition.content.source) {
281
- throw new NoDocumentFound("No source found in rendition", [document.id])
282
- }
283
-
284
- const image = await fetchBlobAsBase64(client, resRnd.rendition.content.source);
285
-
286
- const res = await client.environments.embeddings(environment, {
287
- image,
288
- model
289
- }).then(res => res).catch(e => {
290
- log.error('Error generating embeddings for image', { error: e })
291
- throw e;
327
+ log.info(`Object ${document.id} embedding set`, {
328
+ type,
329
+ len: averagedEmbedding.length,
292
330
  });
331
+ } else {
332
+ log.info(`Generating ${type} embeddings for document`);
293
333
 
334
+ const res = await generateEmbeddingsFromStudio(
335
+ JSON.stringify(document[type]),
336
+ environment,
337
+ client,
338
+ );
294
339
  if (!res || !res.values) {
295
- return { id: document.id, status: "failed", message: "no embeddings generated" }
340
+ return {
341
+ id: document.id,
342
+ status: "failed",
343
+ message: "no embeddings generated",
344
+ };
296
345
  }
297
346
 
298
- await client.objects.setEmbedding(document.id, SupportedEmbeddingTypes.image,
299
- {
300
- values: res.values,
301
- model: res.model,
302
- etag: document.text_etag
303
- }
304
- );
305
-
306
- return { id: document.id, type, status: "completed", len: res.values.length }
347
+ log.info(`${type} embeddings generated for document ${document.id}`, {
348
+ len: res.values.length,
349
+ });
350
+ await client.objects.setEmbedding(document.id, type, {
351
+ values: res.values,
352
+ model: res.model,
353
+ etag: document.text_etag,
354
+ });
307
355
 
356
+ return {
357
+ id: document.id,
358
+ type,
359
+ status: "completed",
360
+ len: res.values.length,
361
+ };
362
+ }
308
363
  }
309
364
 
310
- async function generateEmbeddingsFromStudio(text: string, env: string, client: VertesiaClient, model?: string): Promise<EmbeddingsResult> {
365
+ async function generateImageEmbeddings({
366
+ document,
367
+ client,
368
+ type,
369
+ config,
370
+ }: ExecuteGenerateEmbeddingsParams) {
371
+ log.info("Generating image embeddings for document " + document.id, {
372
+ content: document.content,
373
+ });
374
+ if (
375
+ !document.content?.type?.startsWith("image/") &&
376
+ !document.content?.type?.includes("pdf")
377
+ ) {
378
+ return {
379
+ id: document.id,
380
+ type,
381
+ status: "failed",
382
+ message: "content is not an image",
383
+ };
384
+ }
385
+ const { environment, model } = config;
386
+
387
+ const resRnd = await client.store.objects.getRendition(document.id, {
388
+ format: ImageRenditionFormat.jpeg,
389
+ max_hw: 1024,
390
+ generate_if_missing: true,
391
+ sign_url: false,
392
+ });
393
+
394
+ if (resRnd.status === "generating") {
395
+ throw new Error("Rendition is generating, will retry later");
396
+ } else if (
397
+ resRnd.status === "failed" ||
398
+ !resRnd.renditions ||
399
+ !resRnd.renditions.length
400
+ ) {
401
+ throw new NoDocumentFound("Rendition retrieval failed", [document.id]);
402
+ }
403
+
404
+ const renditions = resRnd.renditions;
405
+ if (!renditions?.length) {
406
+ throw new NoDocumentFound("No source found in rendition", [document.id]);
407
+ }
408
+
409
+ const rendition = renditions[0];
410
+ const image = await fetchBlobAsBase64(client, rendition);
411
+
412
+ const res = await client.environments
413
+ .embeddings(environment, {
414
+ image,
415
+ model,
416
+ })
417
+ .then((res) => res)
418
+ .catch((e) => {
419
+ log.error("Error generating embeddings for image", { error: e });
420
+ throw e;
421
+ });
311
422
 
312
- log.info(`Generating embeddings for text of ${text.length} chars with environment ${env}`);
423
+ if (!res || !res.values) {
424
+ return {
425
+ id: document.id,
426
+ status: "failed",
427
+ message: "no embeddings generated",
428
+ };
429
+ }
430
+
431
+ await client.objects.setEmbedding(
432
+ document.id,
433
+ SupportedEmbeddingTypes.image,
434
+ {
435
+ values: res.values,
436
+ model: res.model,
437
+ etag: document.text_etag,
438
+ },
439
+ );
440
+
441
+ return { id: document.id, type, status: "completed", len: res.values.length };
442
+ }
313
443
 
314
- return client.environments.embeddings(env, {
315
- text,
316
- model
317
- }).then(res => res).catch(e => {
318
- log.error('Error generating embeddings for text', { error: e })
319
- throw e;
444
+ async function generateEmbeddingsFromStudio(
445
+ text: string,
446
+ env: string,
447
+ client: VertesiaClient,
448
+ model?: string,
449
+ ): Promise<EmbeddingsResult> {
450
+ log.info(
451
+ `Generating embeddings for text of ${text.length} chars with environment ${env}`,
452
+ );
453
+
454
+ return client.environments
455
+ .embeddings(env, {
456
+ text,
457
+ model,
458
+ })
459
+ .then((res) => res)
460
+ .catch((e) => {
461
+ log.error("Error generating embeddings for text", { error: e });
462
+ throw e;
320
463
  });
321
-
322
464
  }
323
465
 
324
466
  //Simplified attention mechanism
325
467
  // This is a naive implementation and should be replaced with a more sophisticated
326
468
  // using tensorflow in a specific package
327
469
  function computeAttentionEmbedding(chunkEmbeddings: number[][]): number[] {
328
- if (chunkEmbeddings.length === 0) return [];
470
+ if (chunkEmbeddings.length === 0) return [];
329
471
 
330
- const start = new Date().getTime();
472
+ const start = new Date().getTime();
331
473
 
332
- // Generate random attention weights
333
- const attentionWeights = chunkEmbeddings.map(() => Math.random());
474
+ // Generate random attention weights
475
+ const attentionWeights = chunkEmbeddings.map(() => Math.random());
334
476
 
335
- // Apply softmax to get attention scores
336
- const expWeights = attentionWeights.map(w => Math.exp(w));
337
- const sumExpWeights = expWeights.reduce((sum, val) => sum + val, 0);
338
- const attentionScores = expWeights.map(w => w / sumExpWeights);
477
+ // Apply softmax to get attention scores
478
+ const expWeights = attentionWeights.map((w) => Math.exp(w));
479
+ const sumExpWeights = expWeights.reduce((sum, val) => sum + val, 0);
480
+ const attentionScores = expWeights.map((w) => w / sumExpWeights);
339
481
 
340
- // Get embedding dimension
341
- const embeddingDim = chunkEmbeddings[0].length;
482
+ // Get embedding dimension
483
+ const embeddingDim = chunkEmbeddings[0].length;
342
484
 
343
- // Initialize document embedding
344
- const documentEmbedding = new Array(embeddingDim).fill(0);
485
+ // Initialize document embedding
486
+ const documentEmbedding = new Array(embeddingDim).fill(0);
345
487
 
346
- // Weighted sum of embeddings
347
- for (let i = 0; i < chunkEmbeddings.length; i++) {
348
- for (let j = 0; j < embeddingDim; j++) {
349
- documentEmbedding[j] += chunkEmbeddings[i][j] * attentionScores[i];
350
- }
488
+ // Weighted sum of embeddings
489
+ for (let i = 0; i < chunkEmbeddings.length; i++) {
490
+ for (let j = 0; j < embeddingDim; j++) {
491
+ documentEmbedding[j] += chunkEmbeddings[i][j] * attentionScores[i];
351
492
  }
493
+ }
352
494
 
353
- const duration = new Date().getTime() - start;
354
- console.log(`Computed document embedding in ${duration}ms for ${chunkEmbeddings.length} chunks`);
495
+ const duration = new Date().getTime() - start;
496
+ console.log(
497
+ `Computed document embedding in ${duration}ms for ${chunkEmbeddings.length} chunks`,
498
+ );
355
499
 
356
- return documentEmbedding;
500
+ return documentEmbedding;
357
501
  }