@vertesia/workflow 0.80.0 → 0.81.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/activities/generateEmbeddings.js +14 -144
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -1
- package/lib/cjs/activities/index-dsl.js +11 -9
- package/lib/cjs/activities/index-dsl.js.map +1 -1
- package/lib/cjs/activities/media/saveGladiaTranscription.js +81 -0
- package/lib/cjs/activities/media/saveGladiaTranscription.js.map +1 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +7 -9
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -1
- package/lib/esm/activities/generateEmbeddings.js +15 -145
- package/lib/esm/activities/generateEmbeddings.js.map +1 -1
- package/lib/esm/activities/index-dsl.js +5 -4
- package/lib/esm/activities/index-dsl.js.map +1 -1
- package/lib/esm/activities/media/saveGladiaTranscription.js +78 -0
- package/lib/esm/activities/media/saveGladiaTranscription.js.map +1 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +7 -9
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -1
- package/lib/types/activities/generateEmbeddings.d.ts +1 -1
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -1
- package/lib/types/activities/index-dsl.d.ts +6 -4
- package/lib/types/activities/index-dsl.d.ts.map +1 -1
- package/lib/types/activities/media/saveGladiaTranscription.d.ts +14 -0
- package/lib/types/activities/media/saveGladiaTranscription.d.ts.map +1 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +5 -0
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -1
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +1 -1
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -1
- package/lib/workflows-bundle.js +98 -40
- package/package.json +6 -6
- package/src/activities/generateEmbeddings.ts +18 -185
- package/src/activities/index-dsl.ts +7 -4
- package/src/activities/media/saveGladiaTranscription.ts +128 -0
- package/src/activities/media/transcribeMediaWithGladia.ts +13 -10
|
@@ -6,7 +6,6 @@ const common_1 = require("@vertesia/common");
|
|
|
6
6
|
const ActivityContext_js_1 = require("../dsl/setup/ActivityContext.js");
|
|
7
7
|
const errors_js_1 = require("../errors.js");
|
|
8
8
|
const blobs_js_1 = require("../utils/blobs.js");
|
|
9
|
-
const chunks_js_1 = require("../utils/chunks.js");
|
|
10
9
|
const tokens_js_1 = require("../utils/tokens.js");
|
|
11
10
|
async function generateEmbeddings(payload) {
|
|
12
11
|
const { params, client, objectId, fetchProject } = await (0, ActivityContext_js_1.setupActivity)(payload);
|
|
@@ -81,7 +80,7 @@ async function generateEmbeddings(payload) {
|
|
|
81
80
|
}
|
|
82
81
|
return res;
|
|
83
82
|
}
|
|
84
|
-
async function generateTextEmbeddings({ document, client, type, config }
|
|
83
|
+
async function generateTextEmbeddings({ document, client, type, config }) {
|
|
85
84
|
if (!document) {
|
|
86
85
|
return { status: "error", message: "document is null or undefined" };
|
|
87
86
|
}
|
|
@@ -103,127 +102,25 @@ async function generateTextEmbeddings({ document, client, type, config }, parts)
|
|
|
103
102
|
message: "no properties found",
|
|
104
103
|
};
|
|
105
104
|
}
|
|
106
|
-
const { environment
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
if (
|
|
110
|
-
|
|
111
|
-
const tokensData = (0, tokens_js_1.countTokens)(document.text);
|
|
112
|
-
await client.objects.update(document.id, {
|
|
113
|
-
tokens: {
|
|
114
|
-
...tokensData,
|
|
115
|
-
etag: document.text_etag ?? (0, blobs_js_1.md5)(document.text),
|
|
116
|
-
},
|
|
117
|
-
});
|
|
118
|
-
document.tokens = {
|
|
119
|
-
...tokensData,
|
|
120
|
-
etag: document.text_etag ?? (0, blobs_js_1.md5)(document.text),
|
|
121
|
-
};
|
|
105
|
+
const { environment } = config;
|
|
106
|
+
// Count tokens if needed, do not rely on existing token count
|
|
107
|
+
let tokenCount = undefined;
|
|
108
|
+
if (type === common_1.SupportedEmbeddingTypes.text && document.text) {
|
|
109
|
+
tokenCount = (0, tokens_js_1.countTokens)(document.text).count;
|
|
122
110
|
}
|
|
123
111
|
const maxTokens = config.max_tokens ?? 8000;
|
|
124
112
|
//generate embeddings for the main doc if document isn't too large
|
|
125
|
-
//if too large, we'll just generate embeddings for the parts
|
|
126
|
-
//then we can generate embeddings for the main document by averaging the tensors
|
|
127
113
|
activity_1.log.info(`Generating ${type} embeddings for document ${document.id}`);
|
|
128
114
|
if (type === common_1.SupportedEmbeddingTypes.text &&
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
};
|
|
138
|
-
}
|
|
139
|
-
if (!partDefinitions || partDefinitions.length === 0) {
|
|
140
|
-
activity_1.log.info("No parts found for document, skipping embeddings generation");
|
|
141
|
-
return {
|
|
142
|
-
id: document.id,
|
|
143
|
-
status: "failed",
|
|
144
|
-
message: "no parts found",
|
|
145
|
-
};
|
|
146
|
-
}
|
|
147
|
-
activity_1.log.info("Generating embeddings for parts", {
|
|
148
|
-
parts: partDefinitions,
|
|
149
|
-
max_tokens: maxTokens,
|
|
150
|
-
});
|
|
151
|
-
const docParts = (0, chunks_js_1.getContentParts)(document.text, partDefinitions);
|
|
152
|
-
activity_1.log.info(`Retrieved ${docParts.length} parts`);
|
|
153
|
-
const start = new Date().getTime();
|
|
154
|
-
const generatePartEmbeddings = async (partContent, i) => {
|
|
155
|
-
const localStart = new Date().getTime();
|
|
156
|
-
try {
|
|
157
|
-
activity_1.log.info(`Generating embeddings for part ${i}`, {
|
|
158
|
-
text_len: partContent.length,
|
|
159
|
-
});
|
|
160
|
-
if (!partContent) {
|
|
161
|
-
return {
|
|
162
|
-
id: i,
|
|
163
|
-
number: i,
|
|
164
|
-
result: null,
|
|
165
|
-
status: "skipped",
|
|
166
|
-
message: "no text found",
|
|
167
|
-
};
|
|
168
|
-
}
|
|
169
|
-
const e = await generateEmbeddingsFromStudio(partContent, environment, client, model).catch((e) => {
|
|
170
|
-
activity_1.log.error("Error generating embeddings for part " + i, {
|
|
171
|
-
text_length: partContent.length,
|
|
172
|
-
error: e,
|
|
173
|
-
});
|
|
174
|
-
return null;
|
|
175
|
-
});
|
|
176
|
-
if (!e || !e.values) {
|
|
177
|
-
return {
|
|
178
|
-
id: i,
|
|
179
|
-
number: i,
|
|
180
|
-
result: null,
|
|
181
|
-
message: "no embeddings generated",
|
|
182
|
-
};
|
|
183
|
-
}
|
|
184
|
-
if (e.values.length === 0) {
|
|
185
|
-
return {
|
|
186
|
-
id: i,
|
|
187
|
-
number: i,
|
|
188
|
-
result: null,
|
|
189
|
-
message: "no embeddings generated",
|
|
190
|
-
};
|
|
191
|
-
}
|
|
192
|
-
activity_1.log.info(`Generated embeddings for part ${i}`, {
|
|
193
|
-
len: e.values.length,
|
|
194
|
-
duration: new Date().getTime() - localStart,
|
|
195
|
-
});
|
|
196
|
-
return { number: i, result: e };
|
|
197
|
-
}
|
|
198
|
-
catch (err) {
|
|
199
|
-
activity_1.log.info(`Error generating ${type} embeddings for part ${i} of ${document.id}`, { error: err });
|
|
200
|
-
return {
|
|
201
|
-
number: i,
|
|
202
|
-
result: null,
|
|
203
|
-
message: "error generating embeddings",
|
|
204
|
-
error: err.message,
|
|
205
|
-
};
|
|
206
|
-
}
|
|
115
|
+
tokenCount !== undefined &&
|
|
116
|
+
tokenCount > maxTokens) {
|
|
117
|
+
//TODO: Review strategy for large documents
|
|
118
|
+
activity_1.log.warn(`Document too large for ${type} embeddings generation, skipping (${tokenCount} tokens)`);
|
|
119
|
+
return {
|
|
120
|
+
id: document.id,
|
|
121
|
+
status: "skipped",
|
|
122
|
+
message: `${type} embeddings generation, skipped for large document (${tokenCount} tokens)`,
|
|
207
123
|
};
|
|
208
|
-
const partEmbeddings = await Promise.all(docParts.map((part, i) => generatePartEmbeddings(part, i)));
|
|
209
|
-
const validPartEmbeddings = partEmbeddings
|
|
210
|
-
.filter((e) => e.result !== null)
|
|
211
|
-
.map((e) => e.result);
|
|
212
|
-
const averagedEmbedding = computeAttentionEmbedding(validPartEmbeddings.map((e) => e.values));
|
|
213
|
-
activity_1.log.info(`Averaged embeddings for document ${document.id} in ${(new Date().getTime() - start) / 1000} seconds`, {
|
|
214
|
-
len: averagedEmbedding.length,
|
|
215
|
-
count: validPartEmbeddings.length,
|
|
216
|
-
max_tokens: maxTokens,
|
|
217
|
-
});
|
|
218
|
-
await client.objects.setEmbedding(document.id, type, {
|
|
219
|
-
values: averagedEmbedding,
|
|
220
|
-
model: validPartEmbeddings[0].model,
|
|
221
|
-
etag: document.text_etag,
|
|
222
|
-
});
|
|
223
|
-
activity_1.log.info(`Object ${document.id} embedding set`, {
|
|
224
|
-
type,
|
|
225
|
-
len: averagedEmbedding.length,
|
|
226
|
-
});
|
|
227
124
|
}
|
|
228
125
|
else {
|
|
229
126
|
activity_1.log.info(`Generating ${type} embeddings for document`);
|
|
@@ -328,31 +225,4 @@ async function generateEmbeddingsFromStudio(text, env, client, model) {
|
|
|
328
225
|
throw e;
|
|
329
226
|
});
|
|
330
227
|
}
|
|
331
|
-
//Simplified attention mechanism
|
|
332
|
-
// This is a naive implementation and should be replaced with a more sophisticated
|
|
333
|
-
// using tensorflow in a specific package
|
|
334
|
-
function computeAttentionEmbedding(chunkEmbeddings) {
|
|
335
|
-
if (chunkEmbeddings.length === 0)
|
|
336
|
-
return [];
|
|
337
|
-
const start = new Date().getTime();
|
|
338
|
-
// Generate random attention weights
|
|
339
|
-
const attentionWeights = chunkEmbeddings.map(() => Math.random());
|
|
340
|
-
// Apply softmax to get attention scores
|
|
341
|
-
const expWeights = attentionWeights.map((w) => Math.exp(w));
|
|
342
|
-
const sumExpWeights = expWeights.reduce((sum, val) => sum + val, 0);
|
|
343
|
-
const attentionScores = expWeights.map((w) => w / sumExpWeights);
|
|
344
|
-
// Get embedding dimension
|
|
345
|
-
const embeddingDim = chunkEmbeddings[0].length;
|
|
346
|
-
// Initialize document embedding
|
|
347
|
-
const documentEmbedding = new Array(embeddingDim).fill(0);
|
|
348
|
-
// Weighted sum of embeddings
|
|
349
|
-
for (let i = 0; i < chunkEmbeddings.length; i++) {
|
|
350
|
-
for (let j = 0; j < embeddingDim; j++) {
|
|
351
|
-
documentEmbedding[j] += chunkEmbeddings[i][j] * attentionScores[i];
|
|
352
|
-
}
|
|
353
|
-
}
|
|
354
|
-
const duration = new Date().getTime() - start;
|
|
355
|
-
console.log(`Computed document embedding in ${duration}ms for ${chunkEmbeddings.length} chunks`);
|
|
356
|
-
return documentEmbedding;
|
|
357
|
-
}
|
|
358
228
|
//# sourceMappingURL=generateEmbeddings.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generateEmbeddings.js","sourceRoot":"","sources":["../../../src/activities/generateEmbeddings.ts"],"names":[],"mappings":";;AAmDA,gDA8FC;AAhJD,mDAA2C;AAE3C,6CAO0B;AAC1B,wEAAgE;AAChE,4CAAqD;AACrD,gDAA2D;AAC3D,kDAA8D;AAC9D,kDAAiD;AAoC1C,KAAK,UAAU,kBAAkB,CACpC,OAA8D;IAE9D,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAC5C,MAAM,IAAA,kCAAa,EAA2B,OAAO,CAAC,CAAC;IAC3D,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,CAAC;IAE/B,MAAM,WAAW,GAAG,MAAM,YAAY,EAAE,CAAC;IACzC,MAAM,MAAM,GAAG,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;IAC3D,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,MAAM,IAAI,iCAAqB,CAAC,mBAAmB,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;IAC/E,CAAC;IACD,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,MAAM,IAAI,iCAAqB,CAAC,oCAAoC,EAAE;YAClE,QAAQ;SACX,CAAC,CAAC;IACP,CAAC;IAED,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,MAAM,IAAI,iCAAqB,CAAC,mBAAmB,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;IAC/E,CAAC;IAED,IAAI,CAAC,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC;QACxD,cAAG,CAAC,IAAI,CACJ,2CAA2C,IAAI,gBAAgB,WAAW,CAAC,IAAI,KAAK,WAAW,CAAC,SAAS,GAAG,EAC5G,EAAE,MAAM,EAAE,CACb,CAAC;QACF,OAAO;YACH,EAAE,EAAE,QAAQ;YACZ,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,2CAA2C,IAAI,EAAE;SAC7D,CAAC;IACN,CAAC;IAED,cAAG,CAAC,IAAI,CAAC,GAAG,IAAI,6CAA6C,QAAQ,EAAE,EAAE;QACrE,KAAK;QACL,MAAM;KACT,CAAC,CAAC;IAEH,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACX,iHAAiH,CACpH,CAAC;IACN,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAC1C,QAAQ,EACR,8CAA8C,CACjD,CAAC;IAEF,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,MAAM,IAAI,iCAAqB,CAAC,oBAAoB,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,IAAI,iCAAqB,CAAC,4BAA4B,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC9E,CAAC;IAED,IAAI,GAAG,CAAC;IAER,QAAQ,IAAI,EAAE,CAAC;QACX,KAAK,gCAAuB,CAAC,IAAI;YAC7B,GAAG,GAAG,MAAM,sBAAsB,CAAC;gBAC/B,MAAM;gBACN,MAAM;gBACN,QAAQ;gBACR,IAAI;aACP,CAAC,CAAC;YACH,MAAM;QACV,KAAK,gCAAuB,CAAC,UAAU;YACnC,GAAG,GAAG,MAAM,sBAAsB,CAAC;gBAC/B,MAAM;gBACN,MAAM;gBACN,QAAQ;gBACR,IAAI;aACP,CAAC,CAAC;YACH,MAAM;QACV,KAAK,gCAAuB,CAAC,KAAK;YAC
9B,GAAG,GAAG,MAAM,uBAAuB,CAAC;gBAChC,MAAM;gBACN,MAAM;gBACN,QAAQ;gBACR,IAAI;aACP,CAAC,CAAC;YACH,MAAM;QACV;YACI,GAAG,GAAG;gBACF,EAAE,EAAE,QAAQ;gBACZ,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,+BAA+B,IAAI,EAAE;aACjD,CAAC;IACV,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC;AAWD,KAAK,UAAU,sBAAsB,CACjC,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAmC,EACnE,KAAiB;IAGjB,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,+BAA+B,EAAE,CAAC;IACzE,CAAC;IAED,IACI,IAAI,KAAK,gCAAuB,CAAC,IAAI;QACrC,IAAI,KAAK,gCAAuB,CAAC,UAAU,EAC7C,CAAC;QACC,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,+BAA+B,IAAI,EAAE;SACjD,CAAC;IACN,CAAC;IAED,IAAI,IAAI,KAAK,gCAAuB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC1D,OAAO,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3E,CAAC;IACD,IAAI,IAAI,KAAK,gCAAuB,CAAC,UAAU,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;QACvE,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,qBAAqB;SACjC,CAAC;IACN,CAAC;IAED,MAAM,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IAEtC,MAAM,eAAe,GAAG,KAAK,IAAI,EAAE,CAAC;IAEpC,mCAAmC;IACnC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,KAAK,IAAI,IAAI,KAAK,gCAAuB,CAAC,IAAI,EAAE,CAAC;QACnE,cAAG,CAAC,KAAK,CAAC,qCAAqC,GAAG,QAAQ,CAAC,EAAE,CAAC,CAAC;QAC/D,MAAM,UAAU,GAAG,IAAA,uBAAW,EAAC,QAAQ,CAAC,IAAK,CAAC,CAAC;QAC/C,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,EAAE;YACrC,MAAM,EAAE;gBACJ,GAAG,UAAU;gBACb,IAAI,EAAE,QAAQ,CAAC,SAAS,IAAI,IAAA,cAAG,EAAC,QAAQ,CAAC,IAAK,CAAC;aAClD;SACJ,CAAC,CAAC;QACH,QAAQ,CAAC,MAAM,GAAG;YACd,GAAG,UAAU;YACb,IAAI,EAAE,QAAQ,CAAC,SAAS,IAAI,IAAA,cAAG,EAAC,QAAQ,CAAC,IAAK,CAAC;SAClD,CAAC;IACN,CAAC;IAED,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,IAAI,IAAI,CAAC;IAE5C,kEAAkE;IAClE,4DAA4D;IAC5D,gFAAgF;IAChF,cAAG,CAAC,IAAI,CAAC,cAAc,IAAI,4BAA4B,QAAQ,CAAC,EAAE,EAAE,CAAC,CAAC;IACtE,IACI,IAAI,KAAK,gCAAuB,CAAC,IAAI;QACrC,QAAQ,CAAC,MAAM,EAAE,KAAK;QACtB,QAAQ,CAAC,MAAM,EAAE,KAAK,GAAG,SAAS,EACpC,CAAC;QACC,cAAG,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;QAEhE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO;gBACH,EAAE,E
AAE,QAAQ,CAAC,EAAE;gBACf,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,eAAe;aAC3B,CAAC;QACN,CAAC;QAED,IAAI,CAAC,eAAe,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnD,cAAG,CAAC,IAAI,CACJ,6DAA6D,CAChE,CAAC;YACF,OAAO;gBACH,EAAE,EAAE,QAAQ,CAAC,EAAE;gBACf,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,gBAAgB;aAC5B,CAAC;QACN,CAAC;QAED,cAAG,CAAC,IAAI,CAAC,iCAAiC,EAAE;YACxC,KAAK,EAAE,eAAe;YACtB,UAAU,EAAE,SAAS;SACxB,CAAC,CAAC;QACH,MAAM,QAAQ,GAAG,IAAA,2BAAe,EAAC,QAAQ,CAAC,IAAI,EAAE,eAAe,CAAC,CAAC;QAEjE,cAAG,CAAC,IAAI,CAAC,aAAa,QAAQ,CAAC,MAAM,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;QACnC,MAAM,sBAAsB,GAAG,KAAK,EAChC,WAAmB,EACnB,CAAS,EACX,EAAE;YACA,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;YACxC,IAAI,CAAC;gBACD,cAAG,CAAC,IAAI,CAAC,kCAAkC,CAAC,EAAE,EAAE;oBAC5C,QAAQ,EAAE,WAAW,CAAC,MAAM;iBAC/B,CAAC,CAAC;gBACH,IAAI,CAAC,WAAW,EAAE,CAAC;oBACf,OAAO;wBACH,EAAE,EAAE,CAAC;wBACL,MAAM,EAAE,CAAC;wBACT,MAAM,EAAE,IAAI;wBACZ,MAAM,EAAE,SAAS;wBACjB,OAAO,EAAE,eAAe;qBAC3B,CAAC;gBACN,CAAC;gBAED,MAAM,CAAC,GAAG,MAAM,4BAA4B,CACxC,WAAW,EACX,WAAW,EACX,MAAM,EACN,KAAK,CACR,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;oBACV,cAAG,CAAC,KAAK,CAAC,uCAAuC,GAAG,CAAC,EAAE;wBACnD,WAAW,EAAE,WAAW,CAAC,MAAM;wBAC/B,KAAK,EAAE,CAAC;qBACX,CAAC,CAAC;oBACH,OAAO,IAAI,CAAC;gBAChB,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;oBAClB,OAAO;wBACH,EAAE,EAAE,CAAC;wBACL,MAAM,EAAE,CAAC;wBACT,MAAM,EAAE,IAAI;wBACZ,OAAO,EAAE,yBAAyB;qBACrC,CAAC;gBACN,CAAC;gBAED,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACxB,OAAO;wBACH,EAAE,EAAE,CAAC;wBACL,MAAM,EAAE,CAAC;wBACT,MAAM,EAAE,IAAI;wBACZ,OAAO,EAAE,yBAAyB;qBACrC,CAAC;gBACN,CAAC;gBACD,cAAG,CAAC,IAAI,CAAC,iCAAiC,CAAC,EAAE,EAAE;oBAC3C,GAAG,EAAE,CAAC,CAAC,MAAM,CAAC,MAAM;oBACpB,QAAQ,EAAE,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,GAAG,UAAU;iBAC9C,CAAC,CAAC;gBAEH,OAAO,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;YACpC,CAAC;YAAC,OAAO,GAAQ,EAAE,CAAC;gBAChB,cAAG,CAAC,IAAI,CACJ,oBAAoB,IAAI,wBAAwB,CAAC,OAAO,QAAQ,CAAC,EAAE,EAAE,EACrE,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;gBACF,OAAO;oBAC
H,MAAM,EAAE,CAAC;oBACT,MAAM,EAAE,IAAI;oBACZ,OAAO,EAAE,6BAA6B;oBACtC,KAAK,EAAE,GAAG,CAAC,OAAO;iBACrB,CAAC;YACN,CAAC;QACL,CAAC,CAAC;QAEF,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,GAAG,CACpC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,sBAAsB,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAC7D,CAAC;QACF,MAAM,mBAAmB,GAAG,cAAc;aACrC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,IAAI,CAAC;aAChC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC1B,MAAM,iBAAiB,GAAG,yBAAyB,CAC/C,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAC3C,CAAC;QACF,cAAG,CAAC,IAAI,CACJ,oCAAoC,QAAQ,CAAC,EAAE,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,GAAG,KAAK,CAAC,GAAG,IAAI,UAAU,EACrG;YACI,GAAG,EAAE,iBAAiB,CAAC,MAAM;YAC7B,KAAK,EAAE,mBAAmB,CAAC,MAAM;YACjC,UAAU,EAAE,SAAS;SACxB,CACJ,CAAC;QACF,MAAM,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,EAAE,IAAI,EAAE;YACjD,MAAM,EAAE,iBAAiB;YACzB,KAAK,EAAE,mBAAmB,CAAC,CAAC,CAAC,CAAC,KAAK;YACnC,IAAI,EAAE,QAAQ,CAAC,SAAS;SAC3B,CAAC,CAAC;QACH,cAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,CAAC,EAAE,gBAAgB,EAAE;YAC5C,IAAI;YACJ,GAAG,EAAE,iBAAiB,CAAC,MAAM;SAChC,CAAC,CAAC;IACP,CAAC;SAAM,CAAC;QACJ,cAAG,CAAC,IAAI,CAAC,cAAc,IAAI,0BAA0B,CAAC,CAAC;QAEvD,MAAM,GAAG,GAAG,MAAM,4BAA4B,CAC1C,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,EAC9B,WAAW,EACX,MAAM,CACT,CAAC;QACF,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC;YACtB,OAAO;gBACH,EAAE,EAAE,QAAQ,CAAC,EAAE;gBACf,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,yBAAyB;aACrC,CAAC;QACN,CAAC;QAED,cAAG,CAAC,IAAI,CAAC,GAAG,IAAI,sCAAsC,QAAQ,CAAC,EAAE,EAAE,EAAE;YACjE,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM;SACzB,CAAC,CAAC;QACH,MAAM,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,EAAE,IAAI,EAAE;YACjD,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,IAAI,EAAE,QAAQ,CAAC,SAAS;SAC3B,CAAC,CAAC;QAEH,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,IAAI;YACJ,MAAM,EAAE,WAAW;YACnB,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM;SACzB,CAAC;IACN,CAAC;AACL,CAAC;AAED,KAAK,UAAU,uBAAuB,CAAC,EACnC,QAAQ,EACR,MAAM,EACN,IAAI,EACJ,MAAM,GACwB;IAC9B,cAAG,CAAC,IAAI,CAAC,2CAA2C,GAAG,QAAQ,CA
AC,EAAE,EAAE;QAChE,OAAO,EAAE,QAAQ,CAAC,OAAO;KAC5B,CAAC,CAAC;IACH,IACI,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC;QAC7C,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,EAAE,QAAQ,CAAC,KAAK,CAAC,EAC1C,CAAC;QACC,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,IAAI;YACJ,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,yBAAyB;SACrC,CAAC;IACN,CAAC;IACD,MAAM,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IAEtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,EAAE;QAChE,MAAM,EAAE,6BAAoB,CAAC,IAAI;QACjC,mBAAmB,EAAE,IAAI;QACzB,QAAQ,EAAE,IAAI;KACjB,CAAC,CAAC;IAEH,IAAI,MAAM,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;IACjE,CAAC;SAAM,IACH,MAAM,CAAC,MAAM,KAAK,QAAQ;QAC1B,CAAC,MAAM,CAAC,UAAU;QAClB,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,EAC3B,CAAC;QACC,MAAM,IAAI,iCAAqB,CAAC,4BAA4B,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IACjF,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACrC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,CAAC;QACtB,MAAM,IAAI,iCAAqB,CAAC,8BAA8B,EAAE;YAC5D,QAAQ,CAAC,EAAE;SACd,CAAC,CAAC;IACP,CAAC;IAED,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAChC,MAAM,KAAK,GAAG,MAAM,IAAA,4BAAiB,EAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAEzD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,YAAY;SAChC,UAAU,CAAC,WAAW,EAAE;QACrB,KAAK;QACL,KAAK;KACR,CAAC;SACD,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC;SAClB,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,cAAG,CAAC,KAAK,CAAC,uCAAuC,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QACjE,MAAM,CAAC,CAAC;IACZ,CAAC,CAAC,CAAC;IAEP,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC;QACtB,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,yBAAyB;SACrC,CAAC;IACN,CAAC;IAED,MAAM,MAAM,CAAC,OAAO,CAAC,YAAY,CAC7B,QAAQ,CAAC,EAAE,EACX,gCAAuB,CAAC,KAAK,EAC7B;QACI,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,IAAI,EAAE,QAAQ,CAAC,SAAS;KAC3B,CACJ,CAAC;IAEF,OAAO;QACH,EAAE,EAAE,QAAQ,CAAC,EAAE;QACf,IAAI;QACJ,MAAM,EAAE,WAAW;QACnB,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM;KACzB,CAAC;AACN,CAAC;AAED,KAAK,UAAU,4BAA4B,CACvC,IAAY,EACZ,GAAW,EACX,MAAsB,EACtB,KAAc;IAEd,cAAG,CAAC,IAAI,CACJ,qCAAqC,IAAI,CAAC,
MAAM,2BAA2B,GAAG,EAAE,CACnF,CAAC;IAEF,OAAO,MAAM,CAAC,YAAY;SACrB,UAAU,CAAC,GAAG,EAAE;QACb,IAAI;QACJ,KAAK;KACR,CAAC;SACD,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC;SAClB,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,cAAG,CAAC,KAAK,CAAC,sCAAsC,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAChE,MAAM,CAAC,CAAC;IACZ,CAAC,CAAC,CAAC;AACX,CAAC;AAED,gCAAgC;AAChC,kFAAkF;AAClF,yCAAyC;AACzC,SAAS,yBAAyB,CAAC,eAA2B;IAC1D,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE5C,MAAM,KAAK,GAAG,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;IAEnC,oCAAoC;IACpC,MAAM,gBAAgB,GAAG,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAElE,wCAAwC;IACxC,MAAM,UAAU,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;IACpE,MAAM,eAAe,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC;IAEjE,0BAA0B;IAC1B,MAAM,YAAY,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAE/C,gCAAgC;IAChC,MAAM,iBAAiB,GAAG,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE1D,6BAA6B;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,iBAAiB,CAAC,CAAC,CAAC,IAAI,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;QACvE,CAAC;IACL,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,GAAG,KAAK,CAAC;IAC9C,OAAO,CAAC,GAAG,CACP,kCAAkC,QAAQ,UAAU,eAAe,CAAC,MAAM,SAAS,CACtF,CAAC;IAEF,OAAO,iBAAiB,CAAC;AAC7B,CAAC"}
|
|
1
|
+
{"version":3,"file":"generateEmbeddings.js","sourceRoot":"","sources":["../../../src/activities/generateEmbeddings.ts"],"names":[],"mappings":";;AAmDA,gDA8FC;AAhJD,mDAA2C;AAE3C,6CAO0B;AAC1B,wEAAgE;AAChE,4CAAqD;AACrD,gDAAsD;AAEtD,kDAAiD;AAoC1C,KAAK,UAAU,kBAAkB,CACpC,OAA8D;IAE9D,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAC5C,MAAM,IAAA,kCAAa,EAA2B,OAAO,CAAC,CAAC;IAC3D,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,CAAC;IAE/B,MAAM,WAAW,GAAG,MAAM,YAAY,EAAE,CAAC;IACzC,MAAM,MAAM,GAAG,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;IAC3D,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,MAAM,IAAI,iCAAqB,CAAC,mBAAmB,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;IAC/E,CAAC;IACD,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,MAAM,IAAI,iCAAqB,CAAC,oCAAoC,EAAE;YAClE,QAAQ;SACX,CAAC,CAAC;IACP,CAAC;IAED,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,MAAM,IAAI,iCAAqB,CAAC,mBAAmB,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;IAC/E,CAAC;IAED,IAAI,CAAC,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC;QACxD,cAAG,CAAC,IAAI,CACJ,2CAA2C,IAAI,gBAAgB,WAAW,CAAC,IAAI,KAAK,WAAW,CAAC,SAAS,GAAG,EAC5G,EAAE,MAAM,EAAE,CACb,CAAC;QACF,OAAO;YACH,EAAE,EAAE,QAAQ;YACZ,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,2CAA2C,IAAI,EAAE;SAC7D,CAAC;IACN,CAAC;IAED,cAAG,CAAC,IAAI,CAAC,GAAG,IAAI,6CAA6C,QAAQ,EAAE,EAAE;QACrE,KAAK;QACL,MAAM;KACT,CAAC,CAAC;IAEH,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACX,iHAAiH,CACpH,CAAC;IACN,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAC1C,QAAQ,EACR,8CAA8C,CACjD,CAAC;IAEF,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,MAAM,IAAI,iCAAqB,CAAC,oBAAoB,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,IAAI,iCAAqB,CAAC,4BAA4B,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC9E,CAAC;IAED,IAAI,GAAG,CAAC;IAER,QAAQ,IAAI,EAAE,CAAC;QACX,KAAK,gCAAuB,CAAC,IAAI;YAC7B,GAAG,GAAG,MAAM,sBAAsB,CAAC;gBAC/B,MAAM;gBACN,MAAM;gBACN,QAAQ;gBACR,IAAI;aACP,CAAC,CAAC;YACH,MAAM;QACV,KAAK,gCAAuB,CAAC,UAAU;YACnC,GAAG,GAAG,MAAM,sBAAsB,CAAC;gBAC/B,MAAM;gBACN,MAAM;gBACN,QAAQ;gBACR,IAAI;aACP,CAAC,CAAC;YACH,MAAM;QACV,KAAK,gCAAuB,CAAC,KAAK;YAC9B,GAAG,GAAG,
MAAM,uBAAuB,CAAC;gBAChC,MAAM;gBACN,MAAM;gBACN,QAAQ;gBACR,IAAI;aACP,CAAC,CAAC;YACH,MAAM;QACV;YACI,GAAG,GAAG;gBACF,EAAE,EAAE,QAAQ;gBACZ,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,+BAA+B,IAAI,EAAE;aACjD,CAAC;IACV,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC;AAWD,KAAK,UAAU,sBAAsB,CACjC,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAmC;IAGnE,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,+BAA+B,EAAE,CAAC;IACzE,CAAC;IAED,IACI,IAAI,KAAK,gCAAuB,CAAC,IAAI;QACrC,IAAI,KAAK,gCAAuB,CAAC,UAAU,EAC7C,CAAC;QACC,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,+BAA+B,IAAI,EAAE;SACjD,CAAC;IACN,CAAC;IAED,IAAI,IAAI,KAAK,gCAAuB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC1D,OAAO,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3E,CAAC;IACD,IAAI,IAAI,KAAK,gCAAuB,CAAC,UAAU,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;QACvE,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,qBAAqB;SACjC,CAAC;IACN,CAAC;IAED,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,CAAC;IAE/B,8DAA8D;IAC9D,IAAI,UAAU,GAAwB,SAAS,CAAC;IAChD,IAAI,IAAI,KAAK,gCAAuB,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;QACzD,UAAU,GAAG,IAAA,uBAAW,EAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC;IAClD,CAAC;IAED,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,IAAI,IAAI,CAAC;IAE5C,kEAAkE;IAClE,cAAG,CAAC,IAAI,CAAC,cAAc,IAAI,4BAA4B,QAAQ,CAAC,EAAE,EAAE,CAAC,CAAC;IACtE,IACI,IAAI,KAAK,gCAAuB,CAAC,IAAI;QACrC,UAAU,KAAK,SAAS;QACxB,UAAU,GAAG,SAAS,EACxB,CAAC;QACC,2CAA2C;QAC3C,cAAG,CAAC,IAAI,CACJ,0BAA0B,IAAI,qCAAqC,UAAU,UAAU,CAC1F,CAAC;QACF,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,GAAG,IAAI,uDAAuD,UAAU,UAAU;SAC9F,CAAA;IACL,CAAC;SAAM,CAAC;QACJ,cAAG,CAAC,IAAI,CAAC,cAAc,IAAI,0BAA0B,CAAC,CAAC;QAEvD,MAAM,GAAG,GAAG,MAAM,4BAA4B,CAC1C,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,EAC9B,WAAW,EACX,MAAM,CACT,CAAC;QACF,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC;YACtB,OAAO;gBACH,EAAE,EAAE,QAAQ,CAAC,EAAE;gBACf,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,yBAAyB;aACrC,CAAC;QACN,CAAC;QAED,cAAG,CAAC,IAAI,CAAC,GAAG,IAAI,sCAAsC,
QAAQ,CAAC,EAAE,EAAE,EAAE;YACjE,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM;SACzB,CAAC,CAAC;QACH,MAAM,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,EAAE,IAAI,EAAE;YACjD,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,IAAI,EAAE,QAAQ,CAAC,SAAS;SAC3B,CAAC,CAAC;QAEH,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,IAAI;YACJ,MAAM,EAAE,WAAW;YACnB,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM;SACzB,CAAC;IACN,CAAC;AACL,CAAC;AAED,KAAK,UAAU,uBAAuB,CAAC,EACnC,QAAQ,EACR,MAAM,EACN,IAAI,EACJ,MAAM,GACwB;IAC9B,cAAG,CAAC,IAAI,CAAC,2CAA2C,GAAG,QAAQ,CAAC,EAAE,EAAE;QAChE,OAAO,EAAE,QAAQ,CAAC,OAAO;KAC5B,CAAC,CAAC;IACH,IACI,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC;QAC7C,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,EAAE,QAAQ,CAAC,KAAK,CAAC,EAC1C,CAAC;QACC,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,IAAI;YACJ,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,yBAAyB;SACrC,CAAC;IACN,CAAC;IACD,MAAM,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IAEtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,EAAE;QAChE,MAAM,EAAE,6BAAoB,CAAC,IAAI;QACjC,mBAAmB,EAAE,IAAI;QACzB,QAAQ,EAAE,IAAI;KACjB,CAAC,CAAC;IAEH,IAAI,MAAM,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;IACjE,CAAC;SAAM,IACH,MAAM,CAAC,MAAM,KAAK,QAAQ;QAC1B,CAAC,MAAM,CAAC,UAAU;QAClB,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,EAC3B,CAAC;QACC,MAAM,IAAI,iCAAqB,CAAC,4BAA4B,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IACjF,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACrC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,CAAC;QACtB,MAAM,IAAI,iCAAqB,CAAC,8BAA8B,EAAE;YAC5D,QAAQ,CAAC,EAAE;SACd,CAAC,CAAC;IACP,CAAC;IAED,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAChC,MAAM,KAAK,GAAG,MAAM,IAAA,4BAAiB,EAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAEzD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,YAAY;SAChC,UAAU,CAAC,WAAW,EAAE;QACrB,KAAK;QACL,KAAK;KACR,CAAC;SACD,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC;SAClB,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,cAAG,CAAC,KAAK,CAAC,uCAAuC,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QACjE,MAAM,CAAC,CAAC;IACZ,CAAC,CAAC,CAAC;IAEP,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC;QACtB,OAAO;YAC
H,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,yBAAyB;SACrC,CAAC;IACN,CAAC;IAED,MAAM,MAAM,CAAC,OAAO,CAAC,YAAY,CAC7B,QAAQ,CAAC,EAAE,EACX,gCAAuB,CAAC,KAAK,EAC7B;QACI,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,IAAI,EAAE,QAAQ,CAAC,SAAS;KAC3B,CACJ,CAAC;IAEF,OAAO;QACH,EAAE,EAAE,QAAQ,CAAC,EAAE;QACf,IAAI;QACJ,MAAM,EAAE,WAAW;QACnB,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM;KACzB,CAAC;AACN,CAAC;AAED,KAAK,UAAU,4BAA4B,CACvC,IAAY,EACZ,GAAW,EACX,MAAsB,EACtB,KAAc;IAEd,cAAG,CAAC,IAAI,CACJ,qCAAqC,IAAI,CAAC,MAAM,2BAA2B,GAAG,EAAE,CACnF,CAAC;IAEF,OAAO,MAAM,CAAC,YAAY;SACrB,UAAU,CAAC,GAAG,EAAE;QACb,IAAI;QACJ,KAAK;KACR,CAAC;SACD,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC;SAClB,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,cAAG,CAAC,KAAK,CAAC,sCAAsC,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAChE,MAAM,CAAC,CAAC;IACZ,CAAC,CAAC,CAAC;AACX,CAAC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
3
|
+
exports.setDocumentStatus = exports.generateVideoRendition = exports.generateImageRendition = exports.checkRateLimit = exports.notifyWebhook = exports.transcribeMedia = exports.saveGladiaTranscription = exports.convertPdfToStructuredText = exports.prepareVideo = exports.handleDslError = exports.getObjectFromStore = exports.generateOrAssignContentType = exports.generateEmbeddings = exports.generateDocumentProperties = exports.extractDocumentText = exports.executeInteraction = exports.createPdfDocumentFromSource = exports.chunkDocument = exports.updateDocumentFromInteractionRun = exports.createOrUpdateDocumentFromInteractionRun = exports.createDocumentTypeFromInteractionRun = void 0;
|
|
4
4
|
// Export here DSL activities
|
|
5
5
|
var createDocumentTypeFromInteractionRun_js_1 = require("./advanced/createDocumentTypeFromInteractionRun.js");
|
|
6
6
|
Object.defineProperty(exports, "createDocumentTypeFromInteractionRun", { enumerable: true, get: function () { return createDocumentTypeFromInteractionRun_js_1.createDocumentTypeFromInteractionRun; } });
|
|
@@ -20,26 +20,28 @@ var generateDocumentProperties_js_1 = require("./generateDocumentProperties.js")
|
|
|
20
20
|
Object.defineProperty(exports, "generateDocumentProperties", { enumerable: true, get: function () { return generateDocumentProperties_js_1.generateDocumentProperties; } });
|
|
21
21
|
var generateEmbeddings_js_1 = require("./generateEmbeddings.js");
|
|
22
22
|
Object.defineProperty(exports, "generateEmbeddings", { enumerable: true, get: function () { return generateEmbeddings_js_1.generateEmbeddings; } });
|
|
23
|
-
var generateImageRendition_js_1 = require("./renditions/generateImageRendition.js");
|
|
24
|
-
Object.defineProperty(exports, "generateImageRendition", { enumerable: true, get: function () { return generateImageRendition_js_1.generateImageRendition; } });
|
|
25
|
-
var generateVideoRendition_js_1 = require("./renditions/generateVideoRendition.js");
|
|
26
|
-
Object.defineProperty(exports, "generateVideoRendition", { enumerable: true, get: function () { return generateVideoRendition_js_1.generateVideoRendition; } });
|
|
27
23
|
var generateOrAssignContentType_js_1 = require("./generateOrAssignContentType.js");
|
|
28
24
|
Object.defineProperty(exports, "generateOrAssignContentType", { enumerable: true, get: function () { return generateOrAssignContentType_js_1.generateOrAssignContentType; } });
|
|
29
25
|
var getObjectFromStore_js_1 = require("./getObjectFromStore.js");
|
|
30
26
|
Object.defineProperty(exports, "getObjectFromStore", { enumerable: true, get: function () { return getObjectFromStore_js_1.getObjectFromStore; } });
|
|
31
27
|
var handleError_js_1 = require("./handleError.js");
|
|
32
28
|
Object.defineProperty(exports, "handleDslError", { enumerable: true, get: function () { return handleError_js_1.handleDslError; } });
|
|
29
|
+
var prepareVideo_js_1 = require("./media/prepareVideo.js");
|
|
30
|
+
Object.defineProperty(exports, "prepareVideo", { enumerable: true, get: function () { return prepareVideo_js_1.prepareVideo; } });
|
|
33
31
|
var processPdfWithTextract_js_1 = require("./media/processPdfWithTextract.js");
|
|
34
32
|
Object.defineProperty(exports, "convertPdfToStructuredText", { enumerable: true, get: function () { return processPdfWithTextract_js_1.convertPdfToStructuredText; } });
|
|
33
|
+
var saveGladiaTranscription_js_1 = require("./media/saveGladiaTranscription.js");
|
|
34
|
+
Object.defineProperty(exports, "saveGladiaTranscription", { enumerable: true, get: function () { return saveGladiaTranscription_js_1.saveGladiaTranscription; } });
|
|
35
35
|
var transcribeMediaWithGladia_js_1 = require("./media/transcribeMediaWithGladia.js");
|
|
36
36
|
Object.defineProperty(exports, "transcribeMedia", { enumerable: true, get: function () { return transcribeMediaWithGladia_js_1.transcribeMedia; } });
|
|
37
|
-
var prepareVideo_js_1 = require("./media/prepareVideo.js");
|
|
38
|
-
Object.defineProperty(exports, "prepareVideo", { enumerable: true, get: function () { return prepareVideo_js_1.prepareVideo; } });
|
|
39
37
|
var notifyWebhook_js_1 = require("./notifyWebhook.js");
|
|
40
38
|
Object.defineProperty(exports, "notifyWebhook", { enumerable: true, get: function () { return notifyWebhook_js_1.notifyWebhook; } });
|
|
41
|
-
var setDocumentStatus_js_1 = require("./setDocumentStatus.js");
|
|
42
|
-
Object.defineProperty(exports, "setDocumentStatus", { enumerable: true, get: function () { return setDocumentStatus_js_1.setDocumentStatus; } });
|
|
43
39
|
var rateLimiter_js_1 = require("./rateLimiter.js");
|
|
44
40
|
Object.defineProperty(exports, "checkRateLimit", { enumerable: true, get: function () { return rateLimiter_js_1.checkRateLimit; } });
|
|
41
|
+
var generateImageRendition_js_1 = require("./renditions/generateImageRendition.js");
|
|
42
|
+
Object.defineProperty(exports, "generateImageRendition", { enumerable: true, get: function () { return generateImageRendition_js_1.generateImageRendition; } });
|
|
43
|
+
var generateVideoRendition_js_1 = require("./renditions/generateVideoRendition.js");
|
|
44
|
+
Object.defineProperty(exports, "generateVideoRendition", { enumerable: true, get: function () { return generateVideoRendition_js_1.generateVideoRendition; } });
|
|
45
|
+
var setDocumentStatus_js_1 = require("./setDocumentStatus.js");
|
|
46
|
+
Object.defineProperty(exports, "setDocumentStatus", { enumerable: true, get: function () { return setDocumentStatus_js_1.setDocumentStatus; } });
|
|
45
47
|
//# sourceMappingURL=index-dsl.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index-dsl.js","sourceRoot":"","sources":["../../../src/activities/index-dsl.ts"],"names":[],"mappings":";;;AAAA,6BAA6B;AAC7B,8GAA0G;AAAjG,+JAAA,oCAAoC,OAAA;AAC7C,sHAAkH;AAAzG,uKAAA,wCAAwC,OAAA;AACjD,sGAAkG;AAAzF,uJAAA,gCAAgC,OAAA;AACzC,uDAAmD;AAA1C,iHAAA,aAAa,OAAA;AACtB,2EAA2E;AAAlE,yIAAA,2BAA2B,OAAA;AACpC,iEAA6D;AAApD,2HAAA,kBAAkB,OAAA;AAC3B,mEAA+D;AAAtD,6HAAA,mBAAmB,OAAA;AAC5B,iFAA6E;AAApE,2IAAA,0BAA0B,OAAA;AACnC,iEAA6D;AAApD,2HAAA,kBAAkB,OAAA;AAC3B,
|
|
1
|
+
{"version":3,"file":"index-dsl.js","sourceRoot":"","sources":["../../../src/activities/index-dsl.ts"],"names":[],"mappings":";;;AAAA,6BAA6B;AAC7B,8GAA0G;AAAjG,+JAAA,oCAAoC,OAAA;AAC7C,sHAAkH;AAAzG,uKAAA,wCAAwC,OAAA;AACjD,sGAAkG;AAAzF,uJAAA,gCAAgC,OAAA;AACzC,uDAAmD;AAA1C,iHAAA,aAAa,OAAA;AACtB,2EAA2E;AAAlE,yIAAA,2BAA2B,OAAA;AACpC,iEAA6D;AAApD,2HAAA,kBAAkB,OAAA;AAC3B,mEAA+D;AAAtD,6HAAA,mBAAmB,OAAA;AAC5B,iFAA6E;AAApE,2IAAA,0BAA0B,OAAA;AACnC,iEAA6D;AAApD,2HAAA,kBAAkB,OAAA;AAC3B,mFAA+E;AAAtE,6IAAA,2BAA2B,OAAA;AACpC,iEAA6D;AAApD,2HAAA,kBAAkB,OAAA;AAC3B,mDAAkD;AAAzC,gHAAA,cAAc,OAAA;AACvB,2DAAuD;AAA9C,+GAAA,YAAY,OAAA;AACrB,+EAA+E;AAAtE,uIAAA,0BAA0B,OAAA;AACnC,iFAA6E;AAApE,qIAAA,uBAAuB,OAAA;AAChC,qFAAuE;AAA9D,+HAAA,eAAe,OAAA;AAExB,uDAAmD;AAA1C,iHAAA,aAAa,OAAA;AACtB,mDAAkD;AAAzC,gHAAA,cAAc,OAAA;AACvB,oFAAgF;AAAvE,mIAAA,sBAAsB,OAAA;AAC/B,oFAAgF;AAAvE,mIAAA,sBAAsB,OAAA;AAC/B,+DAA2D;AAAlD,yHAAA,iBAAiB,OAAA"}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.saveGladiaTranscription = saveGladiaTranscription;
|
|
4
|
+
const activity_1 = require("@temporalio/activity");
|
|
5
|
+
const api_fetch_client_1 = require("@vertesia/api-fetch-client");
|
|
6
|
+
const common_1 = require("@vertesia/common");
|
|
7
|
+
const ActivityContext_js_1 = require("../../dsl/setup/ActivityContext.js");
|
|
8
|
+
const result_types_js_1 = require("../../result-types.js");
|
|
9
|
+
const GLADIA_URL = "https://api.gladia.io/v2";
|
|
10
|
+
/**
 * Fetches transcription results from Gladia and saves them to the content object.
 * This activity is called after transcribeMedia completes via webhook callback.
 *
 * @param {object} payload - Activity payload. It is handed to `setupActivity`, and its
 *   `project_id` is used to look up the project's Gladia integration. The resolved
 *   `params.gladiaTranscriptionId` identifies the transcription to fetch.
 * @returns {Promise<object>} `{ hasText, objectId, status }` plus `error` when the
 *   integration is disabled, the id is missing, or Gladia reports a status other than
 *   `done`; plus `message` when the transcript was saved.
 */
async function saveGladiaTranscription(payload) {
    const { params, client, objectId } = await (0, ActivityContext_js_1.setupActivity)(payload);
    const gladiaConfig = await client.projects.integrations.retrieve(payload.project_id, common_1.SupportedIntegrations.gladia);
    if (!gladiaConfig || !gladiaConfig.enabled) {
        return {
            hasText: false,
            objectId,
            status: result_types_js_1.TextExtractionStatus.error,
            error: "Gladia integration not enabled",
        };
    }
    const transcriptionId = params.gladiaTranscriptionId;
    if (!transcriptionId) {
        // Without an id the request below would target `/transcription/undefined`;
        // report the problem with the same error shape as the other failure paths.
        activity_1.log.error(`Missing Gladia transcription id`, { objectId });
        return {
            hasText: false,
            objectId,
            status: result_types_js_1.TextExtractionStatus.error,
            error: "Missing Gladia transcription id",
        };
    }
    const gladiaClient = new api_fetch_client_1.FetchClient(gladiaConfig.url ?? GLADIA_URL);
    gladiaClient.withHeaders({ "x-gladia-key": gladiaConfig.api_key });
    activity_1.log.info(`Fetching transcription result from Gladia`, { objectId, transcriptionId });
    // The id becomes a URL path segment, so escape it rather than trusting its shape.
    const transcriptionResult = await gladiaClient.get(`/transcription/${encodeURIComponent(transcriptionId)}`);
    if (transcriptionResult.status === 'error') {
        activity_1.log.error(`Gladia transcription failed`, { objectId, error: transcriptionResult });
        return {
            hasText: false,
            objectId,
            status: result_types_js_1.TextExtractionStatus.error,
            error: "Gladia transcription failed",
        };
    }
    if (transcriptionResult.status !== 'done') {
        activity_1.log.warn(`Gladia transcription not ready`, { objectId, status: transcriptionResult.status });
        return {
            hasText: false,
            objectId,
            status: result_types_js_1.TextExtractionStatus.error,
            error: `Gladia transcription not ready: ${transcriptionResult.status}`,
        };
    }
    const object = await client.objects.retrieve(objectId, "+text");
    const { transcription, metadata } = transcriptionResult.result;
    // Default to an empty list so a result without utterances still saves the full text.
    const segments = processUtterances(transcription.utterances ?? []);
    const fullText = transcription.full_transcript;
    // The text and the transcript are both stamped with the etag of the content they came from.
    const contentEtag = object.content?.etag;
    await client.objects.update(objectId, {
        text: fullText,
        text_etag: contentEtag,
        transcript: {
            segments,
            etag: contentEtag
        },
        metadata: {
            ...object.metadata,
            duration: metadata.audio_duration,
            languages: transcription.languages
        }
    });
    activity_1.log.info(`Saved transcription for object`, { objectId, textLength: fullText?.length, segmentCount: segments.length });
    return {
        hasText: (fullText?.length ?? 0) > 0,
        objectId,
        status: result_types_js_1.TextExtractionStatus.success,
        message: `Transcription saved with ${segments.length} segments`
    };
}
|
|
71
|
+
/**
 * Maps Gladia utterances to transcript segments, keeping only the
 * `start`, `end`, `text`, `speaker`, `confidence` and `language` fields.
 *
 * @param {Array<object>|null|undefined} utterances - Utterances from the Gladia result.
 * @returns {Array<object>} One segment per utterance; an empty array when no
 *   utterances were provided.
 */
function processUtterances(utterances) {
    // A missing list is treated as "no segments" instead of throwing on `.map`.
    return (utterances ?? []).map(({ start, end, text, speaker, confidence, language }) => ({
        start,
        end,
        text,
        speaker,
        confidence,
        language,
    }));
}
|
|
81
|
+
//# sourceMappingURL=saveGladiaTranscription.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"saveGladiaTranscription.js","sourceRoot":"","sources":["../../../../src/activities/media/saveGladiaTranscription.ts"],"names":[],"mappings":";;AAoBA,0DAmEC;AAvFD,mDAA2C;AAC3C,iEAAyD;AACzD,6CAA6K;AAC7K,2EAAmE;AACnE,2DAAmF;AAUnF,MAAM,UAAU,GAAG,0BAA0B,CAAC;AAE9C;;;GAGG;AACI,KAAK,UAAU,uBAAuB,CAAC,OAAmE;IAC7G,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,IAAA,kCAAa,EAAgC,OAAO,CAAC,CAAC;IAEjG,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,8BAAqB,CAAC,MAAM,CAAoC,CAAC;IACtJ,IAAI,CAAC,YAAY,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC;QACzC,OAAO;YACH,OAAO,EAAE,KAAK;YACd,QAAQ;YACR,MAAM,EAAE,sCAAoB,CAAC,KAAK;YAClC,KAAK,EAAE,gCAAgC;SAC1C,CAAC;IACN,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,8BAAW,CAAC,YAAY,CAAC,GAAG,IAAI,UAAU,CAAC,CAAC;IACrE,YAAY,CAAC,WAAW,CAAC,EAAE,cAAc,EAAE,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC;IAEnE,cAAG,CAAC,IAAI,CAAC,2CAA2C,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE,MAAM,CAAC,qBAAqB,EAAE,CAAC,CAAC;IAEnH,MAAM,mBAAmB,GAAG,MAAM,YAAY,CAAC,GAAG,CAAC,kBAAkB,MAAM,CAAC,qBAAqB,EAAE,CAA8B,CAAC;IAElI,IAAI,mBAAmB,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;QACzC,cAAG,CAAC,KAAK,CAAC,6BAA6B,EAAE,EAAE,QAAQ,EAAE,KAAK,EAAE,mBAAmB,EAAE,CAAC,CAAC;QACnF,OAAO;YACH,OAAO,EAAE,KAAK;YACd,QAAQ;YACR,MAAM,EAAE,sCAAoB,CAAC,KAAK;YAClC,KAAK,EAAE,6BAA6B;SACvC,CAAC;IACN,CAAC;IAED,IAAI,mBAAmB,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;QACxC,cAAG,CAAC,IAAI,CAAC,gCAAgC,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,mBAAmB,CAAC,MAAM,EAAE,CAAC,CAAC;QAC7F,OAAO;YACH,OAAO,EAAE,KAAK;YACd,QAAQ;YACR,MAAM,EAAE,sCAAoB,CAAC,KAAK;YAClC,KAAK,EAAE,mCAAmC,mBAAmB,CAAC,MAAM,EAAE;SACzE,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAEhE,MAAM,QAAQ,GAAG,iBAAiB,CAAC,mBAAmB,CAAC,MAAM,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;IACxF,MAAM,QAAQ,GAAG,mBAAmB,CAAC,MAAM,CAAC,aAAa,CAAC,eAAe,CAAC;IAE1E,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE;QAClC,IAAI,EAAE,QAAQ;QACd,SAAS,EAAE,MAAM,CAAC,OAAO,EAAE,IAAI;QAC/B,UAAU,EAAE;YACR,QAAQ;YACR,IAAI,EAAE,MAAM,CAAC,OAAO,EAAE,IAAI;SAC7B;QACD,QAAQ,EAAE;YACN,GAAG,MAAM,CAAC,QAAQ;Y
AClB,QAAQ,EAAE,mBAAmB,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc;YAC5D,SAAS,EAAE,mBAAmB,CAAC,MAAM,CAAC,aAAa,CAAC,SAAS;SAC/B;KACrC,CAAC,CAAC;IAEH,cAAG,CAAC,IAAI,CAAC,gCAAgC,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAEtH,OAAO;QACH,OAAO,EAAE,CAAC,QAAQ,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC;QACpC,QAAQ;QACR,MAAM,EAAE,sCAAoB,CAAC,OAAO;QACpC,OAAO,EAAE,4BAA4B,QAAQ,CAAC,MAAM,WAAW;KAClE,CAAC;AACN,CAAC;AAED,SAAS,iBAAiB,CAAC,UAA6B;IACpD,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACxB,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,QAAQ,EAAE,CAAC,CAAC,QAAQ;KACvB,CAAC,CAAC,CAAC;AACR,CAAC"}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.transcribeMedia = transcribeMedia;
|
|
4
|
-
const common_1 = require("@vertesia/common");
|
|
5
4
|
const activity_1 = require("@temporalio/activity");
|
|
6
5
|
const api_fetch_client_1 = require("@vertesia/api-fetch-client");
|
|
6
|
+
const common_1 = require("@vertesia/common");
|
|
7
7
|
const ActivityContext_js_1 = require("../../dsl/setup/ActivityContext.js");
|
|
8
8
|
const errors_js_1 = require("../../errors.js");
|
|
9
9
|
const index_js_1 = require("../../index.js");
|
|
@@ -45,7 +45,7 @@ async function transcribeMedia(payload) {
|
|
|
45
45
|
}
|
|
46
46
|
activity_1.log.info(`Using media URL for transcription`, { objectId, mediaUrl: mediaSource });
|
|
47
47
|
const taskToken = Buffer.from((0, activity_1.activityInfo)().taskToken).toString('base64url');
|
|
48
|
-
const callbackUrl = generateCallbackUrlForGladia(client.store.baseUrl,
|
|
48
|
+
const callbackUrl = generateCallbackUrlForGladia(client.store.baseUrl, taskToken, objectId);
|
|
49
49
|
activity_1.log.info(`Transcribing media ${mediaUrl} with Gladia`, { objectId, callbackUrl });
|
|
50
50
|
try {
|
|
51
51
|
const res = await gladiaClient.post("/transcription", {
|
|
@@ -61,7 +61,6 @@ async function transcribeMedia(payload) {
|
|
|
61
61
|
}
|
|
62
62
|
});
|
|
63
63
|
activity_1.log.info(`Transcription request sent to Gladia`, { objectId, res });
|
|
64
|
-
throw new activity_1.CompleteAsyncError();
|
|
65
64
|
}
|
|
66
65
|
catch (error) {
|
|
67
66
|
if (error instanceof api_fetch_client_1.RequestError && error.status === 422) {
|
|
@@ -72,13 +71,12 @@ async function transcribeMedia(payload) {
|
|
|
72
71
|
error: `Gladia transcription error: ${error.message}`,
|
|
73
72
|
};
|
|
74
73
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
throw error;
|
|
78
|
-
}
|
|
74
|
+
activity_1.log.error(`Error sending transcription request to Gladia for object ${objectId}`, { error });
|
|
75
|
+
throw error;
|
|
79
76
|
}
|
|
77
|
+
throw new activity_1.CompleteAsyncError();
|
|
80
78
|
}
|
|
81
|
-
function generateCallbackUrlForGladia(baseUrl,
|
|
82
|
-
return `${baseUrl}/
|
|
79
|
+
/**
 * Builds the webhook URL Gladia calls back once a transcription finishes.
 *
 * @param {string} baseUrl - Base URL the webhook path is appended to.
 * @param {string} taskToken - Activity task token (base64url-encoded by the caller),
 *   sent as the `task_token` query parameter.
 * @param {string} objectId - Id of the content object, used as the last path segment.
 * @returns {string} `${baseUrl}/webhooks/gladia/<objectId>?task_token=<taskToken>`
 */
function generateCallbackUrlForGladia(baseUrl, taskToken, objectId) {
    // Percent-encode both values so reserved characters cannot alter the path or
    // the query string; base64url tokens and plain alphanumeric ids are unchanged.
    return `${baseUrl}/webhooks/gladia/${encodeURIComponent(objectId)}?task_token=${encodeURIComponent(taskToken)}`;
}
|
|
84
82
|
//# sourceMappingURL=transcribeMediaWithGladia.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transcribeMediaWithGladia.js","sourceRoot":"","sources":["../../../../src/activities/media/transcribeMediaWithGladia.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"transcribeMediaWithGladia.js","sourceRoot":"","sources":["../../../../src/activities/media/transcribeMediaWithGladia.ts"],"names":[],"mappings":";;AA4BA,0CA+EC;AA3GD,mDAA6E;AAC7E,iEAAuE;AACvE,6CAAgL;AAChL,2EAAmE;AACnE,+CAAwD;AACxD,6CAA4E;AAqB5E,MAAM,UAAU,GAAG,0BAA0B,CAAC;AAEvC,KAAK,UAAU,eAAe,CAAC,OAA2D;IAE7F,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,IAAA,kCAAa,EAAwB,OAAO,CAAC,CAAC;IAEzF,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,8BAAqB,CAAC,MAAM,CAAoC,CAAC;IACtJ,IAAI,CAAC,YAAY,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC;QACzC,OAAO;YACH,OAAO,EAAE,KAAK;YACd,QAAQ;YACR,MAAM,EAAE,+BAAoB,CAAC,KAAK;YAClC,KAAK,EAAE,gCAAgC;SAC1C,CAAA;IACL,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAChE,MAAM,YAAY,GAAG,IAAI,8BAAW,CAAC,YAAY,CAAC,GAAG,IAAI,UAAU,CAAC,CAAC;IACrE,YAAY,CAAC,WAAW,CAAC,EAAE,cAAc,EAAE,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC;IAEnE,IAAI,MAAM,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QAC/B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,+BAAoB,CAAC,OAAO,EAAE,OAAO,EAAE,4CAA4C,EAAE,CAAA;IACnI,CAAC;IAED,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,iCAAqB,CAAC,8BAA8B,QAAQ,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED,qEAAqE;IACrE,IAAI,WAAW,GAAW,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;IAChD,IAAI,MAAM,CAAC,QAAQ,EAAE,IAAI,KAAK,sBAAa,CAAC,KAAK,EAAE,CAAC;QAChD,MAAM,aAAa,GAAG,MAAM,CAAC,QAAyB,CAAC;QACvD,MAAM,cAAc,GAAG,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,6BAAoB,CAAC,CAAC;QAC5F,IAAI,cAAc,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAClC,WAAW,GAAG,cAAc,CAAC,OAAO,CAAC,MAAM,CAAC;YAC5C,cAAG,CAAC,IAAI,CAAC,0CAA0C,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;QACpF,CAAC;IACL,CAAC;IAED,wCAAwC;IACxC,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;IAEzE,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,MAAM,IAAI,iCAAqB,CAAC,gCAAgC,WAAW,EAAE,CAAC,CAAC;IACnF,CAAC;IAED,cAAG,CAAC,IAAI,CAAC,mCAAmC,EAAE,EAAE,QAAQ,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC,CAAC;IAEnF,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,C
AAC,IAAA,uBAAY,GAAE,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAC9E,MAAM,WAAW,GAAG,4BAA4B,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IAE5F,cAAG,CAAC,IAAI,CAAC,sBAAsB,QAAQ,cAAc,EAAE,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC,CAAC;IAElF,IAAI,CAAC;QACD,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,gBAAgB,EAAE;YAClD,OAAO,EAAE;gBACL,SAAS,EAAE,QAAQ;gBACnB,YAAY,EAAE,WAAW;gBACzB,oBAAoB,EAAE,IAAI;gBAC1B,qBAAqB,EAAE,IAAI;gBAC3B,SAAS,EAAE,IAAI;gBACf,gBAAgB,EAAE;oBACd,OAAO,EAAE,CAAC,KAAK,CAAC;iBACnB;aACJ;SACJ,CAAoC,CAAC;QACtC,cAAG,CAAC,IAAI,CAAC,sCAAsC,EAAE,EAAE,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC;IACxE,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QAClB,IAAI,KAAK,YAAY,+BAAY,IAAI,KAAK,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YACxD,OAAO;gBACH,OAAO,EAAE,KAAK;gBACd,QAAQ;gBACR,MAAM,EAAE,+BAAoB,CAAC,KAAK;gBAClC,KAAK,EAAE,+BAA+B,KAAK,CAAC,OAAO,EAAE;aACxD,CAAA;QACL,CAAC;QACD,cAAG,CAAC,KAAK,CAAC,4DAA4D,QAAQ,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QAC7F,MAAM,KAAK,CAAC;IAChB,CAAC;IAED,MAAM,IAAI,6BAAkB,EAAE,CAAC;AACnC,CAAC;AAED,SAAS,4BAA4B,CAAC,OAAe,EAAE,SAAiB,EAAE,QAAgB;IACtF,OAAO,GAAG,OAAO,oBAAoB,QAAQ,eAAe,SAAS,EAAE,CAAC;AAC5E,CAAC"}
|
|
@@ -2,8 +2,7 @@ import { log } from "@temporalio/activity";
|
|
|
2
2
|
import { ImageRenditionFormat, SupportedEmbeddingTypes, } from "@vertesia/common";
|
|
3
3
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
|
4
4
|
import { DocumentNotFoundError } from "../errors.js";
|
|
5
|
-
import { fetchBlobAsBase64
|
|
6
|
-
import { getContentParts } from "../utils/chunks.js";
|
|
5
|
+
import { fetchBlobAsBase64 } from "../utils/blobs.js";
|
|
7
6
|
import { countTokens } from "../utils/tokens.js";
|
|
8
7
|
export async function generateEmbeddings(payload) {
|
|
9
8
|
const { params, client, objectId, fetchProject } = await setupActivity(payload);
|
|
@@ -78,7 +77,7 @@ export async function generateEmbeddings(payload) {
|
|
|
78
77
|
}
|
|
79
78
|
return res;
|
|
80
79
|
}
|
|
81
|
-
async function generateTextEmbeddings({ document, client, type, config }
|
|
80
|
+
async function generateTextEmbeddings({ document, client, type, config }) {
|
|
82
81
|
if (!document) {
|
|
83
82
|
return { status: "error", message: "document is null or undefined" };
|
|
84
83
|
}
|
|
@@ -100,127 +99,25 @@ async function generateTextEmbeddings({ document, client, type, config }, parts)
|
|
|
100
99
|
message: "no properties found",
|
|
101
100
|
};
|
|
102
101
|
}
|
|
103
|
-
const { environment
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
if (
|
|
107
|
-
|
|
108
|
-
const tokensData = countTokens(document.text);
|
|
109
|
-
await client.objects.update(document.id, {
|
|
110
|
-
tokens: {
|
|
111
|
-
...tokensData,
|
|
112
|
-
etag: document.text_etag ?? md5(document.text),
|
|
113
|
-
},
|
|
114
|
-
});
|
|
115
|
-
document.tokens = {
|
|
116
|
-
...tokensData,
|
|
117
|
-
etag: document.text_etag ?? md5(document.text),
|
|
118
|
-
};
|
|
102
|
+
const { environment } = config;
|
|
103
|
+
// Count tokens if needed, do not rely on existing token count
|
|
104
|
+
let tokenCount = undefined;
|
|
105
|
+
if (type === SupportedEmbeddingTypes.text && document.text) {
|
|
106
|
+
tokenCount = countTokens(document.text).count;
|
|
119
107
|
}
|
|
120
108
|
const maxTokens = config.max_tokens ?? 8000;
|
|
121
109
|
//generate embeddings for the main doc if document isn't too large
|
|
122
|
-
//if too large, we'll just generate embeddings for the parts
|
|
123
|
-
//then we can generate embeddings for the main document by averaging the tensors
|
|
124
110
|
log.info(`Generating ${type} embeddings for document ${document.id}`);
|
|
125
111
|
if (type === SupportedEmbeddingTypes.text &&
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
};
|
|
135
|
-
}
|
|
136
|
-
if (!partDefinitions || partDefinitions.length === 0) {
|
|
137
|
-
log.info("No parts found for document, skipping embeddings generation");
|
|
138
|
-
return {
|
|
139
|
-
id: document.id,
|
|
140
|
-
status: "failed",
|
|
141
|
-
message: "no parts found",
|
|
142
|
-
};
|
|
143
|
-
}
|
|
144
|
-
log.info("Generating embeddings for parts", {
|
|
145
|
-
parts: partDefinitions,
|
|
146
|
-
max_tokens: maxTokens,
|
|
147
|
-
});
|
|
148
|
-
const docParts = getContentParts(document.text, partDefinitions);
|
|
149
|
-
log.info(`Retrieved ${docParts.length} parts`);
|
|
150
|
-
const start = new Date().getTime();
|
|
151
|
-
const generatePartEmbeddings = async (partContent, i) => {
|
|
152
|
-
const localStart = new Date().getTime();
|
|
153
|
-
try {
|
|
154
|
-
log.info(`Generating embeddings for part ${i}`, {
|
|
155
|
-
text_len: partContent.length,
|
|
156
|
-
});
|
|
157
|
-
if (!partContent) {
|
|
158
|
-
return {
|
|
159
|
-
id: i,
|
|
160
|
-
number: i,
|
|
161
|
-
result: null,
|
|
162
|
-
status: "skipped",
|
|
163
|
-
message: "no text found",
|
|
164
|
-
};
|
|
165
|
-
}
|
|
166
|
-
const e = await generateEmbeddingsFromStudio(partContent, environment, client, model).catch((e) => {
|
|
167
|
-
log.error("Error generating embeddings for part " + i, {
|
|
168
|
-
text_length: partContent.length,
|
|
169
|
-
error: e,
|
|
170
|
-
});
|
|
171
|
-
return null;
|
|
172
|
-
});
|
|
173
|
-
if (!e || !e.values) {
|
|
174
|
-
return {
|
|
175
|
-
id: i,
|
|
176
|
-
number: i,
|
|
177
|
-
result: null,
|
|
178
|
-
message: "no embeddings generated",
|
|
179
|
-
};
|
|
180
|
-
}
|
|
181
|
-
if (e.values.length === 0) {
|
|
182
|
-
return {
|
|
183
|
-
id: i,
|
|
184
|
-
number: i,
|
|
185
|
-
result: null,
|
|
186
|
-
message: "no embeddings generated",
|
|
187
|
-
};
|
|
188
|
-
}
|
|
189
|
-
log.info(`Generated embeddings for part ${i}`, {
|
|
190
|
-
len: e.values.length,
|
|
191
|
-
duration: new Date().getTime() - localStart,
|
|
192
|
-
});
|
|
193
|
-
return { number: i, result: e };
|
|
194
|
-
}
|
|
195
|
-
catch (err) {
|
|
196
|
-
log.info(`Error generating ${type} embeddings for part ${i} of ${document.id}`, { error: err });
|
|
197
|
-
return {
|
|
198
|
-
number: i,
|
|
199
|
-
result: null,
|
|
200
|
-
message: "error generating embeddings",
|
|
201
|
-
error: err.message,
|
|
202
|
-
};
|
|
203
|
-
}
|
|
112
|
+
tokenCount !== undefined &&
|
|
113
|
+
tokenCount > maxTokens) {
|
|
114
|
+
//TODO: Review strategy for large documents
|
|
115
|
+
log.warn(`Document too large for ${type} embeddings generation, skipping (${tokenCount} tokens)`);
|
|
116
|
+
return {
|
|
117
|
+
id: document.id,
|
|
118
|
+
status: "skipped",
|
|
119
|
+
message: `${type} embeddings generation, skipped for large document (${tokenCount} tokens)`,
|
|
204
120
|
};
|
|
205
|
-
const partEmbeddings = await Promise.all(docParts.map((part, i) => generatePartEmbeddings(part, i)));
|
|
206
|
-
const validPartEmbeddings = partEmbeddings
|
|
207
|
-
.filter((e) => e.result !== null)
|
|
208
|
-
.map((e) => e.result);
|
|
209
|
-
const averagedEmbedding = computeAttentionEmbedding(validPartEmbeddings.map((e) => e.values));
|
|
210
|
-
log.info(`Averaged embeddings for document ${document.id} in ${(new Date().getTime() - start) / 1000} seconds`, {
|
|
211
|
-
len: averagedEmbedding.length,
|
|
212
|
-
count: validPartEmbeddings.length,
|
|
213
|
-
max_tokens: maxTokens,
|
|
214
|
-
});
|
|
215
|
-
await client.objects.setEmbedding(document.id, type, {
|
|
216
|
-
values: averagedEmbedding,
|
|
217
|
-
model: validPartEmbeddings[0].model,
|
|
218
|
-
etag: document.text_etag,
|
|
219
|
-
});
|
|
220
|
-
log.info(`Object ${document.id} embedding set`, {
|
|
221
|
-
type,
|
|
222
|
-
len: averagedEmbedding.length,
|
|
223
|
-
});
|
|
224
121
|
}
|
|
225
122
|
else {
|
|
226
123
|
log.info(`Generating ${type} embeddings for document`);
|
|
@@ -325,31 +222,4 @@ async function generateEmbeddingsFromStudio(text, env, client, model) {
|
|
|
325
222
|
throw e;
|
|
326
223
|
});
|
|
327
224
|
}
|
|
328
|
-
//Simplified attention mechanism
|
|
329
|
-
// This is a naive implementation and should be replaced with a more sophisticated
|
|
330
|
-
// using tensorflow in a specific package
|
|
331
|
-
function computeAttentionEmbedding(chunkEmbeddings) {
|
|
332
|
-
if (chunkEmbeddings.length === 0)
|
|
333
|
-
return [];
|
|
334
|
-
const start = new Date().getTime();
|
|
335
|
-
// Generate random attention weights
|
|
336
|
-
const attentionWeights = chunkEmbeddings.map(() => Math.random());
|
|
337
|
-
// Apply softmax to get attention scores
|
|
338
|
-
const expWeights = attentionWeights.map((w) => Math.exp(w));
|
|
339
|
-
const sumExpWeights = expWeights.reduce((sum, val) => sum + val, 0);
|
|
340
|
-
const attentionScores = expWeights.map((w) => w / sumExpWeights);
|
|
341
|
-
// Get embedding dimension
|
|
342
|
-
const embeddingDim = chunkEmbeddings[0].length;
|
|
343
|
-
// Initialize document embedding
|
|
344
|
-
const documentEmbedding = new Array(embeddingDim).fill(0);
|
|
345
|
-
// Weighted sum of embeddings
|
|
346
|
-
for (let i = 0; i < chunkEmbeddings.length; i++) {
|
|
347
|
-
for (let j = 0; j < embeddingDim; j++) {
|
|
348
|
-
documentEmbedding[j] += chunkEmbeddings[i][j] * attentionScores[i];
|
|
349
|
-
}
|
|
350
|
-
}
|
|
351
|
-
const duration = new Date().getTime() - start;
|
|
352
|
-
console.log(`Computed document embedding in ${duration}ms for ${chunkEmbeddings.length} chunks`);
|
|
353
|
-
return documentEmbedding;
|
|
354
|
-
}
|
|
355
225
|
//# sourceMappingURL=generateEmbeddings.js.map
|