@vertesia/workflow 0.80.0-dev.20251121 → 0.81.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +55 -14
  2. package/lib/cjs/activities/executeInteraction.js +22 -18
  3. package/lib/cjs/activities/executeInteraction.js.map +1 -1
  4. package/lib/cjs/activities/index-dsl.js +11 -9
  5. package/lib/cjs/activities/index-dsl.js.map +1 -1
  6. package/lib/cjs/activities/media/saveGladiaTranscription.js +81 -0
  7. package/lib/cjs/activities/media/saveGladiaTranscription.js.map +1 -0
  8. package/lib/cjs/activities/media/transcribeMediaWithGladia.js +7 -9
  9. package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -1
  10. package/lib/cjs/dsl/setup/ActivityContext.js +0 -1
  11. package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -1
  12. package/lib/cjs/utils/storage.js +0 -1
  13. package/lib/cjs/utils/storage.js.map +1 -1
  14. package/lib/esm/activities/executeInteraction.js +20 -16
  15. package/lib/esm/activities/executeInteraction.js.map +1 -1
  16. package/lib/esm/activities/index-dsl.js +5 -4
  17. package/lib/esm/activities/index-dsl.js.map +1 -1
  18. package/lib/esm/activities/media/saveGladiaTranscription.js +78 -0
  19. package/lib/esm/activities/media/saveGladiaTranscription.js.map +1 -0
  20. package/lib/esm/activities/media/transcribeMediaWithGladia.js +7 -9
  21. package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -1
  22. package/lib/esm/dsl/setup/ActivityContext.js +0 -1
  23. package/lib/esm/dsl/setup/ActivityContext.js.map +1 -1
  24. package/lib/esm/utils/storage.js +0 -1
  25. package/lib/esm/utils/storage.js.map +1 -1
  26. package/lib/types/activities/executeInteraction.d.ts.map +1 -1
  27. package/lib/types/activities/index-dsl.d.ts +6 -4
  28. package/lib/types/activities/index-dsl.d.ts.map +1 -1
  29. package/lib/types/activities/media/saveGladiaTranscription.d.ts +14 -0
  30. package/lib/types/activities/media/saveGladiaTranscription.d.ts.map +1 -0
  31. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +5 -0
  32. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -1
  33. package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -1
  34. package/lib/types/utils/storage.d.ts.map +1 -1
  35. package/lib/workflows-bundle.js +12366 -12271
  36. package/package.json +16 -6
  37. package/src/activities/executeInteraction.ts +29 -24
  38. package/src/activities/index-dsl.ts +7 -4
  39. package/src/activities/media/saveGladiaTranscription.ts +128 -0
  40. package/src/activities/media/transcribeMediaWithGladia.ts +13 -10
  41. package/src/dsl/setup/ActivityContext.ts +0 -2
  42. package/src/utils/storage.ts +0 -1
  43. package/lib/tsconfig.tsbuildinfo +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vertesia/workflow",
3
- "version": "0.80.0-dev.20251121",
3
+ "version": "0.81.0",
4
4
  "type": "module",
5
5
  "description": "Vertesia workflow DSL",
6
6
  "main": "./lib/esm/index.js",
@@ -44,11 +44,11 @@
44
44
  "tmp": "^0.2.4",
45
45
  "tmp-promise": "^3.0.3",
46
46
  "yaml": "^2.6.0",
47
- "@vertesia/api-fetch-client": "0.80.0-dev.20251121",
48
- "@vertesia/memory": "0.80.0-dev.20251121",
49
- "@vertesia/common": "0.80.0-dev.20251121",
50
- "@vertesia/client": "0.80.0-dev.20251121",
51
- "@llumiverse/common": "0.23.0-dev.20251121"
47
+ "@llumiverse/common": "0.24.0",
48
+ "@vertesia/common": "0.81.0",
49
+ "@vertesia/client": "0.81.0",
50
+ "@vertesia/api-fetch-client": "0.81.0",
51
+ "@vertesia/memory": "0.81.0"
52
52
  },
53
53
  "ts_dual_module": {
54
54
  "outDir": "lib",
@@ -105,6 +105,16 @@
105
105
  "url": "https://github.com/vertesia/composableai.git",
106
106
  "directory": "packages/workflow"
107
107
  },
108
+ "keywords": [
109
+ "vertesia",
110
+ "workflow",
111
+ "dsl",
112
+ "temporalio",
113
+ "llm",
114
+ "ai",
115
+ "agents",
116
+ "typescript"
117
+ ],
108
118
  "types": "./lib/types/index.d.ts",
109
119
  "typesVersions": {
110
120
  "*": {
package/src/activities/executeInteraction.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { CompletionResult, Modalities, ModelOptions } from "@llumiverse/common";
1
+ import { CompletionResult, ModelOptions } from "@llumiverse/common";
2
2
  import { activityInfo, log } from "@temporalio/activity";
3
3
  import { VertesiaClient } from "@vertesia/client";
4
4
  import { NodeStreamSource } from "@vertesia/client/node";
@@ -159,32 +159,37 @@ export async function executeInteraction(payload: DSLActivityExecutionPayload<Ex
159
159
  let completionResult: CompletionResult[] = res.result;
160
160
 
161
161
  // Handle image uploads if the result contains base64 images
162
- if (res.output_modality === Modalities.image) {
163
- const images = res.result.images();
162
+ const imageResults = completionResult.filter(r => r.type === "image");
163
+ if (imageResults.length > 0) {
164
164
  const uploadedImages = await Promise.all(
165
- images.map((image: string, index: number) => {
166
- // Extract base64 data and create buffer
167
- const base64Data = image.replace(/^data:image\/[a-z]+;base64,/, "");
168
- const buffer = Buffer.from(base64Data, 'base64');
169
-
170
- // Generate filename
171
- const { runId } = activityInfo().workflowExecution;
172
- const { activityId } = activityInfo();
173
- const filename = `generated-image-${runId}-${activityId}-${index}.png`;
174
-
175
- // Create a readable stream from the buffer
176
- const stream = Readable.from(buffer);
177
-
178
- const source = new NodeStreamSource(
179
- stream,
180
- filename,
181
- "image/png",
182
- );
183
-
184
- return client.files.uploadFile(source);
165
+ completionResult.map(async (item, index) => {
166
+ if (item.type === "image") {
167
+ const image = item.value;
168
+ // Extract base64 data and create buffer
169
+ const base64Data = image.replace(/^data:image\/[a-z]+;base64,/, "");
170
+ const buffer = Buffer.from(base64Data, 'base64');
171
+
172
+ // Generate filename
173
+ const { runId } = activityInfo().workflowExecution;
174
+ const { activityId } = activityInfo();
175
+ const filename = `generated-image-${runId}-${activityId}-${index}.png`;
176
+
177
+ // Create a readable stream from the buffer
178
+ const stream = Readable.from(buffer);
179
+
180
+ const source = new NodeStreamSource(
181
+ stream,
182
+ filename,
183
+ "image/png",
184
+ );
185
+
186
+ const file = await client.files.uploadFile(source);
187
+ return { type: "image", value: file } as CompletionResult;
188
+ }
189
+ return item;
185
190
  })
186
191
  );
187
- completionResult = uploadedImages.map(file => ({ type: "image", value: file }));
192
+ completionResult = uploadedImages;
188
193
  }
189
194
 
190
195
  return projectResult(payload, params, res, {
package/src/activities/index-dsl.ts CHANGED
@@ -8,14 +8,17 @@ export { executeInteraction } from "./executeInteraction.js";
8
8
  export { extractDocumentText } from "./extractDocumentText.js";
9
9
  export { generateDocumentProperties } from "./generateDocumentProperties.js";
10
10
  export { generateEmbeddings } from "./generateEmbeddings.js";
11
- export { generateImageRendition } from "./renditions/generateImageRendition.js";
12
- export { generateVideoRendition } from "./renditions/generateVideoRendition.js";
13
11
  export { generateOrAssignContentType } from "./generateOrAssignContentType.js";
14
12
  export { getObjectFromStore } from "./getObjectFromStore.js";
15
13
  export { handleDslError } from "./handleError.js";
14
+ export { prepareVideo } from "./media/prepareVideo.js";
16
15
  export { convertPdfToStructuredText } from "./media/processPdfWithTextract.js";
16
+ export { saveGladiaTranscription } from "./media/saveGladiaTranscription.js";
17
17
  export { transcribeMedia } from "./media/transcribeMediaWithGladia.js";
18
- export { prepareVideo } from "./media/prepareVideo.js";
18
+ export type { TranscriptMediaResult } from "./media/transcribeMediaWithGladia.js";
19
19
  export { notifyWebhook } from "./notifyWebhook.js";
20
- export { setDocumentStatus } from "./setDocumentStatus.js";
21
20
  export { checkRateLimit } from "./rateLimiter.js";
21
+ export { generateImageRendition } from "./renditions/generateImageRendition.js";
22
+ export { generateVideoRendition } from "./renditions/generateVideoRendition.js";
23
+ export { setDocumentStatus } from "./setDocumentStatus.js";
24
+
package/src/activities/media/saveGladiaTranscription.ts ADDED
@@ -0,0 +1,128 @@
1
+ import { log } from "@temporalio/activity";
2
+ import { FetchClient } from "@vertesia/api-fetch-client";
3
+ import { AudioMetadata, DSLActivityExecutionPayload, DSLActivitySpec, GladiaConfiguration, SupportedIntegrations, TranscriptSegment, VideoMetadata } from "@vertesia/common";
4
+ import { setupActivity } from "../../dsl/setup/ActivityContext.js";
5
+ import { TextExtractionResult, TextExtractionStatus } from "../../result-types.js";
6
+
7
+ export interface SaveGladiaTranscriptionParams {
8
+ gladiaTranscriptionId: string;
9
+ }
10
+
11
+ export interface SaveGladiaTranscription extends DSLActivitySpec<SaveGladiaTranscriptionParams> {
12
+ name: 'SaveGladiaTranscription';
13
+ }
14
+
15
+ const GLADIA_URL = "https://api.gladia.io/v2";
16
+
17
+ /**
18
+ * Fetches transcription results from Gladia and saves them to the content object.
19
+ * This activity is called after transcribeMedia completes via webhook callback.
20
+ */
21
+ export async function saveGladiaTranscription(payload: DSLActivityExecutionPayload<SaveGladiaTranscriptionParams>): Promise<TextExtractionResult> {
22
+ const { params, client, objectId } = await setupActivity<SaveGladiaTranscriptionParams>(payload);
23
+
24
+ const gladiaConfig = await client.projects.integrations.retrieve(payload.project_id, SupportedIntegrations.gladia) as GladiaConfiguration | undefined;
25
+ if (!gladiaConfig || !gladiaConfig.enabled) {
26
+ return {
27
+ hasText: false,
28
+ objectId,
29
+ status: TextExtractionStatus.error,
30
+ error: "Gladia integration not enabled",
31
+ };
32
+ }
33
+
34
+ const gladiaClient = new FetchClient(gladiaConfig.url ?? GLADIA_URL);
35
+ gladiaClient.withHeaders({ "x-gladia-key": gladiaConfig.api_key });
36
+
37
+ log.info(`Fetching transcription result from Gladia`, { objectId, transcriptionId: params.gladiaTranscriptionId });
38
+
39
+ const transcriptionResult = await gladiaClient.get(`/transcription/${params.gladiaTranscriptionId}`) as GladiaTranscriptionResult;
40
+
41
+ if (transcriptionResult.status === 'error') {
42
+ log.error(`Gladia transcription failed`, { objectId, error: transcriptionResult });
43
+ return {
44
+ hasText: false,
45
+ objectId,
46
+ status: TextExtractionStatus.error,
47
+ error: "Gladia transcription failed",
48
+ };
49
+ }
50
+
51
+ if (transcriptionResult.status !== 'done') {
52
+ log.warn(`Gladia transcription not ready`, { objectId, status: transcriptionResult.status });
53
+ return {
54
+ hasText: false,
55
+ objectId,
56
+ status: TextExtractionStatus.error,
57
+ error: `Gladia transcription not ready: ${transcriptionResult.status}`,
58
+ };
59
+ }
60
+
61
+ const object = await client.objects.retrieve(objectId, "+text");
62
+
63
+ const segments = processUtterances(transcriptionResult.result.transcription.utterances);
64
+ const fullText = transcriptionResult.result.transcription.full_transcript;
65
+
66
+ await client.objects.update(objectId, {
67
+ text: fullText,
68
+ text_etag: object.content?.etag,
69
+ transcript: {
70
+ segments,
71
+ etag: object.content?.etag
72
+ },
73
+ metadata: {
74
+ ...object.metadata,
75
+ duration: transcriptionResult.result.metadata.audio_duration,
76
+ languages: transcriptionResult.result.transcription.languages
77
+ } as AudioMetadata | VideoMetadata
78
+ });
79
+
80
+ log.info(`Saved transcription for object`, { objectId, textLength: fullText?.length, segmentCount: segments.length });
81
+
82
+ return {
83
+ hasText: (fullText?.length ?? 0) > 0,
84
+ objectId,
85
+ status: TextExtractionStatus.success,
86
+ message: `Transcription saved with ${segments.length} segments`
87
+ };
88
+ }
89
+
90
+ function processUtterances(utterances: GladiaUtterance[]): TranscriptSegment[] {
91
+ return utterances.map(u => ({
92
+ start: u.start,
93
+ end: u.end,
94
+ text: u.text,
95
+ speaker: u.speaker,
96
+ confidence: u.confidence,
97
+ language: u.language
98
+ }));
99
+ }
100
+
101
+ // Gladia API response types
102
+ interface GladiaTranscriptionResult {
103
+ id: string;
104
+ status: 'queued' | 'processing' | 'done' | 'error';
105
+ result: {
106
+ metadata: {
107
+ audio_duration: number;
108
+ number_of_distinct_channels: number;
109
+ billing_time: number;
110
+ transcription_time: number;
111
+ };
112
+ transcription: {
113
+ full_transcript: string;
114
+ languages: string[];
115
+ utterances: GladiaUtterance[];
116
+ };
117
+ };
118
+ }
119
+
120
+ interface GladiaUtterance {
121
+ language: string;
122
+ start: number;
123
+ end: number;
124
+ confidence: number;
125
+ channel: number;
126
+ speaker: number;
127
+ text: string;
128
+ }
package/src/activities/media/transcribeMediaWithGladia.ts CHANGED
@@ -1,6 +1,6 @@
1
- import { DSLActivityExecutionPayload, DSLActivitySpec, GladiaConfiguration, SupportedIntegrations, AUDIO_RENDITION_NAME, VideoMetadata, ContentNature } from "@vertesia/common";
2
1
  import { activityInfo, CompleteAsyncError, log } from "@temporalio/activity";
3
2
  import { FetchClient, RequestError } from "@vertesia/api-fetch-client";
3
+ import { AUDIO_RENDITION_NAME, ContentNature, DSLActivityExecutionPayload, DSLActivitySpec, GladiaConfiguration, SupportedIntegrations, VideoMetadata } from "@vertesia/common";
4
4
  import { setupActivity } from "../../dsl/setup/ActivityContext.js";
5
5
  import { DocumentNotFoundError } from "../../errors.js";
6
6
  import { TextExtractionResult, TextExtractionStatus } from "../../index.js";
@@ -17,6 +17,11 @@ export interface TranscriptMedia extends DSLActivitySpec<TranscriptMediaParams>
17
17
 
18
18
  export interface TranscriptMediaResult extends TextExtractionResult {
19
19
  message?: string;
20
+ /**
21
+ * Gladia transcription ID for fetching results in a follow-up activity.
22
+ * Present when async media transcription completes successfully.
23
+ */
24
+ gladiaTranscriptionId?: string;
20
25
  }
21
26
 
22
27
  const GLADIA_URL = "https://api.gladia.io/v2";
@@ -68,7 +73,7 @@ export async function transcribeMedia(payload: DSLActivityExecutionPayload<Trans
68
73
  log.info(`Using media URL for transcription`, { objectId, mediaUrl: mediaSource });
69
74
 
70
75
  const taskToken = Buffer.from(activityInfo().taskToken).toString('base64url');
71
- const callbackUrl = generateCallbackUrlForGladia(client.store.baseUrl, payload.auth_token, taskToken, objectId);
76
+ const callbackUrl = generateCallbackUrlForGladia(client.store.baseUrl, taskToken, objectId);
72
77
 
73
78
  log.info(`Transcribing media ${mediaUrl} with Gladia`, { objectId, callbackUrl });
74
79
 
@@ -85,10 +90,7 @@ export async function transcribeMedia(payload: DSLActivityExecutionPayload<Trans
85
90
  }
86
91
  }
87
92
  }) as GladiaTranscriptRequestResponse;
88
-
89
93
  log.info(`Transcription request sent to Gladia`, { objectId, res });
90
- throw new CompleteAsyncError();
91
-
92
94
  } catch (error: any) {
93
95
  if (error instanceof RequestError && error.status === 422) {
94
96
  return {
@@ -97,15 +99,16 @@ export async function transcribeMedia(payload: DSLActivityExecutionPayload<Trans
97
99
  status: TextExtractionStatus.error,
98
100
  error: `Gladia transcription error: ${error.message}`,
99
101
  }
100
- } else {
101
- log.error(`Error sending transcription request to Gladia for object ${objectId}`, { error });
102
- throw error;
103
102
  }
103
+ log.error(`Error sending transcription request to Gladia for object ${objectId}`, { error });
104
+ throw error;
104
105
  }
106
+
107
+ throw new CompleteAsyncError();
105
108
  }
106
109
 
107
- function generateCallbackUrlForGladia(baseUrl: string, authToken: string, taskToken: string, objectId: string) {
108
- return `${baseUrl}/api/v1/webhooks/gladia/${objectId}?access_token=${authToken}&task_token=${taskToken}`;
110
+ function generateCallbackUrlForGladia(baseUrl: string, taskToken: string, objectId: string) {
111
+ return `${baseUrl}/webhooks/gladia/${objectId}?task_token=${taskToken}`;
109
112
  }
110
113
 
111
114
  interface GladiaTranscriptRequestResponse {
package/src/dsl/setup/ActivityContext.ts CHANGED
@@ -166,8 +166,6 @@ export async function setupActivity<ParamsT extends Record<string, any>>(
166
166
  }
167
167
 
168
168
  const params = vars.resolve() as ParamsT;
169
- log.info(`Activity ${payload.activity.name} setup complete`);
170
-
171
169
  return new ActivityContext<ParamsT>(payload, client, params);
172
170
  }
173
171
 
package/src/utils/storage.ts CHANGED
@@ -34,7 +34,6 @@ export async function saveAgentArtifact(
34
34
 
35
35
  //create the file path and append extension if needed
36
36
  const filePath = agentStoragePath(runId) + "/" + name + (ext && !name.endsWith(ext) ? "." + ext : "");
37
- log.info(`Storing agent artifact ${filePath} for run ${runId}`);
38
37
 
39
38
  try {
40
39
  const source = new NodeStreamSource(fileContent, `${runId}-${basename(filePath)}`, mimeType, filePath);