@vertesia/workflow 1.0.0 → 1.1.0-dev.20260427.060440Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/lib/cjs/activities/executeRemoteActivity.js +21 -2
  2. package/lib/cjs/activities/executeRemoteActivity.js.map +1 -1
  3. package/lib/cjs/activities/generateEmbeddings.js +5 -3
  4. package/lib/cjs/activities/generateEmbeddings.js.map +1 -1
  5. package/lib/cjs/activities/handleError.js +8 -1
  6. package/lib/cjs/activities/handleError.js.map +1 -1
  7. package/lib/cjs/activities/index-dsl.js +5 -1
  8. package/lib/cjs/activities/index-dsl.js.map +1 -1
  9. package/lib/cjs/activities/loadChildWorkflowSpec.js +15 -0
  10. package/lib/cjs/activities/loadChildWorkflowSpec.js.map +1 -0
  11. package/lib/cjs/activities/media/prepareAudio.js +3 -2
  12. package/lib/cjs/activities/media/prepareAudio.js.map +1 -1
  13. package/lib/cjs/activities/media/prepareVideo.js +4 -1
  14. package/lib/cjs/activities/media/prepareVideo.js.map +1 -1
  15. package/lib/cjs/activities/media/probeMediaStreams.js +49 -0
  16. package/lib/cjs/activities/media/probeMediaStreams.js.map +1 -0
  17. package/lib/cjs/activities/resolveRemoteActivities.js +29 -16
  18. package/lib/cjs/activities/resolveRemoteActivities.js.map +1 -1
  19. package/lib/cjs/dsl/dsl-workflow.js +22 -4
  20. package/lib/cjs/dsl/dsl-workflow.js.map +1 -1
  21. package/lib/cjs/dsl/setup/ActivityContext.js +12 -2
  22. package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -1
  23. package/lib/cjs/security/ssrf.js +34 -0
  24. package/lib/cjs/security/ssrf.js.map +1 -0
  25. package/lib/esm/activities/executeRemoteActivity.js +21 -2
  26. package/lib/esm/activities/executeRemoteActivity.js.map +1 -1
  27. package/lib/esm/activities/generateEmbeddings.js +5 -3
  28. package/lib/esm/activities/generateEmbeddings.js.map +1 -1
  29. package/lib/esm/activities/handleError.js +8 -1
  30. package/lib/esm/activities/handleError.js.map +1 -1
  31. package/lib/esm/activities/index-dsl.js +2 -0
  32. package/lib/esm/activities/index-dsl.js.map +1 -1
  33. package/lib/esm/activities/loadChildWorkflowSpec.js +12 -0
  34. package/lib/esm/activities/loadChildWorkflowSpec.js.map +1 -0
  35. package/lib/esm/activities/media/prepareAudio.js +3 -2
  36. package/lib/esm/activities/media/prepareAudio.js.map +1 -1
  37. package/lib/esm/activities/media/prepareVideo.js +5 -2
  38. package/lib/esm/activities/media/prepareVideo.js.map +1 -1
  39. package/lib/esm/activities/media/probeMediaStreams.js +46 -0
  40. package/lib/esm/activities/media/probeMediaStreams.js.map +1 -0
  41. package/lib/esm/activities/resolveRemoteActivities.js +29 -16
  42. package/lib/esm/activities/resolveRemoteActivities.js.map +1 -1
  43. package/lib/esm/dsl/dsl-workflow.js +22 -4
  44. package/lib/esm/dsl/dsl-workflow.js.map +1 -1
  45. package/lib/esm/dsl/setup/ActivityContext.js +12 -2
  46. package/lib/esm/dsl/setup/ActivityContext.js.map +1 -1
  47. package/lib/esm/security/ssrf.js +29 -0
  48. package/lib/esm/security/ssrf.js.map +1 -0
  49. package/lib/tsconfig.tsbuildinfo +1 -1
  50. package/lib/types/activities/executeRemoteActivity.d.ts.map +1 -1
  51. package/lib/types/activities/generateEmbeddings.d.ts +6 -4
  52. package/lib/types/activities/generateEmbeddings.d.ts.map +1 -1
  53. package/lib/types/activities/handleError.d.ts.map +1 -1
  54. package/lib/types/activities/index-dsl.d.ts +3 -0
  55. package/lib/types/activities/index-dsl.d.ts.map +1 -1
  56. package/lib/types/activities/loadChildWorkflowSpec.d.ts +6 -0
  57. package/lib/types/activities/loadChildWorkflowSpec.d.ts.map +1 -0
  58. package/lib/types/activities/media/prepareAudio.d.ts.map +1 -1
  59. package/lib/types/activities/media/prepareVideo.d.ts.map +1 -1
  60. package/lib/types/activities/media/probeMediaStreams.d.ts +12 -0
  61. package/lib/types/activities/media/probeMediaStreams.d.ts.map +1 -0
  62. package/lib/types/activities/resolveRemoteActivities.d.ts.map +1 -1
  63. package/lib/types/dsl/dsl-workflow.d.ts.map +1 -1
  64. package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -1
  65. package/lib/types/security/ssrf.d.ts +18 -0
  66. package/lib/types/security/ssrf.d.ts.map +1 -0
  67. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +6 -4
  68. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -1
  69. package/lib/workflows-bundle.js +352 -158
  70. package/package.json +7 -7
  71. package/src/activities/executeRemoteActivity.test.ts +8 -0
  72. package/src/activities/executeRemoteActivity.ts +21 -2
  73. package/src/activities/generateEmbeddings.ts +6 -3
  74. package/src/activities/handleError.ts +9 -1
  75. package/src/activities/index-dsl.ts +3 -0
  76. package/src/activities/loadChildWorkflowSpec.ts +21 -0
  77. package/src/activities/media/prepareAudio.ts +3 -2
  78. package/src/activities/media/prepareVideo.ts +5 -2
  79. package/src/activities/media/probeMediaStreams.test.ts +126 -0
  80. package/src/activities/media/probeMediaStreams.ts +81 -0
  81. package/src/activities/resolveRemoteActivities.test.ts +11 -10
  82. package/src/activities/resolveRemoteActivities.ts +31 -16
  83. package/src/dsl/dsl-workflow.ts +22 -4
  84. package/src/dsl/setup/ActivityContext.test.ts +57 -0
  85. package/src/dsl/setup/ActivityContext.ts +16 -2
  86. package/src/security/ssrf.ts +32 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vertesia/workflow",
3
- "version": "1.0.0",
3
+ "version": "1.1.0-dev.20260427.060440Z",
4
4
  "type": "module",
5
5
  "description": "Vertesia workflow DSL",
6
6
  "main": "./lib/esm/index.js",
@@ -18,7 +18,7 @@
18
18
  "@temporalio/testing": "^1.11.5",
19
19
  "@temporalio/worker": "^1.11.5",
20
20
  "@types/jsonwebtoken": "^9.0.10",
21
- "@types/node": "^22.13.5",
21
+ "@types/node": "^25.6.0",
22
22
  "@types/papaparse": "^5.5.1",
23
23
  "@types/tmp": "^0.2.6",
24
24
  "ts-dual-module": "^0.6.3",
@@ -43,11 +43,11 @@
43
43
  "tmp": "^0.2.4",
44
44
  "tmp-promise": "^3.0.3",
45
45
  "yaml": "^2.6.0",
46
- "@llumiverse/common": "1.0.0",
47
- "@vertesia/client": "1.0.0",
48
- "@vertesia/api-fetch-client": "1.0.0",
49
- "@vertesia/common": "1.0.0",
50
- "@vertesia/memory": "1.0.0"
46
+ "@llumiverse/common": "1.1.0-dev.20260427.054520Z",
47
+ "@vertesia/client": "1.1.0-dev.20260427.060440Z",
48
+ "@vertesia/common": "1.1.0-dev.20260427.060440Z",
49
+ "@vertesia/memory": "1.1.0-dev.20260427.060440Z",
50
+ "@vertesia/api-fetch-client": "1.1.0-dev.20260427.060440Z"
51
51
  },
52
52
  "ts_dual_module": {
53
53
  "outDir": "lib",
@@ -5,6 +5,14 @@ import { executeRemoteActivity, ExecuteRemoteActivityParams } from "./executeRem
5
5
 
6
6
  vi.stubGlobal("fetch", vi.fn());
7
7
 
8
+ vi.mock("../utils/client.js", () => ({
9
+ getVertesiaClient: vi.fn().mockReturnValue({
10
+ apps: {
11
+ validateUrl: vi.fn().mockResolvedValue({ valid: true }),
12
+ },
13
+ }),
14
+ }));
15
+
8
16
  let testEnv: MockActivityEnvironment;
9
17
  const mockFetch = vi.mocked(fetch);
10
18
 
@@ -5,6 +5,7 @@ import {
5
5
  RemoteActivityExecutionResponse,
6
6
  } from "@vertesia/common";
7
7
  import { setupActivity } from "../dsl/setup/ActivityContext.js";
8
+ import { URLValidationError, safeFetch } from "../security/ssrf.js";
8
9
 
9
10
  /**
10
11
  * Parameters for the executeRemoteActivity bridge activity.
@@ -39,9 +40,20 @@ export async function executeRemoteActivity(
39
40
  payload: DSLActivityExecutionPayload<ExecuteRemoteActivityParams>,
40
41
  ): Promise<any> {
41
42
  const ctx = await setupActivity<ExecuteRemoteActivityParams>(payload);
42
- const { params, runId } = ctx;
43
+ const { params, runId, client } = ctx;
43
44
  const { url, activity_name, params: activityParams, app_install_id, app_settings } = params;
44
45
 
46
+ // Validate the URL via Studio before forwarding the auth token
47
+ try {
48
+ await client.apps.validateUrl(url);
49
+ } catch (e) {
50
+ log.warn("URL validation blocked remote activity endpoint", { activity: activity_name, url, error: (e as Error).message });
51
+ throw ApplicationFailure.create({
52
+ message: `Remote activity ${activity_name} blocked: ${(e as Error).message}`,
53
+ nonRetryable: true,
54
+ });
55
+ }
56
+
45
57
  const executionPayload: RemoteActivityExecutionPayload = {
46
58
  activity_name,
47
59
  params: activityParams,
@@ -58,7 +70,7 @@ export async function executeRemoteActivity(
58
70
 
59
71
  let response: Response;
60
72
  try {
61
- response = await fetch(url, {
73
+ response = await safeFetch(url, {
62
74
  method: "POST",
63
75
  headers: {
64
76
  "Content-Type": "application/json",
@@ -68,6 +80,13 @@ export async function executeRemoteActivity(
68
80
  body: JSON.stringify(executionPayload),
69
81
  });
70
82
  } catch (err: unknown) {
83
+ if (err instanceof URLValidationError) {
84
+ log.warn("Redirect blocked on remote activity endpoint", { activity: activity_name, url, error: err.message });
85
+ throw ApplicationFailure.create({
86
+ message: `Remote activity ${activity_name} blocked: ${err.message}`,
87
+ nonRetryable: true,
88
+ });
89
+ }
71
90
  const message = err instanceof Error ? err.message : String(err);
72
91
  log.warn("Failed to reach remote activity endpoint", {
73
92
  error: message, activity: activity_name, endpoint: url, runId, app_install_id,
@@ -218,14 +218,17 @@ async function generateTextEmbeddings(
218
218
  tokenCount = countTokens(document.text).count;
219
219
  }
220
220
 
221
+ if (type === SupportedEmbeddingTypes.properties && document.properties) {
222
+ const propertiesText = JSON.stringify(document.properties);
223
+ tokenCount = countTokens(propertiesText).count;
224
+ }
225
+
221
226
  const maxTokens = config.max_tokens ?? 8000;
222
227
 
223
228
  //generate embeddings for the main doc if document isn't too large
224
229
  log.debug(`Generating ${type} embeddings for document ${document.id}`);
225
230
  if (
226
- type === SupportedEmbeddingTypes.text &&
227
- tokenCount !== undefined &&
228
- tokenCount > maxTokens
231
+ tokenCount !== undefined && tokenCount > maxTokens
229
232
  ) {
230
233
  //TODO: Review strategy for large documents
231
234
  log.warn(
@@ -8,7 +8,15 @@ export interface HandleDslErrorParams {
8
8
 
9
9
  export async function handleDslError(payload: DSLActivityExecutionPayload<HandleDslErrorParams>): Promise<void> {
10
10
  const { client, params, objectId } = await setupActivity<HandleDslErrorParams>(payload);
11
- const isIntake = payload.workflow_name === "StandardDocumentIntake" || payload.workflow_name === "StandardImageIntake";
11
+
12
+ const isIntake = [
13
+ "StandardDocumentIntake",
14
+ "StandardImageIntake",
15
+ "StandardMediaContainerIntake",
16
+ "StandardVideoIntake",
17
+ "StandardAudioIntake",
18
+ "StandardDocPartIntake",
19
+ ].includes(payload.workflow_name);
12
20
  if (!isIntake) {
13
21
  log.warn(`Workflow execution failed, but no error handler registered for this workflow: ${payload.workflow_name}`,
14
22
  { error: params.errorMessage },
@@ -16,6 +16,8 @@ export { getObjectFromStore } from "./getObjectFromStore.js";
16
16
  export { handleDslError } from "./handleError.js";
17
17
  export { prepareAudio } from "./media/prepareAudio.js";
18
18
  export { prepareVideo } from "./media/prepareVideo.js";
19
+ export { probeMediaStreams } from "./media/probeMediaStreams.js";
20
+ export type { ProbeMediaStreamsResult } from "./media/probeMediaStreams.js";
19
21
  export { convertPdfToStructuredText } from "./media/processPdfWithTextract.js";
20
22
  export { saveGladiaTranscription } from "./media/saveGladiaTranscription.js";
21
23
  export { transcribeMedia } from "./media/transcribeMediaWithGladia.js";
@@ -26,4 +28,5 @@ export { checkRateLimit } from "./rateLimiter.js";
26
28
  export { generateImageRendition } from "./renditions/generateImageRendition.js";
27
29
  export { generateVideoRendition } from "./renditions/generateVideoRendition.js";
28
30
  export { setDocumentStatus } from "./setDocumentStatus.js";
31
+ export { loadChildWorkflowSpec } from "./loadChildWorkflowSpec.js";
29
32
 
@@ -0,0 +1,21 @@
1
+ import { DSLActivityExecutionPayload, DSLWorkflowSpec } from '@vertesia/common';
2
+ import { setupActivity } from '../dsl/setup/ActivityContext.js';
3
+
4
+ export interface LoadChildWorkflowSpecParams {
5
+ workflowName: string;
6
+ }
7
+
8
+ export async function loadChildWorkflowSpec(
9
+ payload: DSLActivityExecutionPayload<LoadChildWorkflowSpecParams>,
10
+ ): Promise<DSLWorkflowSpec> {
11
+ const { client, params } = await setupActivity<LoadChildWorkflowSpecParams>(payload);
12
+ const { workflowName } = params;
13
+
14
+ const refs = await client.store.workflows.definitions.list();
15
+ const ref = refs.find(r => r.name === workflowName);
16
+ if (!ref) {
17
+ throw new Error(`Workflow definition not found: ${workflowName}`);
18
+ }
19
+
20
+ return client.store.workflows.definitions.retrieve(ref.id) as Promise<DSLWorkflowSpec>;
21
+ }
@@ -221,11 +221,12 @@ export async function prepareAudio(
221
221
  throw new DocumentNotFoundError(`Document ${objectId} has no source`, [objectId]);
222
222
  }
223
223
 
224
- if (!inputObject.content.type || !inputObject.content.type.startsWith('audio/')) {
224
+ if (!inputObject.content.type ||
225
+ (!inputObject.content.type.startsWith('audio/') && !inputObject.content.type.startsWith('video/'))) {
225
226
  log.error(`Document ${objectId} is not an audio file: ${inputObject.content.type}`);
226
227
  throw new InvalidContentTypeError(
227
228
  objectId,
228
- 'audio/*',
229
+ 'audio/* or video/*',
229
230
  inputObject.content.type || 'unknown',
230
231
  );
231
232
  }
@@ -1,4 +1,4 @@
1
- import { log } from '@temporalio/activity';
1
+ import { ApplicationFailure, log } from '@temporalio/activity';
2
2
  import { DSLActivityExecutionPayload, DSLActivitySpec, VideoMetadata, VideoRendition, POSTER_RENDITION_NAME, AUDIO_RENDITION_NAME, WEB_VIDEO_RENDITION_NAME, ContentNature } from '@vertesia/common';
3
3
  import { exec } from 'child_process';
4
4
  import fs from 'fs';
@@ -84,7 +84,7 @@ async function getVideoMetadata(videoPath: string): Promise<VideoMetadataExtende
84
84
  );
85
85
 
86
86
  if (!videoStream) {
87
- throw new Error('No video stream found in file');
87
+ throw ApplicationFailure.nonRetryable('No video stream found in file');
88
88
  }
89
89
 
90
90
  const duration = parseFloat(metadata.format.duration ?? '0') || 0;
@@ -111,6 +111,9 @@ async function getVideoMetadata(videoPath: string): Promise<VideoMetadataExtende
111
111
  log.error(
112
112
  `Failed to get video metadata: ${error instanceof Error ? error.message : 'Unknown error'}`,
113
113
  );
114
+ if (error instanceof ApplicationFailure) {
115
+ throw error;
116
+ }
114
117
  throw new Error(
115
118
  `Failed to probe video metadata: ${error instanceof Error ? error.message : 'Unknown error'}`,
116
119
  );
@@ -0,0 +1,126 @@
1
+ import { MockActivityEnvironment } from '@temporalio/testing';
2
+ import type { VertesiaClient } from '@vertesia/client';
3
+ import { ContentEventName, DSLActivityExecutionPayload } from '@vertesia/common';
4
+ import type { ActivityContext } from '@vertesia/workflow';
5
+ import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
6
+ import { probeMediaStreams, ProbeMediaStreamsParams, ProbeMediaStreamsResult } from './probeMediaStreams.js';
7
+
8
+ vi.mock('../../dsl/setup/ActivityContext.js', async (importOriginal) => {
9
+ const actual = await importOriginal<typeof import('../../dsl/setup/ActivityContext.js')>();
10
+ return { ...actual, setupActivity: vi.fn() };
11
+ });
12
+
13
+ // child_process.exec uses util.promisify.custom to return { stdout, stderr }.
14
+ // vi.hoisted ensures these are defined before the vi.mock factory runs.
15
+ const { execMock, execCustom } = vi.hoisted(() => {
16
+ const custom = vi.fn();
17
+ const mock = Object.assign(vi.fn(), { [Symbol.for('nodejs.util.promisify.custom')]: custom });
18
+ return { execMock: mock, execCustom: custom };
19
+ });
20
+ vi.mock('child_process', () => ({ exec: execMock }));
21
+
22
+ let testEnv: MockActivityEnvironment;
23
+
24
+ beforeAll(async () => {
25
+ testEnv = new MockActivityEnvironment();
26
+ });
27
+
28
+ beforeEach(() => {
29
+ vi.clearAllMocks();
30
+ });
31
+
32
+ const createPayload = (objectId = 'test-object-id'): DSLActivityExecutionPayload<ProbeMediaStreamsParams> => ({
33
+ auth_token: 'mock-token',
34
+ account_id: 'test-account',
35
+ project_id: 'test-project',
36
+ params: {},
37
+ config: { studio_url: 'http://mock-studio', store_url: 'http://mock-store' },
38
+ workflow_name: 'test-workflow',
39
+ event: ContentEventName.create,
40
+ objectIds: [objectId],
41
+ input: { inputType: 'objectIds', objectIds: [objectId] },
42
+ vars: {},
43
+ activity: { name: 'probeMediaStreams', params: {} },
44
+ });
45
+
46
+ function mockExec(stdout: string) {
47
+ execCustom.mockResolvedValue({ stdout, stderr: '' });
48
+ }
49
+
50
+ async function setupMockContext(objectId: string, signedUrl: string): Promise<void> {
51
+ const { setupActivity } = await import('../../dsl/setup/ActivityContext.js');
52
+ const mockClient = {
53
+ objects: {
54
+ retrieve: vi.fn().mockResolvedValue({
55
+ content: { source: 'gs://bucket/file.mp4', type: 'video/mp4' },
56
+ }),
57
+ },
58
+ files: {
59
+ getDownloadUrl: vi.fn().mockResolvedValue({ url: signedUrl }),
60
+ },
61
+ } as unknown as VertesiaClient;
62
+ vi.mocked(setupActivity).mockResolvedValue({
63
+ client: mockClient,
64
+ objectId,
65
+ inputType: 'objectIds',
66
+ params: {} satisfies ProbeMediaStreamsParams,
67
+ } as unknown as ActivityContext<ProbeMediaStreamsParams>);
68
+ }
69
+
70
+ describe('probeMediaStreams', () => {
71
+ it('returns hasVideo=true and hasAudio=true for a video+audio container', async () => {
72
+ await setupMockContext('test-object-id', 'https://storage.example.com/file.mp4?token=abc');
73
+ mockExec(JSON.stringify({ streams: [{ codec_type: 'video' }, { codec_type: 'audio' }] }));
74
+
75
+ const result: ProbeMediaStreamsResult = await testEnv.run(probeMediaStreams, createPayload());
76
+
77
+ expect(result).toEqual({ hasVideo: true, hasAudio: true });
78
+ });
79
+
80
+ it('returns hasVideo=true and hasAudio=false for a video-only container', async () => {
81
+ await setupMockContext('test-object-id', 'https://storage.example.com/file.mp4');
82
+ mockExec(JSON.stringify({ streams: [{ codec_type: 'video' }] }));
83
+
84
+ const result: ProbeMediaStreamsResult = await testEnv.run(probeMediaStreams, createPayload());
85
+
86
+ expect(result).toEqual({ hasVideo: true, hasAudio: false });
87
+ });
88
+
89
+ it('returns hasVideo=false and hasAudio=true for an audio-only container (the bug case)', async () => {
90
+ await setupMockContext('test-object-id', 'https://storage.example.com/audio-only.mp4');
91
+ mockExec(JSON.stringify({ streams: [{ codec_type: 'audio' }] }));
92
+
93
+ const result: ProbeMediaStreamsResult = await testEnv.run(probeMediaStreams, createPayload());
94
+
95
+ expect(result).toEqual({ hasVideo: false, hasAudio: true });
96
+ });
97
+
98
+ it('throws nonRetryable ApplicationFailure when no usable streams are found', async () => {
99
+ await setupMockContext('test-object-id', 'https://storage.example.com/bad.mp4');
100
+ mockExec(JSON.stringify({ streams: [] }));
101
+
102
+ await expect(testEnv.run(probeMediaStreams, createPayload())).rejects.toThrow(
103
+ 'No audio or video streams found in container',
104
+ );
105
+ });
106
+
107
+ it('throws DocumentNotFoundError when the object has no source', async () => {
108
+ const { setupActivity } = await import('../../dsl/setup/ActivityContext.js');
109
+ const mockClient = {
110
+ objects: {
111
+ retrieve: vi.fn().mockResolvedValue({ content: {} }),
112
+ },
113
+ files: { getDownloadUrl: vi.fn() },
114
+ } as unknown as VertesiaClient;
115
+ vi.mocked(setupActivity).mockResolvedValue({
116
+ client: mockClient,
117
+ objectId: 'test-object-id',
118
+ inputType: 'objectIds',
119
+ params: {},
120
+ } as unknown as ActivityContext<ProbeMediaStreamsParams>);
121
+
122
+ await expect(testEnv.run(probeMediaStreams, createPayload())).rejects.toThrow(
123
+ 'has no source',
124
+ );
125
+ });
126
+ });
@@ -0,0 +1,81 @@
1
+ import { ApplicationFailure, log } from '@temporalio/activity';
2
+ import { DSLActivityExecutionPayload, DSLActivitySpec } from '@vertesia/common';
3
+ import { RequestError } from '@vertesia/api-fetch-client';
4
+ import { exec } from 'child_process';
5
+ import { promisify } from 'util';
6
+ import { setupActivity } from '../../dsl/setup/ActivityContext.js';
7
+ import { DocumentNotFoundError } from '../../errors.js';
8
+
9
+ const execAsync = promisify(exec);
10
+
11
+ const FFPROBE_MAX_BUFFER = 1024 * 1024; // 1MB is more than enough for stream metadata JSON
12
+
13
+ export interface ProbeMediaStreamsResult {
14
+ hasVideo: boolean;
15
+ hasAudio: boolean;
16
+ }
17
+
18
+ // eslint-disable-next-line @typescript-eslint/no-empty-object-type
19
+ export interface ProbeMediaStreamsParams {}
20
+
21
+ export interface ProbeMediaStreams extends DSLActivitySpec<ProbeMediaStreamsParams> {
22
+ name: 'probeMediaStreams';
23
+ }
24
+
25
+ interface FFProbeStream {
26
+ codec_type: string;
27
+ }
28
+
29
+ interface FFProbeOutput {
30
+ streams: FFProbeStream[];
31
+ }
32
+
33
+ export async function probeMediaStreams(payload: DSLActivityExecutionPayload<ProbeMediaStreamsParams>): Promise<ProbeMediaStreamsResult> {
34
+ const { client, objectId } = await setupActivity<ProbeMediaStreamsParams>(payload);
35
+
36
+ const inputObject = await client.objects.retrieve(objectId).catch((err: unknown) => {
37
+ log.error(`Failed to retrieve object ${objectId}`, { err });
38
+ if (err instanceof RequestError && err.status === 404) {
39
+ throw new DocumentNotFoundError(`Object ${objectId} not found`, [objectId]);
40
+ }
41
+ throw err;
42
+ });
43
+
44
+ const source = inputObject.content?.source;
45
+ if (!source) {
46
+ throw new DocumentNotFoundError(`Object ${objectId} has no source`, [objectId]);
47
+ }
48
+
49
+ const { url } = await client.files.getDownloadUrl(source);
50
+ if (!url) {
51
+ throw new DocumentNotFoundError(`Failed to get download URL for object ${objectId}`);
52
+ }
53
+
54
+ // ffprobe reads only the container headers via HTTP range requests.
55
+ // -probesize 32k caps the amount read from the network to ~32 KB.
56
+ let stdout: string;
57
+ try {
58
+ ({ stdout } = await execAsync(
59
+ `ffprobe -v quiet -probesize 32k -print_format json -show_streams "${url}"`,
60
+ { maxBuffer: FFPROBE_MAX_BUFFER },
61
+ ));
62
+ } catch (err: unknown) {
63
+ const message = err instanceof Error ? err.message : String(err);
64
+ log.error(`ffprobe failed for object ${objectId}: ${message}`);
65
+ throw new Error(`Failed to probe media streams for object ${objectId}: ${message}`);
66
+ }
67
+
68
+ const { streams } = JSON.parse(stdout) as FFProbeOutput;
69
+ const hasVideo = streams.some(s => s.codec_type === 'video');
70
+ const hasAudio = streams.some(s => s.codec_type === 'audio');
71
+
72
+ log.info(`Media probe result for object ${objectId}`, { hasVideo, hasAudio });
73
+
74
+ if (!hasVideo && !hasAudio) {
75
+ throw ApplicationFailure.nonRetryable(
76
+ `No audio or video streams found in container for object ${objectId}`,
77
+ );
78
+ }
79
+
80
+ return { hasVideo, hasAudio };
81
+ }
@@ -1,7 +1,7 @@
1
1
  import { MockActivityEnvironment } from "@temporalio/testing";
2
2
  import { ContentEventName, DSLActivityExecutionPayload } from "@vertesia/common";
3
3
  import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
4
- import { resolveRemoteActivities, ResolveRemoteActivitiesParams } from "./resolveRemoteActivities.js";
4
+ import { resolveRemoteActivities, type RemoteActivityMap, ResolveRemoteActivitiesParams } from "./resolveRemoteActivities.js";
5
5
 
6
6
  vi.stubGlobal("fetch", vi.fn());
7
7
 
@@ -11,6 +11,7 @@ vi.mock("../utils/client.js", () => ({
11
11
  getVertesiaClient: vi.fn().mockReturnValue({
12
12
  apps: {
13
13
  getInstalledApps: (...args: any[]) => mockGetInstalledApps(...args),
14
+ validateUrl: vi.fn().mockResolvedValue({ valid: true }),
14
15
  },
15
16
  }),
16
17
  }));
@@ -46,7 +47,7 @@ describe("resolveRemoteActivities", () => {
46
47
  it("returns empty map when no apps installed", async () => {
47
48
  mockGetInstalledApps.mockResolvedValueOnce([]);
48
49
 
49
- const result = await testEnv.run(resolveRemoteActivities, createPayload());
50
+ const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
50
51
  expect(result).toEqual({});
51
52
  expect(mockGetInstalledApps).toHaveBeenCalledWith("tools");
52
53
  });
@@ -73,7 +74,7 @@ describe("resolveRemoteActivities", () => {
73
74
  ),
74
75
  );
75
76
 
76
- const result = await testEnv.run(resolveRemoteActivities, createPayload());
77
+ const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
77
78
 
78
79
  expect(Object.keys(result)).toHaveLength(2);
79
80
  expect(result["app:my-nlp-app:nlp:analyze_sentiment"]).toBeDefined();
@@ -108,7 +109,7 @@ describe("resolveRemoteActivities", () => {
108
109
  new Response(JSON.stringify({ activities: [{ name: "task_b", collection: "main" }] }), { status: 200 }),
109
110
  );
110
111
 
111
- const result = await testEnv.run(resolveRemoteActivities, createPayload());
112
+ const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
112
113
 
113
114
  expect(Object.keys(result)).toHaveLength(2);
114
115
  expect(result["app:app-one:main:task_a"]).toBeDefined();
@@ -125,7 +126,7 @@ describe("resolveRemoteActivities", () => {
125
126
  new Response(JSON.stringify({ activities: [] }), { status: 200 }),
126
127
  );
127
128
 
128
- const result = await testEnv.run(resolveRemoteActivities, createPayload());
129
+ const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
129
130
  expect(result).toEqual({});
130
131
  });
131
132
 
@@ -135,7 +136,7 @@ describe("resolveRemoteActivities", () => {
135
136
  manifest: { name: "no-endpoint" },
136
137
  }]);
137
138
 
138
- const result = await testEnv.run(resolveRemoteActivities, createPayload());
139
+ const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
139
140
  expect(result).toEqual({});
140
141
  expect(mockFetch).not.toHaveBeenCalled();
141
142
  });
@@ -160,7 +161,7 @@ describe("resolveRemoteActivities", () => {
160
161
  new Response(JSON.stringify({ activities: [{ name: "task", collection: "main" }] }), { status: 200 }),
161
162
  );
162
163
 
163
- const result = await testEnv.run(resolveRemoteActivities, createPayload());
164
+ const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
164
165
  expect(Object.keys(result)).toHaveLength(1);
165
166
  expect(result["app:same-app:main:task"].app_install_id).toBe("install-1");
166
167
  });
@@ -183,7 +184,7 @@ describe("resolveRemoteActivities", () => {
183
184
  new Response(JSON.stringify({ activities: [{ name: "task", collection: "main" }] }), { status: 200 }),
184
185
  );
185
186
 
186
- const result = await testEnv.run(resolveRemoteActivities, createPayload());
187
+ const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
187
188
  expect(Object.keys(result)).toHaveLength(1);
188
189
  expect(result["app:working-app:main:task"]).toBeDefined();
189
190
  });
@@ -191,7 +192,7 @@ describe("resolveRemoteActivities", () => {
191
192
  it("returns empty map when getInstalledApps fails", async () => {
192
193
  mockGetInstalledApps.mockRejectedValueOnce(new Error("API error"));
193
194
 
194
- const result = await testEnv.run(resolveRemoteActivities, createPayload());
195
+ const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
195
196
  expect(result).toEqual({});
196
197
  });
197
198
 
@@ -213,7 +214,7 @@ describe("resolveRemoteActivities", () => {
213
214
  ),
214
215
  );
215
216
 
216
- const result = await testEnv.run(resolveRemoteActivities, createPayload()) as Record<string, unknown>;
217
+ const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
217
218
  expect(Object.keys(result)).toHaveLength(1);
218
219
  expect(result["app:bad-app:main:has_collection"]).toBeDefined();
219
220
  });
@@ -5,7 +5,9 @@ import {
5
5
  DSLActivityExecutionPayload,
6
6
  RemoteActivityDefinition,
7
7
  } from "@vertesia/common";
8
+ import { VertesiaClient } from "@vertesia/client";
8
9
  import { setupActivity } from "../dsl/setup/ActivityContext.js";
10
+ import { URLValidationError, safeFetch } from "../security/ssrf.js";
9
11
 
10
12
  /** Prefix identifying a remote activity name in DSL workflow steps */
11
13
  const REMOTE_ACTIVITY_PREFIX = "app:";
@@ -66,7 +68,7 @@ export async function resolveRemoteActivities(
66
68
  }
67
69
 
68
70
  try {
69
- const pkg = await fetchActivitiesPackage(manifest.endpoint, payload.auth_token);
71
+ const pkg = await fetchActivitiesPackage(manifest.endpoint, payload.auth_token, client);
70
72
  if (!pkg.activities || pkg.activities.length === 0) {
71
73
  continue;
72
74
  }
@@ -94,7 +96,7 @@ export async function resolveRemoteActivities(
94
96
  }
95
97
 
96
98
  // Resolve the activity execution URL (collection-specific endpoint)
97
- const activityUrl = resolveActivityUrl(manifest.endpoint, activity, collection);
99
+ const activityUrl = await resolveActivityUrl(manifest.endpoint, activity, collection, client);
98
100
 
99
101
  map[qualifiedName] = {
100
102
  url: activityUrl,
@@ -126,11 +128,13 @@ export async function resolveRemoteActivities(
126
128
  /**
127
129
  * Fetches the activities scope from a tool server package endpoint.
128
130
  */
129
- async function fetchActivitiesPackage(endpoint: string, authToken: string): Promise<AppPackage> {
131
+ async function fetchActivitiesPackage(endpoint: string, authToken: string, client: VertesiaClient): Promise<AppPackage> {
130
132
  const url = new URL(endpoint);
131
133
  url.searchParams.set('scope', 'activities');
132
134
 
133
- const response = await fetch(url.toString(), {
135
+ await client.apps.validateUrl(url.toString());
136
+
137
+ const response = await safeFetch(url.toString(), {
134
138
  method: 'GET',
135
139
  headers: {
136
140
  'Accept': 'application/json',
@@ -147,21 +151,32 @@ async function fetchActivitiesPackage(endpoint: string, authToken: string): Prom
147
151
  }
148
152
 
149
153
  /**
150
- * Resolves the execution URL for a remote activity.
154
+ * Resolves and validates the execution URL for a remote activity.
151
155
  * If the activity has a `url` field, resolve it relative to the endpoint base.
152
156
  * Otherwise, use the collection-specific activities endpoint: `/api/activities/{collection}`.
157
+ * Validates the resolved URL to prevent second-hop SSRF from tool server responses.
153
158
  */
154
- function resolveActivityUrl(endpoint: string, activity: RemoteActivityDefinition, collection: string): string {
159
+ async function resolveActivityUrl(endpoint: string, activity: RemoteActivityDefinition, collection: string, client: VertesiaClient): Promise<string> {
160
+ let resolved: string;
155
161
  if (activity.url) {
156
- // Absolute URLs are used as-is
157
- if (activity.url.startsWith('http://') || activity.url.startsWith('https://')) {
158
- return activity.url;
159
- }
160
- // Resolve relative URLs against the endpoint's base path (not just origin)
161
- return new URL(activity.url, endpoint).toString();
162
+ // Absolute URLs are used as-is; relative URLs are resolved against the endpoint base
163
+ resolved = (activity.url.startsWith('http://') || activity.url.startsWith('https://'))
164
+ ? activity.url
165
+ : new URL(activity.url, endpoint).toString();
166
+ } else {
167
+ // Default: POST to the collection-specific activities endpoint
168
+ const base = new URL(endpoint);
169
+ const activitiesPath = base.pathname.replace(/\/package\/?$/, `/activities/${collection}`);
170
+ resolved = new URL(activitiesPath, base.origin).toString();
171
+ }
172
+
173
+ // Validate the resolved URL via Studio — safeFetch on the discovery request does NOT protect this
174
+ // second-hop URL which comes from the tool server response body.
175
+ try {
176
+ await client.apps.validateUrl(resolved);
177
+ } catch (e) {
178
+ throw new URLValidationError(`Blocked activity URL from app response: ${(e as Error).message}`);
162
179
  }
163
- // Default: POST to the collection-specific activities endpoint
164
- const base = new URL(endpoint);
165
- const activitiesPath = base.pathname.replace(/\/package\/?$/, `/activities/${collection}`);
166
- return new URL(activitiesPath, base.origin).toString();
180
+
181
+ return resolved;
167
182
  }