@vertesia/workflow 1.1.0-dev.20260327.125707Z → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/activities/executeRemoteActivity.js +21 -2
- package/lib/cjs/activities/executeRemoteActivity.js.map +1 -1
- package/lib/cjs/activities/generateEmbeddings.js +5 -3
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -1
- package/lib/cjs/activities/handleError.js +8 -1
- package/lib/cjs/activities/handleError.js.map +1 -1
- package/lib/cjs/activities/index-dsl.js +5 -1
- package/lib/cjs/activities/index-dsl.js.map +1 -1
- package/lib/cjs/activities/loadChildWorkflowSpec.js +15 -0
- package/lib/cjs/activities/loadChildWorkflowSpec.js.map +1 -0
- package/lib/cjs/activities/media/prepareAudio.js +3 -2
- package/lib/cjs/activities/media/prepareAudio.js.map +1 -1
- package/lib/cjs/activities/media/prepareVideo.js +4 -1
- package/lib/cjs/activities/media/prepareVideo.js.map +1 -1
- package/lib/cjs/activities/media/probeMediaStreams.js +49 -0
- package/lib/cjs/activities/media/probeMediaStreams.js.map +1 -0
- package/lib/cjs/activities/media/saveGladiaTranscription.js +8 -0
- package/lib/cjs/activities/media/saveGladiaTranscription.js.map +1 -1
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js +8 -0
- package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -1
- package/lib/cjs/activities/resolveRemoteActivities.js +29 -16
- package/lib/cjs/activities/resolveRemoteActivities.js.map +1 -1
- package/lib/cjs/dsl/dsl-workflow.js +22 -4
- package/lib/cjs/dsl/dsl-workflow.js.map +1 -1
- package/lib/cjs/dsl/setup/ActivityContext.js +12 -2
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -1
- package/lib/cjs/security/ssrf.js +34 -0
- package/lib/cjs/security/ssrf.js.map +1 -0
- package/lib/esm/activities/executeRemoteActivity.js +21 -2
- package/lib/esm/activities/executeRemoteActivity.js.map +1 -1
- package/lib/esm/activities/generateEmbeddings.js +5 -3
- package/lib/esm/activities/generateEmbeddings.js.map +1 -1
- package/lib/esm/activities/handleError.js +8 -1
- package/lib/esm/activities/handleError.js.map +1 -1
- package/lib/esm/activities/index-dsl.js +2 -0
- package/lib/esm/activities/index-dsl.js.map +1 -1
- package/lib/esm/activities/loadChildWorkflowSpec.js +12 -0
- package/lib/esm/activities/loadChildWorkflowSpec.js.map +1 -0
- package/lib/esm/activities/media/prepareAudio.js +3 -2
- package/lib/esm/activities/media/prepareAudio.js.map +1 -1
- package/lib/esm/activities/media/prepareVideo.js +5 -2
- package/lib/esm/activities/media/prepareVideo.js.map +1 -1
- package/lib/esm/activities/media/probeMediaStreams.js +46 -0
- package/lib/esm/activities/media/probeMediaStreams.js.map +1 -0
- package/lib/esm/activities/media/saveGladiaTranscription.js +8 -0
- package/lib/esm/activities/media/saveGladiaTranscription.js.map +1 -1
- package/lib/esm/activities/media/transcribeMediaWithGladia.js +8 -0
- package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -1
- package/lib/esm/activities/resolveRemoteActivities.js +29 -16
- package/lib/esm/activities/resolveRemoteActivities.js.map +1 -1
- package/lib/esm/dsl/dsl-workflow.js +22 -4
- package/lib/esm/dsl/dsl-workflow.js.map +1 -1
- package/lib/esm/dsl/setup/ActivityContext.js +12 -2
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -1
- package/lib/esm/security/ssrf.js +29 -0
- package/lib/esm/security/ssrf.js.map +1 -0
- package/lib/tsconfig.tsbuildinfo +1 -1
- package/lib/types/activities/executeRemoteActivity.d.ts.map +1 -1
- package/lib/types/activities/generateEmbeddings.d.ts +6 -4
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -1
- package/lib/types/activities/handleError.d.ts.map +1 -1
- package/lib/types/activities/index-dsl.d.ts +3 -0
- package/lib/types/activities/index-dsl.d.ts.map +1 -1
- package/lib/types/activities/loadChildWorkflowSpec.d.ts +6 -0
- package/lib/types/activities/loadChildWorkflowSpec.d.ts.map +1 -0
- package/lib/types/activities/media/prepareAudio.d.ts.map +1 -1
- package/lib/types/activities/media/prepareVideo.d.ts.map +1 -1
- package/lib/types/activities/media/probeMediaStreams.d.ts +12 -0
- package/lib/types/activities/media/probeMediaStreams.d.ts.map +1 -0
- package/lib/types/activities/media/saveGladiaTranscription.d.ts.map +1 -1
- package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -1
- package/lib/types/activities/resolveRemoteActivities.d.ts.map +1 -1
- package/lib/types/dsl/dsl-workflow.d.ts.map +1 -1
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -1
- package/lib/types/security/ssrf.d.ts +18 -0
- package/lib/types/security/ssrf.d.ts.map +1 -0
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +6 -4
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -1
- package/lib/workflows-bundle.js +352 -158
- package/package.json +7 -7
- package/src/activities/executeRemoteActivity.test.ts +8 -0
- package/src/activities/executeRemoteActivity.ts +21 -2
- package/src/activities/generateEmbeddings.ts +6 -3
- package/src/activities/handleError.ts +9 -1
- package/src/activities/index-dsl.ts +3 -0
- package/src/activities/loadChildWorkflowSpec.ts +21 -0
- package/src/activities/media/prepareAudio.ts +3 -2
- package/src/activities/media/prepareVideo.ts +5 -2
- package/src/activities/media/probeMediaStreams.test.ts +126 -0
- package/src/activities/media/probeMediaStreams.ts +81 -0
- package/src/activities/media/saveGladiaTranscription.ts +8 -0
- package/src/activities/media/transcribeMediaWithGladia.ts +8 -0
- package/src/activities/resolveRemoteActivities.test.ts +11 -10
- package/src/activities/resolveRemoteActivities.ts +31 -16
- package/src/dsl/dsl-workflow.ts +22 -4
- package/src/dsl/setup/ActivityContext.test.ts +57 -0
- package/src/dsl/setup/ActivityContext.ts +16 -2
- package/src/security/ssrf.ts +32 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vertesia/workflow",
|
|
3
|
-
"version": "1.1.0-dev.20260327.125707Z",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Vertesia workflow DSL",
|
|
6
6
|
"main": "./lib/esm/index.js",
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"@temporalio/testing": "^1.11.5",
|
|
19
19
|
"@temporalio/worker": "^1.11.5",
|
|
20
20
|
"@types/jsonwebtoken": "^9.0.10",
|
|
21
|
-
"@types/node": "^
|
|
21
|
+
"@types/node": "^25.6.0",
|
|
22
22
|
"@types/papaparse": "^5.5.1",
|
|
23
23
|
"@types/tmp": "^0.2.6",
|
|
24
24
|
"ts-dual-module": "^0.6.3",
|
|
@@ -43,11 +43,11 @@
|
|
|
43
43
|
"tmp": "^0.2.4",
|
|
44
44
|
"tmp-promise": "^3.0.3",
|
|
45
45
|
"yaml": "^2.6.0",
|
|
46
|
-
"@llumiverse/common": "1.
|
|
47
|
-
"@vertesia/
|
|
48
|
-
"@vertesia/
|
|
49
|
-
"@vertesia/
|
|
50
|
-
"@vertesia/
|
|
46
|
+
"@llumiverse/common": "1.1.0",
|
|
47
|
+
"@vertesia/client": "1.1.0",
|
|
48
|
+
"@vertesia/common": "1.1.0",
|
|
49
|
+
"@vertesia/memory": "1.1.0",
|
|
50
|
+
"@vertesia/api-fetch-client": "1.1.0"
|
|
51
51
|
},
|
|
52
52
|
"ts_dual_module": {
|
|
53
53
|
"outDir": "lib",
|
|
@@ -5,6 +5,14 @@ import { executeRemoteActivity, ExecuteRemoteActivityParams } from "./executeRem
|
|
|
5
5
|
|
|
6
6
|
vi.stubGlobal("fetch", vi.fn());
|
|
7
7
|
|
|
8
|
+
vi.mock("../utils/client.js", () => ({
|
|
9
|
+
getVertesiaClient: vi.fn().mockReturnValue({
|
|
10
|
+
apps: {
|
|
11
|
+
validateUrl: vi.fn().mockResolvedValue({ valid: true }),
|
|
12
|
+
},
|
|
13
|
+
}),
|
|
14
|
+
}));
|
|
15
|
+
|
|
8
16
|
let testEnv: MockActivityEnvironment;
|
|
9
17
|
const mockFetch = vi.mocked(fetch);
|
|
10
18
|
|
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
RemoteActivityExecutionResponse,
|
|
6
6
|
} from "@vertesia/common";
|
|
7
7
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
|
8
|
+
import { URLValidationError, safeFetch } from "../security/ssrf.js";
|
|
8
9
|
|
|
9
10
|
/**
|
|
10
11
|
* Parameters for the executeRemoteActivity bridge activity.
|
|
@@ -39,9 +40,20 @@ export async function executeRemoteActivity(
|
|
|
39
40
|
payload: DSLActivityExecutionPayload<ExecuteRemoteActivityParams>,
|
|
40
41
|
): Promise<any> {
|
|
41
42
|
const ctx = await setupActivity<ExecuteRemoteActivityParams>(payload);
|
|
42
|
-
const { params, runId } = ctx;
|
|
43
|
+
const { params, runId, client } = ctx;
|
|
43
44
|
const { url, activity_name, params: activityParams, app_install_id, app_settings } = params;
|
|
44
45
|
|
|
46
|
+
// Validate the URL via Studio before forwarding the auth token
|
|
47
|
+
try {
|
|
48
|
+
await client.apps.validateUrl(url);
|
|
49
|
+
} catch (e) {
|
|
50
|
+
log.warn("URL validation blocked remote activity endpoint", { activity: activity_name, url, error: (e as Error).message });
|
|
51
|
+
throw ApplicationFailure.create({
|
|
52
|
+
message: `Remote activity ${activity_name} blocked: ${(e as Error).message}`,
|
|
53
|
+
nonRetryable: true,
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
|
|
45
57
|
const executionPayload: RemoteActivityExecutionPayload = {
|
|
46
58
|
activity_name,
|
|
47
59
|
params: activityParams,
|
|
@@ -58,7 +70,7 @@ export async function executeRemoteActivity(
|
|
|
58
70
|
|
|
59
71
|
let response: Response;
|
|
60
72
|
try {
|
|
61
|
-
response = await fetch(url, {
|
|
73
|
+
response = await safeFetch(url, {
|
|
62
74
|
method: "POST",
|
|
63
75
|
headers: {
|
|
64
76
|
"Content-Type": "application/json",
|
|
@@ -68,6 +80,13 @@ export async function executeRemoteActivity(
|
|
|
68
80
|
body: JSON.stringify(executionPayload),
|
|
69
81
|
});
|
|
70
82
|
} catch (err: unknown) {
|
|
83
|
+
if (err instanceof URLValidationError) {
|
|
84
|
+
log.warn("Redirect blocked on remote activity endpoint", { activity: activity_name, url, error: err.message });
|
|
85
|
+
throw ApplicationFailure.create({
|
|
86
|
+
message: `Remote activity ${activity_name} blocked: ${err.message}`,
|
|
87
|
+
nonRetryable: true,
|
|
88
|
+
});
|
|
89
|
+
}
|
|
71
90
|
const message = err instanceof Error ? err.message : String(err);
|
|
72
91
|
log.warn("Failed to reach remote activity endpoint", {
|
|
73
92
|
error: message, activity: activity_name, endpoint: url, runId, app_install_id,
|
|
@@ -218,14 +218,17 @@ async function generateTextEmbeddings(
|
|
|
218
218
|
tokenCount = countTokens(document.text).count;
|
|
219
219
|
}
|
|
220
220
|
|
|
221
|
+
if (type === SupportedEmbeddingTypes.properties && document.properties) {
|
|
222
|
+
const propertiesText = JSON.stringify(document.properties);
|
|
223
|
+
tokenCount = countTokens(propertiesText).count;
|
|
224
|
+
}
|
|
225
|
+
|
|
221
226
|
const maxTokens = config.max_tokens ?? 8000;
|
|
222
227
|
|
|
223
228
|
//generate embeddings for the main doc if document isn't too large
|
|
224
229
|
log.debug(`Generating ${type} embeddings for document ${document.id}`);
|
|
225
230
|
if (
|
|
226
|
-
|
|
227
|
-
tokenCount !== undefined &&
|
|
228
|
-
tokenCount > maxTokens
|
|
231
|
+
tokenCount !== undefined && tokenCount > maxTokens
|
|
229
232
|
) {
|
|
230
233
|
//TODO: Review strategy for large documents
|
|
231
234
|
log.warn(
|
|
@@ -8,7 +8,15 @@ export interface HandleDslErrorParams {
|
|
|
8
8
|
|
|
9
9
|
export async function handleDslError(payload: DSLActivityExecutionPayload<HandleDslErrorParams>): Promise<void> {
|
|
10
10
|
const { client, params, objectId } = await setupActivity<HandleDslErrorParams>(payload);
|
|
11
|
-
|
|
11
|
+
|
|
12
|
+
const isIntake = [
|
|
13
|
+
"StandardDocumentIntake",
|
|
14
|
+
"StandardImageIntake",
|
|
15
|
+
"StandardMediaContainerIntake",
|
|
16
|
+
"StandardVideoIntake",
|
|
17
|
+
"StandardAudioIntake",
|
|
18
|
+
"StandardDocPartIntake",
|
|
19
|
+
].includes(payload.workflow_name);
|
|
12
20
|
if (!isIntake) {
|
|
13
21
|
log.warn(`Workflow execution failed, but no error handler registered for this workflow: ${payload.workflow_name}`,
|
|
14
22
|
{ error: params.errorMessage },
|
|
@@ -16,6 +16,8 @@ export { getObjectFromStore } from "./getObjectFromStore.js";
|
|
|
16
16
|
export { handleDslError } from "./handleError.js";
|
|
17
17
|
export { prepareAudio } from "./media/prepareAudio.js";
|
|
18
18
|
export { prepareVideo } from "./media/prepareVideo.js";
|
|
19
|
+
export { probeMediaStreams } from "./media/probeMediaStreams.js";
|
|
20
|
+
export type { ProbeMediaStreamsResult } from "./media/probeMediaStreams.js";
|
|
19
21
|
export { convertPdfToStructuredText } from "./media/processPdfWithTextract.js";
|
|
20
22
|
export { saveGladiaTranscription } from "./media/saveGladiaTranscription.js";
|
|
21
23
|
export { transcribeMedia } from "./media/transcribeMediaWithGladia.js";
|
|
@@ -26,4 +28,5 @@ export { checkRateLimit } from "./rateLimiter.js";
|
|
|
26
28
|
export { generateImageRendition } from "./renditions/generateImageRendition.js";
|
|
27
29
|
export { generateVideoRendition } from "./renditions/generateVideoRendition.js";
|
|
28
30
|
export { setDocumentStatus } from "./setDocumentStatus.js";
|
|
31
|
+
export { loadChildWorkflowSpec } from "./loadChildWorkflowSpec.js";
|
|
29
32
|
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { DSLActivityExecutionPayload, DSLWorkflowSpec } from '@vertesia/common';
|
|
2
|
+
import { setupActivity } from '../dsl/setup/ActivityContext.js';
|
|
3
|
+
|
|
4
|
+
export interface LoadChildWorkflowSpecParams {
|
|
5
|
+
workflowName: string;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
export async function loadChildWorkflowSpec(
|
|
9
|
+
payload: DSLActivityExecutionPayload<LoadChildWorkflowSpecParams>,
|
|
10
|
+
): Promise<DSLWorkflowSpec> {
|
|
11
|
+
const { client, params } = await setupActivity<LoadChildWorkflowSpecParams>(payload);
|
|
12
|
+
const { workflowName } = params;
|
|
13
|
+
|
|
14
|
+
const refs = await client.store.workflows.definitions.list();
|
|
15
|
+
const ref = refs.find(r => r.name === workflowName);
|
|
16
|
+
if (!ref) {
|
|
17
|
+
throw new Error(`Workflow definition not found: ${workflowName}`);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
return client.store.workflows.definitions.retrieve(ref.id) as Promise<DSLWorkflowSpec>;
|
|
21
|
+
}
|
|
@@ -221,11 +221,12 @@ export async function prepareAudio(
|
|
|
221
221
|
throw new DocumentNotFoundError(`Document ${objectId} has no source`, [objectId]);
|
|
222
222
|
}
|
|
223
223
|
|
|
224
|
-
if (!inputObject.content.type ||
|
|
224
|
+
if (!inputObject.content.type ||
|
|
225
|
+
(!inputObject.content.type.startsWith('audio/') && !inputObject.content.type.startsWith('video/'))) {
|
|
225
226
|
log.error(`Document ${objectId} is not an audio file: ${inputObject.content.type}`);
|
|
226
227
|
throw new InvalidContentTypeError(
|
|
227
228
|
objectId,
|
|
228
|
-
'audio/*',
|
|
229
|
+
'audio/* or video/*',
|
|
229
230
|
inputObject.content.type || 'unknown',
|
|
230
231
|
);
|
|
231
232
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { log } from '@temporalio/activity';
|
|
1
|
+
import { ApplicationFailure, log } from '@temporalio/activity';
|
|
2
2
|
import { DSLActivityExecutionPayload, DSLActivitySpec, VideoMetadata, VideoRendition, POSTER_RENDITION_NAME, AUDIO_RENDITION_NAME, WEB_VIDEO_RENDITION_NAME, ContentNature } from '@vertesia/common';
|
|
3
3
|
import { exec } from 'child_process';
|
|
4
4
|
import fs from 'fs';
|
|
@@ -84,7 +84,7 @@ async function getVideoMetadata(videoPath: string): Promise<VideoMetadataExtende
|
|
|
84
84
|
);
|
|
85
85
|
|
|
86
86
|
if (!videoStream) {
|
|
87
|
-
throw
|
|
87
|
+
throw ApplicationFailure.nonRetryable('No video stream found in file');
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
const duration = parseFloat(metadata.format.duration ?? '0') || 0;
|
|
@@ -111,6 +111,9 @@ async function getVideoMetadata(videoPath: string): Promise<VideoMetadataExtende
|
|
|
111
111
|
log.error(
|
|
112
112
|
`Failed to get video metadata: ${error instanceof Error ? error.message : 'Unknown error'}`,
|
|
113
113
|
);
|
|
114
|
+
if (error instanceof ApplicationFailure) {
|
|
115
|
+
throw error;
|
|
116
|
+
}
|
|
114
117
|
throw new Error(
|
|
115
118
|
`Failed to probe video metadata: ${error instanceof Error ? error.message : 'Unknown error'}`,
|
|
116
119
|
);
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import { MockActivityEnvironment } from '@temporalio/testing';
|
|
2
|
+
import type { VertesiaClient } from '@vertesia/client';
|
|
3
|
+
import { ContentEventName, DSLActivityExecutionPayload } from '@vertesia/common';
|
|
4
|
+
import type { ActivityContext } from '@vertesia/workflow';
|
|
5
|
+
import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
6
|
+
import { probeMediaStreams, ProbeMediaStreamsParams, ProbeMediaStreamsResult } from './probeMediaStreams.js';
|
|
7
|
+
|
|
8
|
+
vi.mock('../../dsl/setup/ActivityContext.js', async (importOriginal) => {
|
|
9
|
+
const actual = await importOriginal<typeof import('../../dsl/setup/ActivityContext.js')>();
|
|
10
|
+
return { ...actual, setupActivity: vi.fn() };
|
|
11
|
+
});
|
|
12
|
+
|
|
13
|
+
// child_process.exec uses util.promisify.custom to return { stdout, stderr }.
|
|
14
|
+
// vi.hoisted ensures these are defined before the vi.mock factory runs.
|
|
15
|
+
const { execMock, execCustom } = vi.hoisted(() => {
|
|
16
|
+
const custom = vi.fn();
|
|
17
|
+
const mock = Object.assign(vi.fn(), { [Symbol.for('nodejs.util.promisify.custom')]: custom });
|
|
18
|
+
return { execMock: mock, execCustom: custom };
|
|
19
|
+
});
|
|
20
|
+
vi.mock('child_process', () => ({ exec: execMock }));
|
|
21
|
+
|
|
22
|
+
let testEnv: MockActivityEnvironment;
|
|
23
|
+
|
|
24
|
+
beforeAll(async () => {
|
|
25
|
+
testEnv = new MockActivityEnvironment();
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
beforeEach(() => {
|
|
29
|
+
vi.clearAllMocks();
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
const createPayload = (objectId = 'test-object-id'): DSLActivityExecutionPayload<ProbeMediaStreamsParams> => ({
|
|
33
|
+
auth_token: 'mock-token',
|
|
34
|
+
account_id: 'test-account',
|
|
35
|
+
project_id: 'test-project',
|
|
36
|
+
params: {},
|
|
37
|
+
config: { studio_url: 'http://mock-studio', store_url: 'http://mock-store' },
|
|
38
|
+
workflow_name: 'test-workflow',
|
|
39
|
+
event: ContentEventName.create,
|
|
40
|
+
objectIds: [objectId],
|
|
41
|
+
input: { inputType: 'objectIds', objectIds: [objectId] },
|
|
42
|
+
vars: {},
|
|
43
|
+
activity: { name: 'probeMediaStreams', params: {} },
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
function mockExec(stdout: string) {
|
|
47
|
+
execCustom.mockResolvedValue({ stdout, stderr: '' });
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
async function setupMockContext(objectId: string, signedUrl: string): Promise<void> {
|
|
51
|
+
const { setupActivity } = await import('../../dsl/setup/ActivityContext.js');
|
|
52
|
+
const mockClient = {
|
|
53
|
+
objects: {
|
|
54
|
+
retrieve: vi.fn().mockResolvedValue({
|
|
55
|
+
content: { source: 'gs://bucket/file.mp4', type: 'video/mp4' },
|
|
56
|
+
}),
|
|
57
|
+
},
|
|
58
|
+
files: {
|
|
59
|
+
getDownloadUrl: vi.fn().mockResolvedValue({ url: signedUrl }),
|
|
60
|
+
},
|
|
61
|
+
} as unknown as VertesiaClient;
|
|
62
|
+
vi.mocked(setupActivity).mockResolvedValue({
|
|
63
|
+
client: mockClient,
|
|
64
|
+
objectId,
|
|
65
|
+
inputType: 'objectIds',
|
|
66
|
+
params: {} satisfies ProbeMediaStreamsParams,
|
|
67
|
+
} as unknown as ActivityContext<ProbeMediaStreamsParams>);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
describe('probeMediaStreams', () => {
|
|
71
|
+
it('returns hasVideo=true and hasAudio=true for a video+audio container', async () => {
|
|
72
|
+
await setupMockContext('test-object-id', 'https://storage.example.com/file.mp4?token=abc');
|
|
73
|
+
mockExec(JSON.stringify({ streams: [{ codec_type: 'video' }, { codec_type: 'audio' }] }));
|
|
74
|
+
|
|
75
|
+
const result: ProbeMediaStreamsResult = await testEnv.run(probeMediaStreams, createPayload());
|
|
76
|
+
|
|
77
|
+
expect(result).toEqual({ hasVideo: true, hasAudio: true });
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
it('returns hasVideo=true and hasAudio=false for a video-only container', async () => {
|
|
81
|
+
await setupMockContext('test-object-id', 'https://storage.example.com/file.mp4');
|
|
82
|
+
mockExec(JSON.stringify({ streams: [{ codec_type: 'video' }] }));
|
|
83
|
+
|
|
84
|
+
const result: ProbeMediaStreamsResult = await testEnv.run(probeMediaStreams, createPayload());
|
|
85
|
+
|
|
86
|
+
expect(result).toEqual({ hasVideo: true, hasAudio: false });
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
it('returns hasVideo=false and hasAudio=true for an audio-only container (the bug case)', async () => {
|
|
90
|
+
await setupMockContext('test-object-id', 'https://storage.example.com/audio-only.mp4');
|
|
91
|
+
mockExec(JSON.stringify({ streams: [{ codec_type: 'audio' }] }));
|
|
92
|
+
|
|
93
|
+
const result: ProbeMediaStreamsResult = await testEnv.run(probeMediaStreams, createPayload());
|
|
94
|
+
|
|
95
|
+
expect(result).toEqual({ hasVideo: false, hasAudio: true });
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
it('throws nonRetryable ApplicationFailure when no usable streams are found', async () => {
|
|
99
|
+
await setupMockContext('test-object-id', 'https://storage.example.com/bad.mp4');
|
|
100
|
+
mockExec(JSON.stringify({ streams: [] }));
|
|
101
|
+
|
|
102
|
+
await expect(testEnv.run(probeMediaStreams, createPayload())).rejects.toThrow(
|
|
103
|
+
'No audio or video streams found in container',
|
|
104
|
+
);
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
it('throws DocumentNotFoundError when the object has no source', async () => {
|
|
108
|
+
const { setupActivity } = await import('../../dsl/setup/ActivityContext.js');
|
|
109
|
+
const mockClient = {
|
|
110
|
+
objects: {
|
|
111
|
+
retrieve: vi.fn().mockResolvedValue({ content: {} }),
|
|
112
|
+
},
|
|
113
|
+
files: { getDownloadUrl: vi.fn() },
|
|
114
|
+
} as unknown as VertesiaClient;
|
|
115
|
+
vi.mocked(setupActivity).mockResolvedValue({
|
|
116
|
+
client: mockClient,
|
|
117
|
+
objectId: 'test-object-id',
|
|
118
|
+
inputType: 'objectIds',
|
|
119
|
+
params: {},
|
|
120
|
+
} as unknown as ActivityContext<ProbeMediaStreamsParams>);
|
|
121
|
+
|
|
122
|
+
await expect(testEnv.run(probeMediaStreams, createPayload())).rejects.toThrow(
|
|
123
|
+
'has no source',
|
|
124
|
+
);
|
|
125
|
+
});
|
|
126
|
+
});
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { ApplicationFailure, log } from '@temporalio/activity';
|
|
2
|
+
import { DSLActivityExecutionPayload, DSLActivitySpec } from '@vertesia/common';
|
|
3
|
+
import { RequestError } from '@vertesia/api-fetch-client';
|
|
4
|
+
import { exec } from 'child_process';
|
|
5
|
+
import { promisify } from 'util';
|
|
6
|
+
import { setupActivity } from '../../dsl/setup/ActivityContext.js';
|
|
7
|
+
import { DocumentNotFoundError } from '../../errors.js';
|
|
8
|
+
|
|
9
|
+
const execAsync = promisify(exec);
|
|
10
|
+
|
|
11
|
+
const FFPROBE_MAX_BUFFER = 1024 * 1024; // 1MB is more than enough for stream metadata JSON
|
|
12
|
+
|
|
13
|
+
export interface ProbeMediaStreamsResult {
|
|
14
|
+
hasVideo: boolean;
|
|
15
|
+
hasAudio: boolean;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
|
|
19
|
+
export interface ProbeMediaStreamsParams {}
|
|
20
|
+
|
|
21
|
+
export interface ProbeMediaStreams extends DSLActivitySpec<ProbeMediaStreamsParams> {
|
|
22
|
+
name: 'probeMediaStreams';
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
interface FFProbeStream {
|
|
26
|
+
codec_type: string;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
interface FFProbeOutput {
|
|
30
|
+
streams: FFProbeStream[];
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
export async function probeMediaStreams(payload: DSLActivityExecutionPayload<ProbeMediaStreamsParams>): Promise<ProbeMediaStreamsResult> {
|
|
34
|
+
const { client, objectId } = await setupActivity<ProbeMediaStreamsParams>(payload);
|
|
35
|
+
|
|
36
|
+
const inputObject = await client.objects.retrieve(objectId).catch((err: unknown) => {
|
|
37
|
+
log.error(`Failed to retrieve object ${objectId}`, { err });
|
|
38
|
+
if (err instanceof RequestError && err.status === 404) {
|
|
39
|
+
throw new DocumentNotFoundError(`Object ${objectId} not found`, [objectId]);
|
|
40
|
+
}
|
|
41
|
+
throw err;
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
const source = inputObject.content?.source;
|
|
45
|
+
if (!source) {
|
|
46
|
+
throw new DocumentNotFoundError(`Object ${objectId} has no source`, [objectId]);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const { url } = await client.files.getDownloadUrl(source);
|
|
50
|
+
if (!url) {
|
|
51
|
+
throw new DocumentNotFoundError(`Failed to get download URL for object ${objectId}`);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// ffprobe reads only the container headers via HTTP range requests.
|
|
55
|
+
// -probesize 32k caps the amount read from the network to ~32 KB.
|
|
56
|
+
let stdout: string;
|
|
57
|
+
try {
|
|
58
|
+
({ stdout } = await execAsync(
|
|
59
|
+
`ffprobe -v quiet -probesize 32k -print_format json -show_streams "${url}"`,
|
|
60
|
+
{ maxBuffer: FFPROBE_MAX_BUFFER },
|
|
61
|
+
));
|
|
62
|
+
} catch (err: unknown) {
|
|
63
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
64
|
+
log.error(`ffprobe failed for object ${objectId}: ${message}`);
|
|
65
|
+
throw new Error(`Failed to probe media streams for object ${objectId}: ${message}`);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const { streams } = JSON.parse(stdout) as FFProbeOutput;
|
|
69
|
+
const hasVideo = streams.some(s => s.codec_type === 'video');
|
|
70
|
+
const hasAudio = streams.some(s => s.codec_type === 'audio');
|
|
71
|
+
|
|
72
|
+
log.info(`Media probe result for object ${objectId}`, { hasVideo, hasAudio });
|
|
73
|
+
|
|
74
|
+
if (!hasVideo && !hasAudio) {
|
|
75
|
+
throw ApplicationFailure.nonRetryable(
|
|
76
|
+
`No audio or video streams found in container for object ${objectId}`,
|
|
77
|
+
);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
return { hasVideo, hasAudio };
|
|
81
|
+
}
|
|
@@ -32,6 +32,14 @@ export async function saveGladiaTranscription(payload: DSLActivityExecutionPaylo
|
|
|
32
32
|
error: "Gladia integration not enabled",
|
|
33
33
|
};
|
|
34
34
|
}
|
|
35
|
+
if (!gladiaConfig.api_key) {
|
|
36
|
+
return {
|
|
37
|
+
hasText: false,
|
|
38
|
+
objectId: inputType === 'objectIds' ? context.objectId : undefined,
|
|
39
|
+
status: TextExtractionStatus.error,
|
|
40
|
+
error: "Gladia API key not configured",
|
|
41
|
+
};
|
|
42
|
+
}
|
|
35
43
|
|
|
36
44
|
const gladiaClient = new FetchClient(gladiaConfig.url ?? GLADIA_URL);
|
|
37
45
|
gladiaClient.withHeaders({ "x-gladia-key": gladiaConfig.api_key });
|
|
@@ -41,6 +41,14 @@ export async function transcribeMedia(payload: DSLActivityExecutionPayload<Trans
|
|
|
41
41
|
error: "Gladia integration not enabled",
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
|
+
if (!gladiaConfig.api_key) {
|
|
45
|
+
return {
|
|
46
|
+
hasText: false,
|
|
47
|
+
objectId: inputType === 'objectIds' ? context.objectId : undefined,
|
|
48
|
+
status: TextExtractionStatus.error,
|
|
49
|
+
error: "Gladia API key not configured",
|
|
50
|
+
}
|
|
51
|
+
}
|
|
44
52
|
|
|
45
53
|
const gladiaClient = new FetchClient(gladiaConfig.url ?? GLADIA_URL);
|
|
46
54
|
gladiaClient.withHeaders({ "x-gladia-key": gladiaConfig.api_key });
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { MockActivityEnvironment } from "@temporalio/testing";
|
|
2
2
|
import { ContentEventName, DSLActivityExecutionPayload } from "@vertesia/common";
|
|
3
3
|
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
|
4
|
-
import { resolveRemoteActivities, ResolveRemoteActivitiesParams } from "./resolveRemoteActivities.js";
|
|
4
|
+
import { resolveRemoteActivities, type RemoteActivityMap, ResolveRemoteActivitiesParams } from "./resolveRemoteActivities.js";
|
|
5
5
|
|
|
6
6
|
vi.stubGlobal("fetch", vi.fn());
|
|
7
7
|
|
|
@@ -11,6 +11,7 @@ vi.mock("../utils/client.js", () => ({
|
|
|
11
11
|
getVertesiaClient: vi.fn().mockReturnValue({
|
|
12
12
|
apps: {
|
|
13
13
|
getInstalledApps: (...args: any[]) => mockGetInstalledApps(...args),
|
|
14
|
+
validateUrl: vi.fn().mockResolvedValue({ valid: true }),
|
|
14
15
|
},
|
|
15
16
|
}),
|
|
16
17
|
}));
|
|
@@ -46,7 +47,7 @@ describe("resolveRemoteActivities", () => {
|
|
|
46
47
|
it("returns empty map when no apps installed", async () => {
|
|
47
48
|
mockGetInstalledApps.mockResolvedValueOnce([]);
|
|
48
49
|
|
|
49
|
-
const result = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
50
|
+
const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
50
51
|
expect(result).toEqual({});
|
|
51
52
|
expect(mockGetInstalledApps).toHaveBeenCalledWith("tools");
|
|
52
53
|
});
|
|
@@ -73,7 +74,7 @@ describe("resolveRemoteActivities", () => {
|
|
|
73
74
|
),
|
|
74
75
|
);
|
|
75
76
|
|
|
76
|
-
const result = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
77
|
+
const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
77
78
|
|
|
78
79
|
expect(Object.keys(result)).toHaveLength(2);
|
|
79
80
|
expect(result["app:my-nlp-app:nlp:analyze_sentiment"]).toBeDefined();
|
|
@@ -108,7 +109,7 @@ describe("resolveRemoteActivities", () => {
|
|
|
108
109
|
new Response(JSON.stringify({ activities: [{ name: "task_b", collection: "main" }] }), { status: 200 }),
|
|
109
110
|
);
|
|
110
111
|
|
|
111
|
-
const result = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
112
|
+
const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
112
113
|
|
|
113
114
|
expect(Object.keys(result)).toHaveLength(2);
|
|
114
115
|
expect(result["app:app-one:main:task_a"]).toBeDefined();
|
|
@@ -125,7 +126,7 @@ describe("resolveRemoteActivities", () => {
|
|
|
125
126
|
new Response(JSON.stringify({ activities: [] }), { status: 200 }),
|
|
126
127
|
);
|
|
127
128
|
|
|
128
|
-
const result = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
129
|
+
const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
129
130
|
expect(result).toEqual({});
|
|
130
131
|
});
|
|
131
132
|
|
|
@@ -135,7 +136,7 @@ describe("resolveRemoteActivities", () => {
|
|
|
135
136
|
manifest: { name: "no-endpoint" },
|
|
136
137
|
}]);
|
|
137
138
|
|
|
138
|
-
const result = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
139
|
+
const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
139
140
|
expect(result).toEqual({});
|
|
140
141
|
expect(mockFetch).not.toHaveBeenCalled();
|
|
141
142
|
});
|
|
@@ -160,7 +161,7 @@ describe("resolveRemoteActivities", () => {
|
|
|
160
161
|
new Response(JSON.stringify({ activities: [{ name: "task", collection: "main" }] }), { status: 200 }),
|
|
161
162
|
);
|
|
162
163
|
|
|
163
|
-
const result = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
164
|
+
const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
164
165
|
expect(Object.keys(result)).toHaveLength(1);
|
|
165
166
|
expect(result["app:same-app:main:task"].app_install_id).toBe("install-1");
|
|
166
167
|
});
|
|
@@ -183,7 +184,7 @@ describe("resolveRemoteActivities", () => {
|
|
|
183
184
|
new Response(JSON.stringify({ activities: [{ name: "task", collection: "main" }] }), { status: 200 }),
|
|
184
185
|
);
|
|
185
186
|
|
|
186
|
-
const result = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
187
|
+
const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
187
188
|
expect(Object.keys(result)).toHaveLength(1);
|
|
188
189
|
expect(result["app:working-app:main:task"]).toBeDefined();
|
|
189
190
|
});
|
|
@@ -191,7 +192,7 @@ describe("resolveRemoteActivities", () => {
|
|
|
191
192
|
it("returns empty map when getInstalledApps fails", async () => {
|
|
192
193
|
mockGetInstalledApps.mockRejectedValueOnce(new Error("API error"));
|
|
193
194
|
|
|
194
|
-
const result = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
195
|
+
const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
195
196
|
expect(result).toEqual({});
|
|
196
197
|
});
|
|
197
198
|
|
|
@@ -213,7 +214,7 @@ describe("resolveRemoteActivities", () => {
|
|
|
213
214
|
),
|
|
214
215
|
);
|
|
215
216
|
|
|
216
|
-
const result = await testEnv.run(resolveRemoteActivities, createPayload())
|
|
217
|
+
const result: RemoteActivityMap = await testEnv.run(resolveRemoteActivities, createPayload());
|
|
217
218
|
expect(Object.keys(result)).toHaveLength(1);
|
|
218
219
|
expect(result["app:bad-app:main:has_collection"]).toBeDefined();
|
|
219
220
|
});
|
|
@@ -5,7 +5,9 @@ import {
|
|
|
5
5
|
DSLActivityExecutionPayload,
|
|
6
6
|
RemoteActivityDefinition,
|
|
7
7
|
} from "@vertesia/common";
|
|
8
|
+
import { VertesiaClient } from "@vertesia/client";
|
|
8
9
|
import { setupActivity } from "../dsl/setup/ActivityContext.js";
|
|
10
|
+
import { URLValidationError, safeFetch } from "../security/ssrf.js";
|
|
9
11
|
|
|
10
12
|
/** Prefix identifying a remote activity name in DSL workflow steps */
|
|
11
13
|
const REMOTE_ACTIVITY_PREFIX = "app:";
|
|
@@ -66,7 +68,7 @@ export async function resolveRemoteActivities(
|
|
|
66
68
|
}
|
|
67
69
|
|
|
68
70
|
try {
|
|
69
|
-
const pkg = await fetchActivitiesPackage(manifest.endpoint, payload.auth_token);
|
|
71
|
+
const pkg = await fetchActivitiesPackage(manifest.endpoint, payload.auth_token, client);
|
|
70
72
|
if (!pkg.activities || pkg.activities.length === 0) {
|
|
71
73
|
continue;
|
|
72
74
|
}
|
|
@@ -94,7 +96,7 @@ export async function resolveRemoteActivities(
|
|
|
94
96
|
}
|
|
95
97
|
|
|
96
98
|
// Resolve the activity execution URL (collection-specific endpoint)
|
|
97
|
-
const activityUrl = resolveActivityUrl(manifest.endpoint, activity, collection);
|
|
99
|
+
const activityUrl = await resolveActivityUrl(manifest.endpoint, activity, collection, client);
|
|
98
100
|
|
|
99
101
|
map[qualifiedName] = {
|
|
100
102
|
url: activityUrl,
|
|
@@ -126,11 +128,13 @@ export async function resolveRemoteActivities(
|
|
|
126
128
|
/**
|
|
127
129
|
* Fetches the activities scope from a tool server package endpoint.
|
|
128
130
|
*/
|
|
129
|
-
async function fetchActivitiesPackage(endpoint: string, authToken: string): Promise<AppPackage> {
|
|
131
|
+
async function fetchActivitiesPackage(endpoint: string, authToken: string, client: VertesiaClient): Promise<AppPackage> {
|
|
130
132
|
const url = new URL(endpoint);
|
|
131
133
|
url.searchParams.set('scope', 'activities');
|
|
132
134
|
|
|
133
|
-
|
|
135
|
+
await client.apps.validateUrl(url.toString());
|
|
136
|
+
|
|
137
|
+
const response = await safeFetch(url.toString(), {
|
|
134
138
|
method: 'GET',
|
|
135
139
|
headers: {
|
|
136
140
|
'Accept': 'application/json',
|
|
@@ -147,21 +151,32 @@ async function fetchActivitiesPackage(endpoint: string, authToken: string): Prom
|
|
|
147
151
|
}
|
|
148
152
|
|
|
149
153
|
/**
|
|
150
|
-
* Resolves the execution URL for a remote activity.
|
|
154
|
+
* Resolves and validates the execution URL for a remote activity.
|
|
151
155
|
* If the activity has a `url` field, resolve it relative to the endpoint base.
|
|
152
156
|
* Otherwise, use the collection-specific activities endpoint: `/api/activities/{collection}`.
|
|
157
|
+
* Validates the resolved URL to prevent second-hop SSRF from tool server responses.
|
|
153
158
|
*/
|
|
154
|
-
function resolveActivityUrl(endpoint: string, activity: RemoteActivityDefinition, collection: string): string {
|
|
159
|
+
async function resolveActivityUrl(endpoint: string, activity: RemoteActivityDefinition, collection: string, client: VertesiaClient): Promise<string> {
|
|
160
|
+
let resolved: string;
|
|
155
161
|
if (activity.url) {
|
|
156
|
-
// Absolute URLs are used as-is
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
+
// Absolute URLs are used as-is; relative URLs are resolved against the endpoint base
|
|
163
|
+
resolved = (activity.url.startsWith('http://') || activity.url.startsWith('https://'))
|
|
164
|
+
? activity.url
|
|
165
|
+
: new URL(activity.url, endpoint).toString();
|
|
166
|
+
} else {
|
|
167
|
+
// Default: POST to the collection-specific activities endpoint
|
|
168
|
+
const base = new URL(endpoint);
|
|
169
|
+
const activitiesPath = base.pathname.replace(/\/package\/?$/, `/activities/${collection}`);
|
|
170
|
+
resolved = new URL(activitiesPath, base.origin).toString();
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Validate the resolved URL via Studio — safeFetch on the discovery request does NOT protect this
|
|
174
|
+
// second-hop URL which comes from the tool server response body.
|
|
175
|
+
try {
|
|
176
|
+
await client.apps.validateUrl(resolved);
|
|
177
|
+
} catch (e) {
|
|
178
|
+
throw new URLValidationError(`Blocked activity URL from app response: ${(e as Error).message}`);
|
|
162
179
|
}
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
const activitiesPath = base.pathname.replace(/\/package\/?$/, `/activities/${collection}`);
|
|
166
|
-
return new URL(activitiesPath, base.origin).toString();
|
|
180
|
+
|
|
181
|
+
return resolved;
|
|
167
182
|
}
|