@vellumai/assistant 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +2 -0
- package/README.md +45 -18
- package/package.json +1 -1
- package/scripts/ipc/generate-swift.ts +13 -0
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +100 -0
- package/src/__tests__/approval-hardcoded-copy-guard.test.ts +41 -0
- package/src/__tests__/approval-message-composer.test.ts +253 -0
- package/src/__tests__/call-domain.test.ts +12 -2
- package/src/__tests__/call-orchestrator.test.ts +391 -1
- package/src/__tests__/call-routes-http.test.ts +27 -2
- package/src/__tests__/channel-approval-routes.test.ts +397 -135
- package/src/__tests__/channel-approvals.test.ts +99 -3
- package/src/__tests__/channel-delivery-store.test.ts +30 -4
- package/src/__tests__/channel-guardian.test.ts +261 -22
- package/src/__tests__/channel-readiness-service.test.ts +257 -0
- package/src/__tests__/config-schema.test.ts +2 -1
- package/src/__tests__/credential-security-invariants.test.ts +1 -0
- package/src/__tests__/daemon-lifecycle.test.ts +636 -0
- package/src/__tests__/dictation-mode-detection.test.ts +63 -0
- package/src/__tests__/entity-search.test.ts +615 -0
- package/src/__tests__/gateway-only-enforcement.test.ts +19 -13
- package/src/__tests__/handlers-twilio-config.test.ts +480 -0
- package/src/__tests__/ipc-snapshot.test.ts +63 -0
- package/src/__tests__/messaging-send-tool.test.ts +65 -0
- package/src/__tests__/run-orchestrator-assistant-events.test.ts +4 -0
- package/src/__tests__/run-orchestrator.test.ts +22 -0
- package/src/__tests__/secret-scanner.test.ts +223 -0
- package/src/__tests__/session-runtime-assembly.test.ts +85 -1
- package/src/__tests__/shell-parser-property.test.ts +357 -2
- package/src/__tests__/sms-messaging-provider.test.ts +125 -0
- package/src/__tests__/system-prompt.test.ts +25 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +34 -1
- package/src/__tests__/twilio-routes.test.ts +39 -3
- package/src/__tests__/twitter-cli-error-shaping.test.ts +2 -2
- package/src/__tests__/user-reference.test.ts +68 -0
- package/src/__tests__/web-search.test.ts +1 -1
- package/src/__tests__/work-item-output.test.ts +110 -0
- package/src/calls/call-domain.ts +8 -5
- package/src/calls/call-orchestrator.ts +85 -22
- package/src/calls/twilio-config.ts +17 -11
- package/src/calls/twilio-rest.ts +276 -0
- package/src/calls/twilio-routes.ts +39 -1
- package/src/cli/map.ts +6 -0
- package/src/commands/__tests__/cc-command-registry.test.ts +67 -0
- package/src/commands/cc-command-registry.ts +14 -1
- package/src/config/bundled-skills/claude-code/TOOLS.json +10 -3
- package/src/config/bundled-skills/knowledge-graph/SKILL.md +15 -0
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +56 -0
- package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +185 -0
- package/src/config/bundled-skills/media-processing/SKILL.md +199 -0
- package/src/config/bundled-skills/media-processing/TOOLS.json +320 -0
- package/src/config/bundled-skills/media-processing/services/capability-registry.ts +137 -0
- package/src/config/bundled-skills/media-processing/services/event-detection-service.ts +280 -0
- package/src/config/bundled-skills/media-processing/services/feedback-aggregation.ts +144 -0
- package/src/config/bundled-skills/media-processing/services/feedback-store.ts +136 -0
- package/src/config/bundled-skills/media-processing/services/processing-pipeline.ts +261 -0
- package/src/config/bundled-skills/media-processing/services/retrieval-service.ts +95 -0
- package/src/config/bundled-skills/media-processing/services/timeline-service.ts +267 -0
- package/src/config/bundled-skills/media-processing/tools/analyze-keyframes.ts +301 -0
- package/src/config/bundled-skills/media-processing/tools/detect-events.ts +110 -0
- package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +190 -0
- package/src/config/bundled-skills/media-processing/tools/generate-clip.ts +195 -0
- package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +197 -0
- package/src/config/bundled-skills/media-processing/tools/media-diagnostics.ts +166 -0
- package/src/config/bundled-skills/media-processing/tools/media-status.ts +75 -0
- package/src/config/bundled-skills/media-processing/tools/query-media-events.ts +300 -0
- package/src/config/bundled-skills/media-processing/tools/recalibrate.ts +235 -0
- package/src/config/bundled-skills/media-processing/tools/select-tracking-profile.ts +142 -0
- package/src/config/bundled-skills/media-processing/tools/submit-feedback.ts +150 -0
- package/src/config/bundled-skills/messaging/SKILL.md +24 -5
- package/src/config/bundled-skills/messaging/tools/messaging-send.ts +5 -1
- package/src/config/bundled-skills/phone-calls/SKILL.md +2 -2
- package/src/config/bundled-skills/twitter/SKILL.md +19 -3
- package/src/config/defaults.ts +2 -1
- package/src/config/schema.ts +9 -3
- package/src/config/skills.ts +5 -32
- package/src/config/system-prompt.ts +40 -0
- package/src/config/templates/IDENTITY.md +2 -2
- package/src/config/user-reference.ts +29 -0
- package/src/config/vellum-skills/catalog.json +58 -0
- package/src/config/vellum-skills/google-oauth-setup/SKILL.md +3 -3
- package/src/config/vellum-skills/slack-oauth-setup/SKILL.md +3 -3
- package/src/config/vellum-skills/sms-setup/SKILL.md +118 -0
- package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -1
- package/src/config/vellum-skills/twilio-setup/SKILL.md +76 -6
- package/src/daemon/auth-manager.ts +103 -0
- package/src/daemon/computer-use-session.ts +8 -1
- package/src/daemon/config-watcher.ts +253 -0
- package/src/daemon/handlers/config.ts +819 -22
- package/src/daemon/handlers/dictation.ts +182 -0
- package/src/daemon/handlers/identity.ts +14 -23
- package/src/daemon/handlers/index.ts +2 -0
- package/src/daemon/handlers/sessions.ts +2 -0
- package/src/daemon/handlers/shared.ts +3 -0
- package/src/daemon/handlers/skills.ts +6 -7
- package/src/daemon/handlers/work-items.ts +15 -7
- package/src/daemon/ipc-contract-inventory.json +10 -0
- package/src/daemon/ipc-contract.ts +114 -4
- package/src/daemon/ipc-handler.ts +87 -0
- package/src/daemon/lifecycle.ts +18 -4
- package/src/daemon/ride-shotgun-handler.ts +11 -1
- package/src/daemon/server.ts +111 -504
- package/src/daemon/session-agent-loop.ts +10 -15
- package/src/daemon/session-runtime-assembly.ts +115 -44
- package/src/daemon/session-tool-setup.ts +2 -0
- package/src/daemon/session.ts +19 -2
- package/src/inbound/public-ingress-urls.ts +3 -3
- package/src/memory/channel-guardian-store.ts +2 -1
- package/src/memory/db-connection.ts +28 -0
- package/src/memory/db-init.ts +1163 -0
- package/src/memory/db.ts +2 -2007
- package/src/memory/embedding-backend.ts +79 -11
- package/src/memory/indexer.ts +2 -0
- package/src/memory/job-handlers/media-processing.ts +100 -0
- package/src/memory/job-utils.ts +64 -4
- package/src/memory/jobs-store.ts +2 -1
- package/src/memory/jobs-worker.ts +11 -1
- package/src/memory/media-store.ts +759 -0
- package/src/memory/recall-cache.ts +107 -0
- package/src/memory/retriever.ts +36 -2
- package/src/memory/schema-migration.ts +984 -0
- package/src/memory/schema.ts +99 -0
- package/src/memory/search/entity.ts +208 -25
- package/src/memory/search/ranking.ts +6 -1
- package/src/memory/search/types.ts +26 -0
- package/src/messaging/provider-types.ts +2 -0
- package/src/messaging/providers/sms/adapter.ts +204 -0
- package/src/messaging/providers/sms/client.ts +93 -0
- package/src/messaging/providers/sms/types.ts +7 -0
- package/src/permissions/checker.ts +16 -2
- package/src/permissions/prompter.ts +14 -3
- package/src/permissions/trust-store.ts +7 -0
- package/src/runtime/approval-message-composer.ts +143 -0
- package/src/runtime/channel-approvals.ts +29 -7
- package/src/runtime/channel-guardian-service.ts +44 -18
- package/src/runtime/channel-readiness-service.ts +292 -0
- package/src/runtime/channel-readiness-types.ts +29 -0
- package/src/runtime/gateway-client.ts +2 -1
- package/src/runtime/http-server.ts +65 -28
- package/src/runtime/http-types.ts +3 -0
- package/src/runtime/routes/call-routes.ts +2 -1
- package/src/runtime/routes/channel-routes.ts +237 -103
- package/src/runtime/routes/run-routes.ts +7 -1
- package/src/runtime/run-orchestrator.ts +43 -3
- package/src/security/secret-scanner.ts +218 -0
- package/src/skills/frontmatter.ts +63 -0
- package/src/skills/slash-commands.ts +23 -0
- package/src/skills/vellum-catalog-remote.ts +107 -0
- package/src/tools/assets/materialize.ts +2 -2
- package/src/tools/browser/auto-navigate.ts +132 -24
- package/src/tools/browser/browser-manager.ts +67 -61
- package/src/tools/calls/call-start.ts +1 -0
- package/src/tools/claude-code/claude-code.ts +55 -3
- package/src/tools/credentials/vault.ts +1 -1
- package/src/tools/execution-target.ts +11 -1
- package/src/tools/executor.ts +10 -2
- package/src/tools/network/web-search.ts +1 -1
- package/src/tools/skills/vellum-catalog.ts +61 -156
- package/src/tools/terminal/parser.ts +21 -5
- package/src/tools/types.ts +2 -0
- package/src/twitter/router.ts +1 -1
- package/src/util/platform.ts +43 -1
- package/src/util/retry.ts +4 -4
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
2
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
3
|
+
import { getConfig } from '../../../../config/loader.js';
|
|
4
|
+
import type { ToolContext, ToolExecutionResult } from '../../../../tools/types.js';
|
|
5
|
+
import {
|
|
6
|
+
getMediaAssetById,
|
|
7
|
+
getKeyframesForAsset,
|
|
8
|
+
getVisionOutputsForAsset,
|
|
9
|
+
insertVisionOutputsBatch,
|
|
10
|
+
createProcessingStage,
|
|
11
|
+
updateProcessingStage,
|
|
12
|
+
getProcessingStagesForAsset,
|
|
13
|
+
type MediaKeyframe,
|
|
14
|
+
type ProcessingStage,
|
|
15
|
+
} from '../../../../memory/media-store.js';
|
|
16
|
+
|
|
17
|
+
// Prompt sent to the vision model for every keyframe. It asks for structured
// JSON only; analyzeKeyframe() falls back to wrapping the raw reply text when
// the response cannot be parsed as JSON.
const VLM_PROMPT = `Analyze this image frame extracted from a video. Return a JSON object with the following fields:

{
"sceneDescription": "A concise description of the overall scene",
"subjects": ["List of identifiable subjects/objects/people in the frame"],
"actions": ["List of actions or activities occurring"],
"context": "Environmental or situational context (setting, conditions, etc.)"
}

Return ONLY the JSON object, no additional text.`;
|
27
|
+
|
|
28
|
+
/**
 * Run vision analysis over every keyframe of a media asset, in batches.
 *
 * Resumable: keyframes that already have a vision output for this analysis
 * type are skipped, so a crashed or aborted run can simply be re-invoked.
 * Progress and terminal status are mirrored into the asset's
 * 'vision_analysis' processing stage.
 *
 * @param assetId      ID of the media asset whose keyframes are analyzed.
 * @param analysisType Vision output category; defaults to 'scene_description'.
 * @param batchSize    Keyframes per DB write batch; defaults to 10.
 * @param onProgress   Optional sink for human-readable progress lines.
 * @param signal       Optional abort signal, checked before each batch and frame.
 * @throws If the asset or its keyframes are missing, batchSize is invalid,
 *         no Anthropic API key is available, or the run is aborted.
 */
export async function analyzeKeyframesForAsset(
  assetId: string,
  analysisType?: string,
  batchSize?: number,
  onProgress?: (msg: string) => void,
  signal?: AbortSignal,
): Promise<void> {
  const type = analysisType ?? 'scene_description';
  const batch = batchSize ?? 10;

  if (batch <= 0) {
    throw new Error('batch_size must be greater than 0.');
  }

  const asset = getMediaAssetById(assetId);
  if (!asset) {
    throw new Error(`Media asset not found: ${assetId}`);
  }

  // Get all keyframes for this asset
  const keyframes = getKeyframesForAsset(assetId);
  if (keyframes.length === 0) {
    throw new Error('No keyframes found for this asset. Run extract_keyframes first.');
  }

  // Resumability: find already-analyzed keyframe IDs for this analysis type
  const existingOutputs = getVisionOutputsForAsset(assetId, type);
  const analyzedKeyframeIds = new Set(existingOutputs.map((o) => o.keyframeId));
  const pendingKeyframes = keyframes.filter((kf) => !analyzedKeyframeIds.has(kf.id));

  if (pendingKeyframes.length === 0) {
    // Nothing to do — all keyframes already analyzed
    return;
  }

  // Find or create the vision_analysis processing stage
  let stage: ProcessingStage | undefined;
  const existingStages = getProcessingStagesForAsset(assetId);
  stage = existingStages.find((s) => s.stage === 'vision_analysis');
  if (!stage) {
    stage = createProcessingStage({ assetId, stage: 'vision_analysis' });
  }

  updateProcessingStage(stage.id, { status: 'running', startedAt: Date.now() });

  const config = getConfig();
  // Config value takes precedence; the env var is the fallback.
  const apiKey = config.apiKeys.anthropic ?? process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    updateProcessingStage(stage.id, {
      status: 'failed',
      lastError: 'Anthropic API key not configured',
    });
    throw new Error('No Anthropic API key available. Configure it in settings or set ANTHROPIC_API_KEY.');
  }

  const client = new Anthropic({ apiKey });
  // Start the counter from previously-analyzed frames so progress percentages
  // reflect the whole asset, not just this invocation.
  let analyzedCount = analyzedKeyframeIds.size;
  const totalKeyframes = keyframes.length;

  onProgress?.(`Analyzing ${pendingKeyframes.length} keyframes (${analyzedKeyframeIds.size} already done)...\n`);

  let aborted = false;

  try {
    // Process in batches
    for (let i = 0; i < pendingKeyframes.length; i += batch) {
      if (signal?.aborted) {
        onProgress?.('Aborted.\n');
        aborted = true;
        break;
      }

      const currentBatch = pendingKeyframes.slice(i, i + batch);
      const batchResults: Array<{
        assetId: string;
        keyframeId: string;
        analysisType: string;
        output: Record<string, unknown>;
        confidence?: number;
      }> = [];

      for (const keyframe of currentBatch) {
        // Check again per frame so an abort takes effect mid-batch; results
        // already gathered for this batch are still committed below.
        if (signal?.aborted) {
          onProgress?.('Aborted.\n');
          aborted = true;
          break;
        }

        try {
          const result = await analyzeKeyframe(client, keyframe);
          batchResults.push({
            assetId,
            keyframeId: keyframe.id,
            analysisType: type,
            output: result.output,
            confidence: result.confidence,
          });
          analyzedCount++;
        } catch (err) {
          // Per-frame failures are logged and skipped; a re-run will retry
          // them since no output row is written for the frame.
          onProgress?.(` Warning: failed to analyze frame at ${keyframe.timestamp}s: ${(err as Error).message}\n`);
        }
      }

      // Batch insert results
      if (batchResults.length > 0) {
        insertVisionOutputsBatch(batchResults);
      }

      // Update progress
      const progress = Math.round((analyzedCount / totalKeyframes) * 100);
      updateProcessingStage(stage.id, { progress });

      onProgress?.(` Batch ${Math.floor(i / batch) + 1}: analyzed ${batchResults.length}/${currentBatch.length} frames (${progress}% total)\n`);
    }

    // NOTE(review): an abort is surfaced as a thrown 'Analysis aborted', so
    // the catch below records the stage as 'failed' even though partial
    // results were committed — confirm this status is intended for aborts.
    if (aborted) {
      throw new Error('Analysis aborted');
    }

    const finalProgress = Math.round((analyzedCount / totalKeyframes) * 100);
    // Stage stays 'running' when some frames failed, so a re-run can finish it.
    const isComplete = analyzedCount >= totalKeyframes;

    updateProcessingStage(stage.id, {
      status: isComplete ? 'completed' : 'running',
      progress: finalProgress,
      ...(isComplete ? { completedAt: Date.now() } : {}),
    });
  } catch (err) {
    updateProcessingStage(stage.id, {
      status: 'failed',
      lastError: (err as Error).message.slice(0, 500),
    });
    throw err;
  }
}
|
163
|
+
|
|
164
|
+
export async function run(
|
|
165
|
+
input: Record<string, unknown>,
|
|
166
|
+
context: ToolContext,
|
|
167
|
+
): Promise<ToolExecutionResult> {
|
|
168
|
+
const assetId = input.asset_id as string | undefined;
|
|
169
|
+
if (!assetId) {
|
|
170
|
+
return { content: 'asset_id is required.', isError: true };
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
const analysisType = (input.analysis_type as string) || 'scene_description';
|
|
174
|
+
const batchSize = (input.batch_size as number) || 10;
|
|
175
|
+
|
|
176
|
+
try {
|
|
177
|
+
// Check if all keyframes are already analyzed before calling the core function
|
|
178
|
+
const keyframes = getKeyframesForAsset(assetId);
|
|
179
|
+
const existingOutputs = getVisionOutputsForAsset(assetId, analysisType);
|
|
180
|
+
const analyzedKeyframeIds = new Set(existingOutputs.map((o) => o.keyframeId));
|
|
181
|
+
const pendingKeyframes = keyframes.filter((kf) => !analyzedKeyframeIds.has(kf.id));
|
|
182
|
+
|
|
183
|
+
if (keyframes.length > 0 && pendingKeyframes.length === 0) {
|
|
184
|
+
return {
|
|
185
|
+
content: JSON.stringify({
|
|
186
|
+
message: 'All keyframes already analyzed',
|
|
187
|
+
assetId,
|
|
188
|
+
analysisType,
|
|
189
|
+
totalKeyframes: keyframes.length,
|
|
190
|
+
alreadyAnalyzed: existingOutputs.length,
|
|
191
|
+
}, null, 2),
|
|
192
|
+
isError: false,
|
|
193
|
+
};
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
await analyzeKeyframesForAsset(assetId, analysisType, batchSize, context.onOutput, context.signal);
|
|
197
|
+
|
|
198
|
+
// Gather final stats
|
|
199
|
+
const allKeyframes = getKeyframesForAsset(assetId);
|
|
200
|
+
const allOutputs = getVisionOutputsForAsset(assetId, analysisType);
|
|
201
|
+
const totalKeyframes = allKeyframes.length;
|
|
202
|
+
const analyzedCount = allOutputs.length;
|
|
203
|
+
const finalProgress = Math.round((analyzedCount / totalKeyframes) * 100);
|
|
204
|
+
const isComplete = analyzedCount >= totalKeyframes;
|
|
205
|
+
|
|
206
|
+
return {
|
|
207
|
+
content: JSON.stringify({
|
|
208
|
+
message: `Vision analysis ${isComplete ? 'completed' : 'in progress'}`,
|
|
209
|
+
assetId,
|
|
210
|
+
analysisType,
|
|
211
|
+
totalKeyframes,
|
|
212
|
+
analyzedCount,
|
|
213
|
+
newlyAnalyzed: analyzedCount - analyzedKeyframeIds.size,
|
|
214
|
+
errorCount: pendingKeyframes.length - (analyzedCount - analyzedKeyframeIds.size),
|
|
215
|
+
progress: finalProgress,
|
|
216
|
+
}, null, 2),
|
|
217
|
+
isError: false,
|
|
218
|
+
};
|
|
219
|
+
} catch (err) {
|
|
220
|
+
const msg = (err as Error).message;
|
|
221
|
+
// Preserve original error message format
|
|
222
|
+
if (
|
|
223
|
+
msg === 'batch_size must be greater than 0.' ||
|
|
224
|
+
msg.startsWith('Media asset not found:') ||
|
|
225
|
+
msg === 'No keyframes found for this asset. Run extract_keyframes first.' ||
|
|
226
|
+
msg === 'No Anthropic API key available. Configure it in settings or set ANTHROPIC_API_KEY.'
|
|
227
|
+
) {
|
|
228
|
+
return { content: msg, isError: true };
|
|
229
|
+
}
|
|
230
|
+
return { content: `Vision analysis failed: ${(err as Error).message}`, isError: true };
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/**
 * Send a single keyframe image to the Anthropic vision model and parse the
 * structured scene description from its reply.
 *
 * @param client   Configured Anthropic SDK client.
 * @param keyframe Keyframe row whose `filePath` points at the image on disk.
 * @returns Parsed analysis output (or the raw reply wrapped in the expected
 *          shape when JSON parsing fails) plus a fixed heuristic confidence.
 * @throws If the image cannot be read or the API call fails.
 */
async function analyzeKeyframe(
  client: Anthropic,
  keyframe: MediaKeyframe,
): Promise<{ output: Record<string, unknown>; confidence: number }> {
  // Read the image file and encode as base64
  const imageData = await readFile(keyframe.filePath);
  const base64 = imageData.toString('base64');

  // Determine media type from file extension
  const ext = keyframe.filePath.split('.').pop()?.toLowerCase() ?? 'jpg';
  const mediaTypeMap: Record<string, string> = {
    jpg: 'image/jpeg',
    jpeg: 'image/jpeg',
    png: 'image/png',
    gif: 'image/gif',
    webp: 'image/webp',
  };
  // Unknown extensions fall back to JPEG (keyframe extraction emits .jpg).
  const mediaType = mediaTypeMap[ext] ?? 'image/jpeg';

  // NOTE(review): model id 'claude-sonnet-4-6-20250514' does not match the
  // usual Anthropic release naming — confirm against the models API.
  const response = await client.messages.create({
    model: 'claude-sonnet-4-6-20250514',
    max_tokens: 1024,
    messages: [
      {
        role: 'user',
        content: [
          {
            type: 'image',
            source: {
              type: 'base64',
              media_type: mediaType as 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp',
              data: base64,
            },
          },
          {
            type: 'text',
            text: VLM_PROMPT,
          },
        ],
      },
    ],
  });

  // Extract text from response (first text content block, if any)
  const textBlock = response.content.find((block) => block.type === 'text');
  const responseText = textBlock && 'text' in textBlock ? textBlock.text : '';

  // Parse JSON from response
  let output: Record<string, unknown>;
  try {
    // Try to extract JSON from the response (handle markdown code fences);
    // when no fence is present, parse the whole reply text.
    const jsonMatch = responseText.match(/```(?:json)?\s*([\s\S]*?)```/) ?? [null, responseText];
    output = JSON.parse(jsonMatch[1]!.trim()) as Record<string, unknown>;
  } catch {
    // If JSON parsing fails, wrap raw text as output
    output = {
      sceneDescription: responseText,
      subjects: [],
      actions: [],
      context: '',
    };
  }

  // Add timestamp context to the output
  output.timestamp = keyframe.timestamp;

  // Fixed heuristic confidence — the model does not report one.
  return { output, confidence: 0.8 };
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import type { ToolContext, ToolExecutionResult } from '../../../../tools/types.js';
|
|
2
|
+
import { detectEvents, type DetectionConfig, type DetectionRule } from '../services/event-detection-service.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Sensible default detection rules for common event types.
 * These are fallbacks when the caller doesn't provide explicit rules —
 * the system is not limited to these event types. Rule weights within each
 * entry sum to 1.0.
 */
const DEFAULT_RULES_BY_EVENT_TYPE: Record<string, DetectionRule[]> = {
  // Possession changes: subject transitions weighted highest, supported by
  // short segments and action-keyword matches.
  turnover: [
    { ruleType: 'segment_transition', params: { field: 'subjects' }, weight: 0.5 },
    { ruleType: 'short_segment', params: { maxDurationSeconds: 5 }, weight: 0.3 },
    { ruleType: 'attribute_match', params: { field: 'actions', pattern: 'steal|turnover|loss|intercept' }, weight: 0.2 },
  ],
  // A scene change is purely a transition in the segment type.
  scene_change: [
    { ruleType: 'segment_transition', params: { field: 'segmentType' }, weight: 1.0 },
  ],
  // Short plays: brevity dominates, with subject churn as a secondary signal.
  short_play: [
    { ruleType: 'short_segment', params: { maxDurationSeconds: 3 }, weight: 0.6 },
    { ruleType: 'segment_transition', params: { field: 'subjects' }, weight: 0.4 },
  ],
};
|
23
|
+
|
|
24
|
+
export async function run(
|
|
25
|
+
input: Record<string, unknown>,
|
|
26
|
+
context: ToolContext,
|
|
27
|
+
): Promise<ToolExecutionResult> {
|
|
28
|
+
const assetId = input.asset_id as string | undefined;
|
|
29
|
+
if (!assetId) {
|
|
30
|
+
return { content: 'asset_id is required.', isError: true };
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const eventType = input.event_type as string | undefined;
|
|
34
|
+
if (!eventType) {
|
|
35
|
+
return { content: 'event_type is required.', isError: true };
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Parse detection rules: use provided rules or fall back to defaults
|
|
39
|
+
let rules: DetectionRule[];
|
|
40
|
+
const rawRules = input.detection_rules;
|
|
41
|
+
|
|
42
|
+
if (rawRules) {
|
|
43
|
+
// Accept rules as a JSON string or as an already-parsed array
|
|
44
|
+
if (typeof rawRules === 'string') {
|
|
45
|
+
try {
|
|
46
|
+
const parsed = JSON.parse(rawRules);
|
|
47
|
+
if (!Array.isArray(parsed)) {
|
|
48
|
+
return { content: 'detection_rules must be a valid JSON array of rule objects.', isError: true };
|
|
49
|
+
}
|
|
50
|
+
rules = parsed as DetectionRule[];
|
|
51
|
+
} catch {
|
|
52
|
+
return { content: 'detection_rules must be a valid JSON array of rule objects.', isError: true };
|
|
53
|
+
}
|
|
54
|
+
} else if (Array.isArray(rawRules)) {
|
|
55
|
+
rules = rawRules as DetectionRule[];
|
|
56
|
+
} else {
|
|
57
|
+
return { content: 'detection_rules must be an array of rule objects.', isError: true };
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Validate each rule has the required shape
|
|
61
|
+
for (const rule of rules) {
|
|
62
|
+
if (!rule.ruleType || typeof rule.ruleType !== 'string') {
|
|
63
|
+
return { content: 'Each detection rule must have a "ruleType" string.', isError: true };
|
|
64
|
+
}
|
|
65
|
+
if (rule.weight === undefined || typeof rule.weight !== 'number') {
|
|
66
|
+
return { content: 'Each detection rule must have a "weight" number.', isError: true };
|
|
67
|
+
}
|
|
68
|
+
if (!rule.params || typeof rule.params !== 'object') {
|
|
69
|
+
return { content: 'Each detection rule must have a "params" object.', isError: true };
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
} else {
|
|
73
|
+
// Use defaults for known event types, or a generic transition-based fallback
|
|
74
|
+
rules = DEFAULT_RULES_BY_EVENT_TYPE[eventType] ?? [
|
|
75
|
+
{ ruleType: 'segment_transition', params: { field: 'segmentType' }, weight: 0.6 },
|
|
76
|
+
{ ruleType: 'short_segment', params: { maxDurationSeconds: 5 }, weight: 0.4 },
|
|
77
|
+
];
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const config: DetectionConfig = { eventType, rules };
|
|
81
|
+
|
|
82
|
+
try {
|
|
83
|
+
const result = detectEvents(assetId, config, {
|
|
84
|
+
onProgress: (msg) => context.onOutput?.(`${msg}\n`),
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
return {
|
|
88
|
+
content: JSON.stringify({
|
|
89
|
+
message: `Detected ${result.candidateCount} ${result.eventType} events`,
|
|
90
|
+
assetId: result.assetId,
|
|
91
|
+
eventType: result.eventType,
|
|
92
|
+
totalEvents: result.candidateCount,
|
|
93
|
+
rulesUsed: rules.map((r) => r.ruleType),
|
|
94
|
+
events: result.events.map((e) => ({
|
|
95
|
+
id: e.id,
|
|
96
|
+
startTime: e.startTime,
|
|
97
|
+
endTime: e.endTime,
|
|
98
|
+
confidence: e.confidence,
|
|
99
|
+
reasons: e.reasons,
|
|
100
|
+
})),
|
|
101
|
+
}, null, 2),
|
|
102
|
+
isError: false,
|
|
103
|
+
};
|
|
104
|
+
} catch (err) {
|
|
105
|
+
return {
|
|
106
|
+
content: `Event detection failed: ${(err as Error).message}`,
|
|
107
|
+
isError: true,
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import { join, dirname } from 'node:path';
|
|
2
|
+
import { mkdir, readdir, rename, rm } from 'node:fs/promises';
|
|
3
|
+
import { randomUUID } from 'node:crypto';
|
|
4
|
+
import type { ToolContext, ToolExecutionResult } from '../../../../tools/types.js';
|
|
5
|
+
import {
|
|
6
|
+
getMediaAssetById,
|
|
7
|
+
getKeyframesForAsset,
|
|
8
|
+
insertKeyframesBatch,
|
|
9
|
+
deleteKeyframesForAsset,
|
|
10
|
+
createProcessingStage,
|
|
11
|
+
updateProcessingStage,
|
|
12
|
+
getProcessingStagesForAsset,
|
|
13
|
+
type ProcessingStage,
|
|
14
|
+
} from '../../../../memory/media-store.js';
|
|
15
|
+
|
|
16
|
+
const FFMPEG_TIMEOUT_MS = 300_000;
|
|
17
|
+
|
|
18
|
+
function spawnWithTimeout(
|
|
19
|
+
cmd: string[],
|
|
20
|
+
timeoutMs: number,
|
|
21
|
+
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
|
|
22
|
+
return new Promise((resolve, reject) => {
|
|
23
|
+
const proc = Bun.spawn(cmd, { stdout: 'pipe', stderr: 'pipe' });
|
|
24
|
+
const timer = setTimeout(() => {
|
|
25
|
+
proc.kill();
|
|
26
|
+
reject(new Error(`Process timed out after ${timeoutMs}ms: ${cmd[0]}`));
|
|
27
|
+
}, timeoutMs);
|
|
28
|
+
proc.exited.then(async (exitCode) => {
|
|
29
|
+
clearTimeout(timer);
|
|
30
|
+
const stdout = await new Response(proc.stdout).text();
|
|
31
|
+
const stderr = await new Response(proc.stderr).text();
|
|
32
|
+
resolve({ exitCode, stdout, stderr });
|
|
33
|
+
});
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export async function extractKeyframesForAsset(
|
|
38
|
+
assetId: string,
|
|
39
|
+
intervalSeconds?: number,
|
|
40
|
+
onProgress?: (msg: string) => void,
|
|
41
|
+
): Promise<void> {
|
|
42
|
+
const interval = intervalSeconds ?? 3;
|
|
43
|
+
|
|
44
|
+
const asset = getMediaAssetById(assetId);
|
|
45
|
+
if (!asset) {
|
|
46
|
+
throw new Error(`Media asset not found: ${assetId}`);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
if (asset.mediaType !== 'video') {
|
|
50
|
+
throw new Error(`Keyframe extraction requires a video asset. Got: ${asset.mediaType}`);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// Find or create the keyframe_extraction processing stage
|
|
54
|
+
let stage: ProcessingStage | undefined;
|
|
55
|
+
const existingStages = getProcessingStagesForAsset(assetId);
|
|
56
|
+
stage = existingStages.find((s) => s.stage === 'keyframe_extraction');
|
|
57
|
+
if (!stage) {
|
|
58
|
+
stage = createProcessingStage({ assetId, stage: 'keyframe_extraction' });
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
updateProcessingStage(stage.id, { status: 'running', startedAt: Date.now() });
|
|
62
|
+
|
|
63
|
+
// Store keyframes in a durable directory alongside the source file.
|
|
64
|
+
// Extract to a temp dir first so that if ffmpeg fails the old frames remain intact.
|
|
65
|
+
const outputDir = join(dirname(asset.filePath), 'keyframes', assetId);
|
|
66
|
+
const tempDir = outputDir + '-tmp-' + randomUUID();
|
|
67
|
+
await mkdir(tempDir, { recursive: true });
|
|
68
|
+
|
|
69
|
+
try {
|
|
70
|
+
onProgress?.(`Extracting keyframes every ${interval}s from ${asset.title}...\n`);
|
|
71
|
+
|
|
72
|
+
// Use ffmpeg to extract frames at the specified interval
|
|
73
|
+
const result = await spawnWithTimeout([
|
|
74
|
+
'ffmpeg', '-y',
|
|
75
|
+
'-i', asset.filePath,
|
|
76
|
+
'-vf', `fps=1/${interval}`,
|
|
77
|
+
'-q:v', '2',
|
|
78
|
+
join(tempDir, 'frame-%06d.jpg'),
|
|
79
|
+
], FFMPEG_TIMEOUT_MS);
|
|
80
|
+
|
|
81
|
+
if (result.exitCode !== 0) {
|
|
82
|
+
await rm(tempDir, { recursive: true, force: true });
|
|
83
|
+
updateProcessingStage(stage.id, {
|
|
84
|
+
status: 'failed',
|
|
85
|
+
lastError: result.stderr.slice(0, 500),
|
|
86
|
+
});
|
|
87
|
+
throw new Error(`ffmpeg failed: ${result.stderr.slice(0, 500)}`);
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// List extracted frames
|
|
91
|
+
const files = await readdir(tempDir);
|
|
92
|
+
const frameFiles = files
|
|
93
|
+
.filter((f) => f.startsWith('frame-') && f.endsWith('.jpg'))
|
|
94
|
+
.sort();
|
|
95
|
+
|
|
96
|
+
if (frameFiles.length === 0) {
|
|
97
|
+
await rm(tempDir, { recursive: true, force: true });
|
|
98
|
+
updateProcessingStage(stage.id, {
|
|
99
|
+
status: 'failed',
|
|
100
|
+
lastError: 'No frames extracted',
|
|
101
|
+
});
|
|
102
|
+
throw new Error('No frames were extracted from the video.');
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Extraction succeeded — atomically swap temp dir into the durable path
|
|
106
|
+
await rm(outputDir, { recursive: true, force: true });
|
|
107
|
+
await rename(tempDir, outputDir);
|
|
108
|
+
|
|
109
|
+
onProgress?.(`Extracted ${frameFiles.length} frames. Registering in database...\n`);
|
|
110
|
+
|
|
111
|
+
// Build keyframe rows
|
|
112
|
+
const keyframeRows = frameFiles.map((file, index) => ({
|
|
113
|
+
assetId,
|
|
114
|
+
timestamp: index * interval,
|
|
115
|
+
filePath: join(outputDir, file),
|
|
116
|
+
metadata: { frameIndex: index, intervalSeconds: interval },
|
|
117
|
+
}));
|
|
118
|
+
|
|
119
|
+
// Clear existing keyframes to prevent duplicates on re-extraction
|
|
120
|
+
deleteKeyframesForAsset(assetId);
|
|
121
|
+
|
|
122
|
+
// Batch insert
|
|
123
|
+
const keyframes = insertKeyframesBatch(keyframeRows);
|
|
124
|
+
|
|
125
|
+
// Update progress
|
|
126
|
+
updateProcessingStage(stage.id, {
|
|
127
|
+
status: 'completed',
|
|
128
|
+
progress: 100,
|
|
129
|
+
completedAt: Date.now(),
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
onProgress?.(`Registered ${keyframes.length} keyframes.\n`);
|
|
133
|
+
} catch (err) {
|
|
134
|
+
// Clean up temp dir if it still exists (no-op when already removed above)
|
|
135
|
+
await rm(tempDir, { recursive: true, force: true }).catch(() => {});
|
|
136
|
+
// Update stage for unexpected errors (ffmpeg/no-frames cases already updated above)
|
|
137
|
+
const msg = (err as Error).message;
|
|
138
|
+
if (!msg.startsWith('ffmpeg failed:') && msg !== 'No frames were extracted from the video.') {
|
|
139
|
+
updateProcessingStage(stage.id, {
|
|
140
|
+
status: 'failed',
|
|
141
|
+
lastError: msg.slice(0, 500),
|
|
142
|
+
});
|
|
143
|
+
}
|
|
144
|
+
throw err;
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
export async function run(
|
|
149
|
+
input: Record<string, unknown>,
|
|
150
|
+
context: ToolContext,
|
|
151
|
+
): Promise<ToolExecutionResult> {
|
|
152
|
+
const assetId = input.asset_id as string | undefined;
|
|
153
|
+
if (!assetId) {
|
|
154
|
+
return { content: 'asset_id is required.', isError: true };
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
const intervalSeconds = (input.interval_seconds as number) || 3;
|
|
158
|
+
|
|
159
|
+
try {
|
|
160
|
+
await extractKeyframesForAsset(assetId, intervalSeconds, context.onOutput);
|
|
161
|
+
|
|
162
|
+
const asset = getMediaAssetById(assetId);
|
|
163
|
+
const outputDir = join(dirname(asset!.filePath), 'keyframes', assetId);
|
|
164
|
+
const keyframes = getKeyframesForAsset(assetId);
|
|
165
|
+
|
|
166
|
+
return {
|
|
167
|
+
content: JSON.stringify({
|
|
168
|
+
message: `Extracted and registered ${keyframes.length} keyframes`,
|
|
169
|
+
assetId,
|
|
170
|
+
keyframeCount: keyframes.length,
|
|
171
|
+
intervalSeconds,
|
|
172
|
+
outputDir,
|
|
173
|
+
}, null, 2),
|
|
174
|
+
isError: false,
|
|
175
|
+
};
|
|
176
|
+
} catch (err) {
|
|
177
|
+
const msg = (err as Error).message;
|
|
178
|
+
// Preserve original error message format: validation and known failures
|
|
179
|
+
// are returned directly; unexpected errors get the prefix
|
|
180
|
+
if (
|
|
181
|
+
msg.startsWith('Media asset not found:') ||
|
|
182
|
+
msg.startsWith('Keyframe extraction requires a video asset.') ||
|
|
183
|
+
msg.startsWith('ffmpeg failed:') ||
|
|
184
|
+
msg === 'No frames were extracted from the video.'
|
|
185
|
+
) {
|
|
186
|
+
return { content: msg, isError: true };
|
|
187
|
+
}
|
|
188
|
+
return { content: `Keyframe extraction failed: ${msg}`, isError: true };
|
|
189
|
+
}
|
|
190
|
+
}
|