@vellumai/assistant 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. package/Dockerfile +2 -0
  2. package/README.md +45 -18
  3. package/package.json +1 -1
  4. package/scripts/ipc/generate-swift.ts +13 -0
  5. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +100 -0
  6. package/src/__tests__/approval-hardcoded-copy-guard.test.ts +41 -0
  7. package/src/__tests__/approval-message-composer.test.ts +253 -0
  8. package/src/__tests__/call-domain.test.ts +12 -2
  9. package/src/__tests__/call-orchestrator.test.ts +391 -1
  10. package/src/__tests__/call-routes-http.test.ts +27 -2
  11. package/src/__tests__/channel-approval-routes.test.ts +397 -135
  12. package/src/__tests__/channel-approvals.test.ts +99 -3
  13. package/src/__tests__/channel-delivery-store.test.ts +30 -4
  14. package/src/__tests__/channel-guardian.test.ts +261 -22
  15. package/src/__tests__/channel-readiness-service.test.ts +257 -0
  16. package/src/__tests__/config-schema.test.ts +2 -1
  17. package/src/__tests__/credential-security-invariants.test.ts +1 -0
  18. package/src/__tests__/daemon-lifecycle.test.ts +636 -0
  19. package/src/__tests__/dictation-mode-detection.test.ts +63 -0
  20. package/src/__tests__/entity-search.test.ts +615 -0
  21. package/src/__tests__/gateway-only-enforcement.test.ts +19 -13
  22. package/src/__tests__/handlers-twilio-config.test.ts +480 -0
  23. package/src/__tests__/ipc-snapshot.test.ts +63 -0
  24. package/src/__tests__/messaging-send-tool.test.ts +65 -0
  25. package/src/__tests__/run-orchestrator-assistant-events.test.ts +4 -0
  26. package/src/__tests__/run-orchestrator.test.ts +22 -0
  27. package/src/__tests__/secret-scanner.test.ts +223 -0
  28. package/src/__tests__/session-runtime-assembly.test.ts +85 -1
  29. package/src/__tests__/shell-parser-property.test.ts +357 -2
  30. package/src/__tests__/sms-messaging-provider.test.ts +125 -0
  31. package/src/__tests__/system-prompt.test.ts +25 -1
  32. package/src/__tests__/tool-executor-lifecycle-events.test.ts +34 -1
  33. package/src/__tests__/twilio-routes.test.ts +39 -3
  34. package/src/__tests__/twitter-cli-error-shaping.test.ts +2 -2
  35. package/src/__tests__/user-reference.test.ts +68 -0
  36. package/src/__tests__/web-search.test.ts +1 -1
  37. package/src/__tests__/work-item-output.test.ts +110 -0
  38. package/src/calls/call-domain.ts +8 -5
  39. package/src/calls/call-orchestrator.ts +85 -22
  40. package/src/calls/twilio-config.ts +17 -11
  41. package/src/calls/twilio-rest.ts +276 -0
  42. package/src/calls/twilio-routes.ts +39 -1
  43. package/src/cli/map.ts +6 -0
  44. package/src/commands/__tests__/cc-command-registry.test.ts +67 -0
  45. package/src/commands/cc-command-registry.ts +14 -1
  46. package/src/config/bundled-skills/claude-code/TOOLS.json +10 -3
  47. package/src/config/bundled-skills/knowledge-graph/SKILL.md +15 -0
  48. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +56 -0
  49. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +185 -0
  50. package/src/config/bundled-skills/media-processing/SKILL.md +199 -0
  51. package/src/config/bundled-skills/media-processing/TOOLS.json +320 -0
  52. package/src/config/bundled-skills/media-processing/services/capability-registry.ts +137 -0
  53. package/src/config/bundled-skills/media-processing/services/event-detection-service.ts +280 -0
  54. package/src/config/bundled-skills/media-processing/services/feedback-aggregation.ts +144 -0
  55. package/src/config/bundled-skills/media-processing/services/feedback-store.ts +136 -0
  56. package/src/config/bundled-skills/media-processing/services/processing-pipeline.ts +261 -0
  57. package/src/config/bundled-skills/media-processing/services/retrieval-service.ts +95 -0
  58. package/src/config/bundled-skills/media-processing/services/timeline-service.ts +267 -0
  59. package/src/config/bundled-skills/media-processing/tools/analyze-keyframes.ts +301 -0
  60. package/src/config/bundled-skills/media-processing/tools/detect-events.ts +110 -0
  61. package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +190 -0
  62. package/src/config/bundled-skills/media-processing/tools/generate-clip.ts +195 -0
  63. package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +197 -0
  64. package/src/config/bundled-skills/media-processing/tools/media-diagnostics.ts +166 -0
  65. package/src/config/bundled-skills/media-processing/tools/media-status.ts +75 -0
  66. package/src/config/bundled-skills/media-processing/tools/query-media-events.ts +300 -0
  67. package/src/config/bundled-skills/media-processing/tools/recalibrate.ts +235 -0
  68. package/src/config/bundled-skills/media-processing/tools/select-tracking-profile.ts +142 -0
  69. package/src/config/bundled-skills/media-processing/tools/submit-feedback.ts +150 -0
  70. package/src/config/bundled-skills/messaging/SKILL.md +24 -5
  71. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +5 -1
  72. package/src/config/bundled-skills/phone-calls/SKILL.md +2 -2
  73. package/src/config/bundled-skills/twitter/SKILL.md +19 -3
  74. package/src/config/defaults.ts +2 -1
  75. package/src/config/schema.ts +9 -3
  76. package/src/config/skills.ts +5 -32
  77. package/src/config/system-prompt.ts +40 -0
  78. package/src/config/templates/IDENTITY.md +2 -2
  79. package/src/config/user-reference.ts +29 -0
  80. package/src/config/vellum-skills/catalog.json +58 -0
  81. package/src/config/vellum-skills/google-oauth-setup/SKILL.md +3 -3
  82. package/src/config/vellum-skills/slack-oauth-setup/SKILL.md +3 -3
  83. package/src/config/vellum-skills/sms-setup/SKILL.md +118 -0
  84. package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -1
  85. package/src/config/vellum-skills/twilio-setup/SKILL.md +76 -6
  86. package/src/daemon/auth-manager.ts +103 -0
  87. package/src/daemon/computer-use-session.ts +8 -1
  88. package/src/daemon/config-watcher.ts +253 -0
  89. package/src/daemon/handlers/config.ts +819 -22
  90. package/src/daemon/handlers/dictation.ts +182 -0
  91. package/src/daemon/handlers/identity.ts +14 -23
  92. package/src/daemon/handlers/index.ts +2 -0
  93. package/src/daemon/handlers/sessions.ts +2 -0
  94. package/src/daemon/handlers/shared.ts +3 -0
  95. package/src/daemon/handlers/skills.ts +6 -7
  96. package/src/daemon/handlers/work-items.ts +15 -7
  97. package/src/daemon/ipc-contract-inventory.json +10 -0
  98. package/src/daemon/ipc-contract.ts +114 -4
  99. package/src/daemon/ipc-handler.ts +87 -0
  100. package/src/daemon/lifecycle.ts +18 -4
  101. package/src/daemon/ride-shotgun-handler.ts +11 -1
  102. package/src/daemon/server.ts +111 -504
  103. package/src/daemon/session-agent-loop.ts +10 -15
  104. package/src/daemon/session-runtime-assembly.ts +115 -44
  105. package/src/daemon/session-tool-setup.ts +2 -0
  106. package/src/daemon/session.ts +19 -2
  107. package/src/inbound/public-ingress-urls.ts +3 -3
  108. package/src/memory/channel-guardian-store.ts +2 -1
  109. package/src/memory/db-connection.ts +28 -0
  110. package/src/memory/db-init.ts +1163 -0
  111. package/src/memory/db.ts +2 -2007
  112. package/src/memory/embedding-backend.ts +79 -11
  113. package/src/memory/indexer.ts +2 -0
  114. package/src/memory/job-handlers/media-processing.ts +100 -0
  115. package/src/memory/job-utils.ts +64 -4
  116. package/src/memory/jobs-store.ts +2 -1
  117. package/src/memory/jobs-worker.ts +11 -1
  118. package/src/memory/media-store.ts +759 -0
  119. package/src/memory/recall-cache.ts +107 -0
  120. package/src/memory/retriever.ts +36 -2
  121. package/src/memory/schema-migration.ts +984 -0
  122. package/src/memory/schema.ts +99 -0
  123. package/src/memory/search/entity.ts +208 -25
  124. package/src/memory/search/ranking.ts +6 -1
  125. package/src/memory/search/types.ts +26 -0
  126. package/src/messaging/provider-types.ts +2 -0
  127. package/src/messaging/providers/sms/adapter.ts +204 -0
  128. package/src/messaging/providers/sms/client.ts +93 -0
  129. package/src/messaging/providers/sms/types.ts +7 -0
  130. package/src/permissions/checker.ts +16 -2
  131. package/src/permissions/prompter.ts +14 -3
  132. package/src/permissions/trust-store.ts +7 -0
  133. package/src/runtime/approval-message-composer.ts +143 -0
  134. package/src/runtime/channel-approvals.ts +29 -7
  135. package/src/runtime/channel-guardian-service.ts +44 -18
  136. package/src/runtime/channel-readiness-service.ts +292 -0
  137. package/src/runtime/channel-readiness-types.ts +29 -0
  138. package/src/runtime/gateway-client.ts +2 -1
  139. package/src/runtime/http-server.ts +65 -28
  140. package/src/runtime/http-types.ts +3 -0
  141. package/src/runtime/routes/call-routes.ts +2 -1
  142. package/src/runtime/routes/channel-routes.ts +237 -103
  143. package/src/runtime/routes/run-routes.ts +7 -1
  144. package/src/runtime/run-orchestrator.ts +43 -3
  145. package/src/security/secret-scanner.ts +218 -0
  146. package/src/skills/frontmatter.ts +63 -0
  147. package/src/skills/slash-commands.ts +23 -0
  148. package/src/skills/vellum-catalog-remote.ts +107 -0
  149. package/src/tools/assets/materialize.ts +2 -2
  150. package/src/tools/browser/auto-navigate.ts +132 -24
  151. package/src/tools/browser/browser-manager.ts +67 -61
  152. package/src/tools/calls/call-start.ts +1 -0
  153. package/src/tools/claude-code/claude-code.ts +55 -3
  154. package/src/tools/credentials/vault.ts +1 -1
  155. package/src/tools/execution-target.ts +11 -1
  156. package/src/tools/executor.ts +10 -2
  157. package/src/tools/network/web-search.ts +1 -1
  158. package/src/tools/skills/vellum-catalog.ts +61 -156
  159. package/src/tools/terminal/parser.ts +21 -5
  160. package/src/tools/types.ts +2 -0
  161. package/src/twitter/router.ts +1 -1
  162. package/src/util/platform.ts +43 -1
  163. package/src/util/retry.ts +4 -4
@@ -0,0 +1,185 @@
1
+ import { initializeDb, getDb } from '../../../../memory/db.js';
2
+ import { findMatchedEntities, findNeighborEntities, getEntityLinkedItemCandidates, collectTypedNeighbors } from '../../../../memory/search/entity.js';
3
+ import { memoryEntities } from '../../../../memory/schema.js';
4
+ import { inArray } from 'drizzle-orm';
5
+ import type { TraversalStep } from '../../../../memory/search/types.js';
6
+ import type { EntityRelationType, EntityType } from '../../../../memory/entity-extractor.js';
7
+ import type { ToolContext, ToolExecutionResult } from '../../../../tools/types.js';
8
+
9
/**
 * Arguments accepted by the graph-query tool's `run` entry point.
 * Field names are snake_case because they mirror the tool's JSON input.
 */
interface GraphQueryInput {
  /** Which graph operation to perform. */
  query_type: 'neighbors' | 'typed_traversal' | 'intersection';

  /** Entity names used as starting points; `run` resolves each one to an entity ID. */
  seeds: Array<string>;

  /**
   * Optional per-hop filters. `neighbors` reads only the first entry;
   * `typed_traversal` and `intersection` map every entry to a traversal step.
   */
  steps?: {
    relation_types?: Array<string>;
    entity_types?: Array<string>;
  }[];

  /** Cap on the number of entities returned; `run` falls back to 20 when absent. */
  max_results?: number;

  /** Whether linked memory items are attached to each entity; `run` falls back to true. */
  include_items?: boolean;
}
19
+
20
/**
 * One entity as serialized into the tool's JSON response.
 */
interface EntityResult {
  /** Primary key of the entity row. */
  id: string;
  /** Name stored on the entity row. */
  name: string;
  /** Entity type stored on the entity row. */
  type: string;
  /** Alternative names, decoded from the row's JSON-encoded `aliases` column. */
  aliases: Array<string>;
  /** Linked memory items; only populated when the caller asked for items. */
  items?: { subject: string; statement: string }[];
}
27
+
28
/** Upper bound on edges examined per traversal step, shared by every query type. */
const GRAPH_QUERY_MAX_EDGES = 40;

/** Number of entities returned when `max_results` is absent or not a usable number. */
const GRAPH_QUERY_DEFAULT_MAX_RESULTS = 20;

/** Maximum number of linked memory items attached to each returned entity. */
const GRAPH_QUERY_MAX_ITEMS_PER_ENTITY = 5;

/** Builds an error tool result whose content is the JSON-encoded payload. */
function graphQueryError(payload: Record<string, unknown>): ToolExecutionResult {
  return { content: JSON.stringify(payload), isError: true };
}

/**
 * Coerces `max_results` into a non-negative integer.
 * Anything that is not a finite number >= 0 falls back to the default, so a
 * negative value can no longer turn `slice(0, n)` into "drop the last n".
 */
function normalizeMaxResults(value: unknown): number {
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 0) {
    return GRAPH_QUERY_DEFAULT_MAX_RESULTS;
  }
  return Math.floor(value);
}

/** Converts the snake_case step filters from the tool input into traversal steps. */
function toTraversalSteps(steps: GraphQueryInput['steps']): TraversalStep[] {
  const list = Array.isArray(steps) ? steps : [];
  return list.map(step => ({
    relationTypes: step?.relation_types as EntityRelationType[] | undefined,
    entityTypes: step?.entity_types as EntityType[] | undefined,
  }));
}

/**
 * Decodes the JSON-encoded aliases column.
 * Returns an empty list for empty, malformed, or non-array values so that one
 * corrupt row cannot abort the whole query.
 */
function parseAliases(raw: unknown): string[] {
  if (typeof raw !== 'string' || raw.length === 0) return [];
  try {
    const parsed: unknown = JSON.parse(raw);
    return Array.isArray(parsed)
      ? parsed.filter((alias): alias is string => typeof alias === 'string')
      : [];
  } catch {
    return [];
  }
}

/**
 * Splits candidate text at the first ': ' into subject and statement.
 * Text without a separator is reported as a statement with an empty subject
 * rather than being duplicated into both fields.
 */
function splitItemText(text: string): { subject: string; statement: string } {
  const separatorIndex = text.indexOf(': ');
  if (separatorIndex === -1) {
    return { subject: '', statement: text };
  }
  return {
    subject: text.slice(0, separatorIndex),
    // Fall back to the full text when nothing follows the separator.
    statement: text.slice(separatorIndex + 2) || text,
  };
}

/**
 * Tool entry point: runs a knowledge-graph query starting from named seed entities.
 *
 * Each seed name is resolved to its best-matching entity, then one of three
 * query types is executed:
 *  - `neighbors`: one-hop neighbors of all seeds, filtered by the first step.
 *  - `typed_traversal`: multi-step traversal from all seeds together.
 *  - `intersection`: the same traversal run per seed, keeping only entities
 *    reached from every seed.
 *
 * @param input    Untyped tool arguments; expected to match `GraphQueryInput`.
 * @param _context Tool context; `memoryScopeId`, when set, scopes linked items.
 * @returns A result whose `content` is JSON. `isError` is true when the input is
 *          malformed, no seed resolves, an intersection seed is unresolved, or
 *          the query type is unknown.
 */
export async function run(
  input: Record<string, unknown>,
  _context: ToolContext,
): Promise<ToolExecutionResult> {
  const params = input as unknown as GraphQueryInput;

  // `input` is untyped at runtime, so check the one field we iterate over
  // before touching it; otherwise a missing `seeds` throws a TypeError.
  const rawSeeds: unknown = params.seeds;
  if (!Array.isArray(rawSeeds) || !rawSeeds.every(seed => typeof seed === 'string')) {
    return graphQueryError({
      error: 'seeds must be an array of entity names',
      seeds: rawSeeds,
    });
  }
  const seedNames = rawSeeds as string[];

  initializeDb();

  const maxResults = normalizeMaxResults(params.max_results);
  const includeItems = params.include_items ?? true;

  // Resolve seed entity names to IDs (best match wins).
  const seedEntityIds: string[] = [];
  const resolvedSeeds: Array<{ name: string; id: string }> = [];
  for (const seedName of seedNames) {
    const matched = findMatchedEntities(seedName, 5);
    if (matched.length > 0) {
      seedEntityIds.push(matched[0].id);
      resolvedSeeds.push({ name: seedName, id: matched[0].id });
    }
  }

  if (seedEntityIds.length === 0) {
    return graphQueryError({
      error: 'No matching entities found for the provided seed names',
      seeds: seedNames,
    });
  }

  // For intersection queries, all seeds must resolve — dropping any seed silently
  // changes semantics from "reachable from ALL seeds" to "reachable from resolved seeds".
  if (params.query_type === 'intersection' && seedEntityIds.length < seedNames.length) {
    const unresolvedSeeds = seedNames.filter(
      name => !resolvedSeeds.some(seed => seed.name === name),
    );
    return graphQueryError({
      error: 'Some seed entities could not be resolved. Intersection requires all seeds to match.',
      unresolved_seeds: unresolvedSeeds,
      resolved_seeds: resolvedSeeds,
    });
  }

  let resultEntityIds: string[];

  switch (params.query_type) {
    case 'neighbors': {
      // Only the first step's filters apply to a one-hop neighbor lookup.
      const [firstStep] = toTraversalSteps(params.steps);
      const result = findNeighborEntities(seedEntityIds, {
        maxEdges: GRAPH_QUERY_MAX_EDGES,
        maxNeighborEntities: maxResults,
        maxDepth: 1,
        relationTypes: firstStep?.relationTypes,
        entityTypes: firstStep?.entityTypes,
      });
      resultEntityIds = result.neighborEntityIds;
      break;
    }

    case 'typed_traversal': {
      resultEntityIds = collectTypedNeighbors(seedEntityIds, toTraversalSteps(params.steps), {
        maxResultsPerStep: maxResults,
        maxEdgesPerStep: GRAPH_QUERY_MAX_EDGES,
      });
      break;
    }

    case 'intersection': {
      // Run the typed traversal from each seed independently, then intersect.
      const traversalSteps = toTraversalSteps(params.steps);
      const [firstSet, ...otherSets] = seedEntityIds.map(
        seedId =>
          new Set(
            collectTypedNeighbors([seedId], traversalSteps, {
              maxResultsPerStep: maxResults,
              maxEdgesPerStep: GRAPH_QUERY_MAX_EDGES,
            }),
          ),
      );
      resultEntityIds = firstSet
        ? [...firstSet].filter(id => otherSets.every(set => set.has(id)))
        : [];
      break;
    }

    default:
      return graphQueryError({ error: `Unknown query_type: ${String(params.query_type)}` });
  }

  // Look up entity details.
  const entities: EntityResult[] = [];
  const limitedIds = resultEntityIds.slice(0, maxResults);

  // Skip the query entirely for an empty ID list: some drizzle-orm versions
  // reject `inArray` with no values.
  if (limitedIds.length > 0) {
    const db = getDb();
    const entityRows = db
      .select()
      .from(memoryEntities)
      .where(inArray(memoryEntities.id, limitedIds))
      .all();

    // `IN (...)` does not preserve list order; restore the order in which the
    // traversal returned the IDs so the output is deterministic.
    const position = new Map<string, number>();
    limitedIds.forEach((id, index) => {
      if (!position.has(id)) position.set(id, index);
    });
    entityRows.sort(
      (a, b) => (position.get(a.id) ?? limitedIds.length) - (position.get(b.id) ?? limitedIds.length),
    );

    const scopeIds = _context.memoryScopeId ? [_context.memoryScopeId] : undefined;

    for (const row of entityRows) {
      const entity: EntityResult = {
        id: row.id,
        name: row.name,
        type: row.type,
        aliases: parseAliases(row.aliases),
      };

      if (includeItems) {
        const candidates = getEntityLinkedItemCandidates([row.id], {
          source: 'entity_direct',
          scopeIds,
        });
        entity.items = candidates
          .slice(0, GRAPH_QUERY_MAX_ITEMS_PER_ENTITY)
          .map(candidate => splitItemText(candidate.text));
      }

      entities.push(entity);
    }
  }

  return {
    content: JSON.stringify({
      query_type: params.query_type,
      resolved_seeds: resolvedSeeds,
      result_count: entities.length,
      entities,
    }, null, 2),
    isError: false,
  };
}
@@ -0,0 +1,199 @@
1
+ ---
2
+ name: "Media Processing"
3
+ description: "Ingest and process media files (video, audio, image) through multi-stage pipelines including keyframe extraction, vision analysis, and timeline generation"
4
+ metadata: {"vellum": {"emoji": "🎬"}}
5
+ ---
6
+
7
+ Ingest and track processing of media files (video, audio, images) through configurable multi-stage pipelines.
8
+
9
+ ## End-to-End Workflow
10
+
11
+ The processing pipeline follows a sequential flow. Each stage depends on the output of the previous one:
12
+
13
+ 1. **Ingest** (`ingest_media`) — Register a media file, detect MIME type, extract duration, deduplicate by content hash.
14
+ 2. **Extract Keyframes** (`extract_keyframes`) — Pull frames from video at regular intervals (default: every 3 seconds) using ffmpeg.
15
+ 3. **Analyze Keyframes** (`analyze_keyframes`) — Send each keyframe to Claude VLM for structured scene analysis (subjects, actions, context).
16
+ 4. **Generate Timeline** — Aggregate vision outputs into coherent timeline segments (called via `services/timeline-service.ts`).
17
+ 5. **Detect Events** (`detect_events`) — Apply configurable detection rules against timeline segments to find events of interest.
18
+ 6. **Query & Clip** — Use `query_media_events` to search events with natural language, and `generate_clip` to extract video clips around specific moments.
19
+
20
+ The processing pipeline service (`services/processing-pipeline.ts`) can orchestrate stages 2-5 automatically with retries, resumability, and cancellation support.
21
+
22
+ ## Tools
23
+
24
+ ### ingest_media
25
+
26
+ Register a media file for processing. Accepts an absolute file path, validates the file exists, detects MIME type, extracts duration (for video/audio via ffprobe), and registers the asset with content-hash deduplication.
27
+
28
+ ### media_status
29
+
30
+ Query the processing status of a media asset. Returns the asset metadata along with per-stage progress details. Use this to monitor pipeline progress.
31
+
32
+ ### extract_keyframes
33
+
34
+ Extract keyframes from a video asset at regular intervals using ffmpeg. Frames are saved as JPEG images and registered in the database for subsequent vision analysis.
35
+
36
+ ### analyze_keyframes
37
+
38
+ Analyze extracted keyframes using Claude VLM (vision language model). Produces structured JSON output with scene descriptions, subjects, actions, and context. Supports resumability by skipping already-analyzed frames.
39
+
40
+ ### detect_events
41
+
42
+ Detect events from timeline segments using configurable detection rules. Built-in rule types:
43
+ - **segment_transition** — Fires when a specified field changes between adjacent segments.
44
+ - **short_segment** — Fires when a segment's duration is below a threshold.
45
+ - **attribute_match** — Fires when segment attribute values match a regex pattern.
46
+
47
+ If no rules are provided, sensible defaults are applied based on the event type.
48
+
49
+ ### query_media_events
50
+
51
+ Query detected events using natural language. Parses the query into structured filters (event type, count, confidence threshold, time range) and returns matching events ranked by confidence.
52
+
53
+ ### generate_clip
54
+
55
+ Extract a video clip from a media asset using ffmpeg. Applies configurable pre/post-roll padding (clamped to file boundaries), outputs the clip as a temporary file.
56
+
57
+ ### select_tracking_profile
58
+
59
+ Configure which event capabilities are enabled for a media asset. Capabilities are organized into tiers:
60
+ - **Ready**: Production-quality detection, included by default.
61
+ - **Beta**: Functional but may have accuracy gaps. Results include a confidence disclaimer.
62
+ - **Experimental**: Early-stage detection, expect noise. Results include a confidence disclaimer.
63
+
64
+ Call without capabilities to see available options; call with a capabilities array to set the profile.
65
+
66
+ ### submit_feedback
67
+
68
+ Submit feedback on a detected event. Supports four types:
69
+ - **correct** — Confirms the event is accurate.
70
+ - **incorrect** — Marks a false positive.
71
+ - **boundary_edit** — Adjusts start/end times.
72
+ - **missed** — Reports an event the system failed to detect.
73
+
74
+ ### recalibrate
75
+
76
+ Re-rank existing events based on accumulated feedback. Adjusts confidence scores using correction patterns (false positive rates, missed events, boundary adjustments).
77
+
78
+ ### media_diagnostics
79
+
80
+ Get a diagnostic report for a media asset. Returns:
81
+ - **Processing stats**: total keyframes, vision outputs, timeline segments, events detected.
82
+ - **Per-stage status and timing**: which stages have run, how long each took, current progress.
83
+ - **Failure reasons**: last error from any failed stage.
84
+ - **Cost estimation**: based on keyframe count and estimated API cost per frame.
85
+ - **Feedback summary**: precision/recall estimates per event type.
86
+
87
+ ## Services
88
+
89
+ ### Processing Pipeline (services/processing-pipeline.ts)
90
+
91
+ Orchestrates the full processing pipeline with reliability features:
92
+ - **Sequential execution**: keyframe_extraction, vision_analysis, timeline_generation, event_detection.
93
+ - **Retries**: Each stage is retried with exponential backoff and jitter (configurable max retries and base delay).
94
+ - **Resumability**: Checks processing_stages to find the last completed stage and resumes from there. Safe to restart after crashes.
95
+ - **Cancellation**: Cooperative cancellation via asset status. Set asset status to `cancelled` and the pipeline stops between stages.
96
+ - **Idempotency**: Re-ingesting the same file hash is a no-op. Re-running a fully completed pipeline is also a no-op.
97
+ - **Graceful degradation**: If a stage fails mid-batch (e.g., vision API errors), partial results are saved. The stage is marked as failed with the error details, and the pipeline stops without losing work.
98
+
99
+ ### Timeline Generation (services/timeline-service.ts)
100
+
101
+ Aggregates vision analysis outputs into coherent timeline segments. Groups adjacent keyframes that share similar scene characteristics into time ranges with merged attributes.
102
+
103
+ ### Event Detection (services/event-detection-service.ts)
104
+
105
+ Evaluates configurable detection rules against timeline segments. Produces scored event candidates with weighted confidence.
106
+
107
+ ### Feedback Aggregation (services/feedback-aggregation.ts)
108
+
109
+ Computes precision/recall estimates per event type from user feedback. Provides structured JSON export for offline analysis.
110
+
111
+ ### Capability Registry (services/capability-registry.ts)
112
+
113
+ Maintains an extensible, domain-agnostic catalog of available tracking capabilities with tier classification. Other domains can register their own capabilities by calling `registerCapability()`.
114
+
115
+ ## Operator Runbook
116
+
117
+ ### Monitoring Progress
118
+
119
+ Use `media_status` to check the current state of any asset:
120
+ - **registered** — Ingested but not yet processed.
121
+ - **processing** — Pipeline is running.
122
+ - **indexed** — All stages completed successfully.
123
+ - **failed** — A stage failed. Check stage details for the error.
124
+
125
+ The response includes per-stage progress (0-100%) so you can see exactly where processing stands.
126
+
127
+ ### Diagnosing Failures
128
+
129
+ Use `media_diagnostics` to get a full diagnostic report:
130
+ 1. Check the `stages` array for any stage with `status: "failed"`.
131
+ 2. Read the `lastError` field for that stage to understand what went wrong.
132
+ 3. Check `durationMs` to see if a stage timed out or ran unusually long.
133
+ 4. Common failure causes:
134
+ - **keyframe_extraction**: ffmpeg not installed, corrupt video file, disk full.
135
+ - **vision_analysis**: ANTHROPIC_API_KEY not set, API rate limits, network errors.
136
+ - **timeline_generation**: No keyframes or vision outputs exist (earlier stage skipped or failed).
137
+ - **event_detection**: No timeline segments exist.
138
+
139
+ After fixing the root cause, re-run the failed stage. The pipeline is resumable — it picks up from where it left off.
140
+
141
+ ### Configuring Tracking Profiles
142
+
143
+ 1. Call `select_tracking_profile` with just the `asset_id` to see available capabilities and their tiers.
144
+ 2. Call again with a `capabilities` array to enable the desired event types.
145
+ 3. `query_media_events` returns only events whose type is among the enabled capabilities.
146
+ 4. The capability registry is extensible — new domains can register capabilities via `registerCapability()` in `services/capability-registry.ts`.
147
+
148
+ ### Feedback and Recalibration
149
+
150
+ 1. Review detected events using `query_media_events`.
151
+ 2. For each event, submit feedback via `submit_feedback`:
152
+ - Mark correct detections as `correct` to build precision data.
153
+ - Mark false positives as `incorrect`.
154
+ - Adjust boundaries with `boundary_edit`.
155
+ - Report missed events with `missed` (creates a new event record).
156
+ 3. Run `recalibrate` to re-rank events based on accumulated feedback.
157
+ 4. Use `media_diagnostics` to check precision/recall estimates after feedback.
158
+
159
+ ### Cost Expectations
160
+
161
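+ Vision analysis is the primary cost driver. Cost scales linearly with the number of keyframes analyzed — that is, proportionally to video duration and inversely to the keyframe interval (the estimates below work out to roughly $0.003 per keyframe):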
162
+
163
+ | Video Duration | Interval | Keyframes | Estimated Cost |
164
+ |----------------|----------|-----------|----------------|
165
+ | 30 min | 3s | ~600 | ~$1.80 |
166
+ | 60 min | 3s | ~1,200 | ~$3.60 |
167
+ | 90 min | 3s | ~1,800 | ~$5.40 |
168
+ | 90 min | 5s | ~1,080 | ~$3.24 |
169
+
170
+ Increasing the keyframe interval reduces cost proportionally but may miss short-duration events. The `media_diagnostics` tool provides per-asset cost estimates.
171
+
172
+ ### Known Limitations
173
+
174
+ - **ffmpeg required**: Keyframe extraction and clip generation require ffmpeg to be installed on the host.
175
+ - **Single-file ingestion**: Each `ingest_media` call processes one file. Batch ingestion is not yet supported.
176
+ - **Vision model latency**: Analyzing keyframes is the slowest stage. A 90-minute video at 3-second intervals requires ~1,800 API calls.
177
+ - **Scene similarity heuristic**: Timeline segmentation uses Jaccard similarity on subjects — it works well for distinct scenes but may over-merge visually similar but semantically different moments.
178
+ - **Detection rules are heuristic**: Event detection uses rule-based scoring, not ML. Accuracy depends on how well the rules match the target event patterns. Use feedback and recalibration to improve over time.
179
+ - **No real-time processing**: The pipeline processes pre-recorded media files. Live/streaming video is not supported.
180
+
181
+ ### Troubleshooting
182
+
183
+ | Symptom | Likely Cause | Fix |
184
+ |---------|-------------|-----|
185
+ | "No keyframes found" | extract_keyframes not run or failed | Check keyframe_extraction stage status; re-run if needed |
186
+ | "ANTHROPIC_API_KEY not set" | Missing env var | Set ANTHROPIC_API_KEY in the environment |
187
+ | Vision analysis very slow | Large video, small interval | Increase interval_seconds or use smaller batch_size |
188
+ | Low event confidence | Detection rules too broad | Tune rules: increase weights on high-signal rules, use tighter regex patterns |
189
+ | Many false positives | Rules overfitting on noise | Submit `incorrect` feedback, then run `recalibrate` |
190
+ | Pipeline stuck at "processing" | Stage crashed without updating status | Use `media_diagnostics` to find the stuck stage; re-run manually |
191
+
192
+ ## Usage Notes
193
+
194
+ - The `ingest_media` tool requires an absolute path to a local file.
195
+ - Supported media types: video (mp4, mov, avi, mkv, webm, etc.), audio (mp3, wav, m4a, etc.), and images (png, jpg, gif, webp, etc.).
196
+ - For video and audio files, duration is automatically extracted via ffprobe (requires ffmpeg to be installed).
197
+ - Duplicate files are detected by content hash and return the existing asset record.
198
+ - The `analyze_keyframes` tool is marked as medium risk because it makes external API calls to Claude VLM, which incur costs.
199
+ - All schema tables, services, and tool interfaces are media-generic. Domain-specific interpretation belongs in VLM prompt templates.