@vellumai/assistant 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/Dockerfile +2 -0
  2. package/README.md +37 -2
  3. package/package.json +1 -1
  4. package/scripts/ipc/generate-swift.ts +13 -0
  5. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +100 -0
  6. package/src/__tests__/approval-hardcoded-copy-guard.test.ts +41 -0
  7. package/src/__tests__/approval-message-composer.test.ts +253 -0
  8. package/src/__tests__/call-domain.test.ts +12 -2
  9. package/src/__tests__/call-orchestrator.test.ts +70 -1
  10. package/src/__tests__/call-routes-http.test.ts +27 -2
  11. package/src/__tests__/channel-approval-routes.test.ts +21 -17
  12. package/src/__tests__/channel-approvals.test.ts +48 -1
  13. package/src/__tests__/channel-guardian.test.ts +74 -22
  14. package/src/__tests__/channel-readiness-service.test.ts +257 -0
  15. package/src/__tests__/config-schema.test.ts +2 -1
  16. package/src/__tests__/credential-security-invariants.test.ts +1 -0
  17. package/src/__tests__/daemon-lifecycle.test.ts +13 -12
  18. package/src/__tests__/dictation-mode-detection.test.ts +63 -0
  19. package/src/__tests__/entity-search.test.ts +615 -0
  20. package/src/__tests__/handlers-twilio-config.test.ts +407 -0
  21. package/src/__tests__/ipc-snapshot.test.ts +63 -0
  22. package/src/__tests__/messaging-send-tool.test.ts +65 -0
  23. package/src/__tests__/run-orchestrator-assistant-events.test.ts +4 -0
  24. package/src/__tests__/run-orchestrator.test.ts +22 -0
  25. package/src/__tests__/session-runtime-assembly.test.ts +85 -1
  26. package/src/__tests__/sms-messaging-provider.test.ts +125 -0
  27. package/src/__tests__/twilio-routes.test.ts +39 -3
  28. package/src/__tests__/twitter-cli-error-shaping.test.ts +2 -2
  29. package/src/__tests__/web-search.test.ts +1 -1
  30. package/src/__tests__/work-item-output.test.ts +110 -0
  31. package/src/calls/call-domain.ts +8 -5
  32. package/src/calls/call-orchestrator.ts +22 -11
  33. package/src/calls/twilio-config.ts +17 -11
  34. package/src/calls/twilio-rest.ts +276 -0
  35. package/src/calls/twilio-routes.ts +39 -1
  36. package/src/config/bundled-skills/knowledge-graph/SKILL.md +15 -0
  37. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +56 -0
  38. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +185 -0
  39. package/src/config/bundled-skills/media-processing/SKILL.md +199 -0
  40. package/src/config/bundled-skills/media-processing/TOOLS.json +320 -0
  41. package/src/config/bundled-skills/media-processing/services/capability-registry.ts +137 -0
  42. package/src/config/bundled-skills/media-processing/services/event-detection-service.ts +280 -0
  43. package/src/config/bundled-skills/media-processing/services/feedback-aggregation.ts +144 -0
  44. package/src/config/bundled-skills/media-processing/services/feedback-store.ts +136 -0
  45. package/src/config/bundled-skills/media-processing/services/processing-pipeline.ts +261 -0
  46. package/src/config/bundled-skills/media-processing/services/retrieval-service.ts +95 -0
  47. package/src/config/bundled-skills/media-processing/services/timeline-service.ts +267 -0
  48. package/src/config/bundled-skills/media-processing/tools/analyze-keyframes.ts +301 -0
  49. package/src/config/bundled-skills/media-processing/tools/detect-events.ts +110 -0
  50. package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +190 -0
  51. package/src/config/bundled-skills/media-processing/tools/generate-clip.ts +195 -0
  52. package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +197 -0
  53. package/src/config/bundled-skills/media-processing/tools/media-diagnostics.ts +166 -0
  54. package/src/config/bundled-skills/media-processing/tools/media-status.ts +75 -0
  55. package/src/config/bundled-skills/media-processing/tools/query-media-events.ts +300 -0
  56. package/src/config/bundled-skills/media-processing/tools/recalibrate.ts +235 -0
  57. package/src/config/bundled-skills/media-processing/tools/select-tracking-profile.ts +142 -0
  58. package/src/config/bundled-skills/media-processing/tools/submit-feedback.ts +150 -0
  59. package/src/config/bundled-skills/messaging/SKILL.md +21 -6
  60. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +5 -1
  61. package/src/config/bundled-skills/phone-calls/SKILL.md +2 -2
  62. package/src/config/bundled-skills/twitter/SKILL.md +19 -3
  63. package/src/config/defaults.ts +2 -1
  64. package/src/config/schema.ts +9 -3
  65. package/src/config/system-prompt.ts +24 -0
  66. package/src/config/templates/IDENTITY.md +2 -2
  67. package/src/config/vellum-skills/catalog.json +6 -0
  68. package/src/config/vellum-skills/google-oauth-setup/SKILL.md +3 -3
  69. package/src/config/vellum-skills/slack-oauth-setup/SKILL.md +3 -3
  70. package/src/config/vellum-skills/sms-setup/SKILL.md +118 -0
  71. package/src/config/vellum-skills/twilio-setup/SKILL.md +40 -8
  72. package/src/daemon/handlers/config.ts +783 -9
  73. package/src/daemon/handlers/dictation.ts +182 -0
  74. package/src/daemon/handlers/identity.ts +14 -23
  75. package/src/daemon/handlers/index.ts +2 -0
  76. package/src/daemon/handlers/sessions.ts +2 -0
  77. package/src/daemon/handlers/shared.ts +3 -0
  78. package/src/daemon/handlers/work-items.ts +15 -7
  79. package/src/daemon/ipc-contract-inventory.json +10 -0
  80. package/src/daemon/ipc-contract.ts +108 -4
  81. package/src/daemon/lifecycle.ts +2 -0
  82. package/src/daemon/ride-shotgun-handler.ts +1 -1
  83. package/src/daemon/server.ts +6 -2
  84. package/src/daemon/session-agent-loop.ts +5 -1
  85. package/src/daemon/session-runtime-assembly.ts +55 -0
  86. package/src/daemon/session-tool-setup.ts +2 -0
  87. package/src/daemon/session.ts +11 -1
  88. package/src/inbound/public-ingress-urls.ts +3 -3
  89. package/src/memory/channel-guardian-store.ts +2 -1
  90. package/src/memory/db-init.ts +144 -0
  91. package/src/memory/job-handlers/media-processing.ts +100 -0
  92. package/src/memory/jobs-store.ts +2 -1
  93. package/src/memory/jobs-worker.ts +4 -0
  94. package/src/memory/media-store.ts +759 -0
  95. package/src/memory/retriever.ts +6 -1
  96. package/src/memory/schema.ts +98 -0
  97. package/src/memory/search/entity.ts +208 -25
  98. package/src/memory/search/ranking.ts +6 -1
  99. package/src/memory/search/types.ts +24 -0
  100. package/src/messaging/provider-types.ts +2 -0
  101. package/src/messaging/providers/sms/adapter.ts +204 -0
  102. package/src/messaging/providers/sms/client.ts +93 -0
  103. package/src/messaging/providers/sms/types.ts +7 -0
  104. package/src/permissions/checker.ts +16 -2
  105. package/src/runtime/approval-message-composer.ts +143 -0
  106. package/src/runtime/channel-approvals.ts +12 -4
  107. package/src/runtime/channel-guardian-service.ts +44 -18
  108. package/src/runtime/channel-readiness-service.ts +292 -0
  109. package/src/runtime/channel-readiness-types.ts +29 -0
  110. package/src/runtime/http-server.ts +53 -27
  111. package/src/runtime/http-types.ts +3 -0
  112. package/src/runtime/routes/call-routes.ts +2 -1
  113. package/src/runtime/routes/channel-routes.ts +67 -21
  114. package/src/runtime/run-orchestrator.ts +35 -2
  115. package/src/tools/assets/materialize.ts +2 -2
  116. package/src/tools/calls/call-start.ts +1 -0
  117. package/src/tools/credentials/vault.ts +1 -1
  118. package/src/tools/execution-target.ts +11 -1
  119. package/src/tools/network/web-search.ts +1 -1
  120. package/src/tools/types.ts +2 -0
  121. package/src/twitter/router.ts +1 -1
  122. package/src/util/platform.ts +35 -0
@@ -0,0 +1,199 @@
1
+ ---
2
+ name: "Media Processing"
3
+ description: "Ingest and process media files (video, audio, image) through multi-stage pipelines including keyframe extraction, vision analysis, and timeline generation"
4
+ metadata: {"vellum": {"emoji": "🎬"}}
5
+ ---
6
+
7
+ Ingest and track processing of media files (video, audio, images) through configurable multi-stage pipelines.
8
+
9
+ ## End-to-End Workflow
10
+
11
+ The processing pipeline follows a sequential flow. Each stage depends on the output of the previous one:
12
+
13
+ 1. **Ingest** (`ingest_media`) — Register a media file, detect MIME type, extract duration, deduplicate by content hash.
14
+ 2. **Extract Keyframes** (`extract_keyframes`) — Pull frames from video at regular intervals (default: every 3 seconds) using ffmpeg.
15
+ 3. **Analyze Keyframes** (`analyze_keyframes`) — Send each keyframe to Claude VLM for structured scene analysis (subjects, actions, context).
16
+ 4. **Generate Timeline** — Aggregate vision outputs into coherent timeline segments (performed by `services/timeline-service.ts`; no standalone tool).
17
+ 5. **Detect Events** (`detect_events`) — Apply configurable detection rules against timeline segments to find events of interest.
18
+ 6. **Query & Clip** — Use `query_media_events` to search events with natural language, and `generate_clip` to extract video clips around specific moments.
19
+
20
+ The processing pipeline service (`services/processing-pipeline.ts`) can orchestrate stages 2-5 automatically with retries, resumability, and cancellation support.
21
+
22
+ ## Tools
23
+
24
+ ### ingest_media
25
+
26
+ Register a media file for processing. Accepts an absolute file path, validates the file exists, detects MIME type, extracts duration (for video/audio via ffprobe), and registers the asset with content-hash deduplication.
27
+
28
+ ### media_status
29
+
30
+ Query the processing status of a media asset. Returns the asset metadata along with per-stage progress details. Use this to monitor pipeline progress.
31
+
32
+ ### extract_keyframes
33
+
34
+ Extract keyframes from a video asset at regular intervals using ffmpeg. Frames are saved as JPEG images and registered in the database for subsequent vision analysis.
35
+
36
+ ### analyze_keyframes
37
+
38
+ Analyze extracted keyframes using Claude VLM (vision language model). Produces structured JSON output with scene descriptions, subjects, actions, and context. Supports resumability by skipping already-analyzed frames.
39
+
40
+ ### detect_events
41
+
42
+ Detect events from timeline segments using configurable detection rules. Built-in rule types:
43
+ - **segment_transition** — Fires when a specified field changes between adjacent segments.
44
+ - **short_segment** — Fires when a segment's duration is below a threshold.
45
+ - **attribute_match** — Fires when segment attribute values match a regex pattern.
46
+
47
+ If no rules are provided, sensible defaults are applied based on the event type.
48
+
49
+ ### query_media_events
50
+
51
+ Query detected events using natural language. Parses the query into structured filters (event type, count, confidence threshold, time range) and returns matching events ranked by confidence.
52
+
53
+ ### generate_clip
54
+
55
+ Extract a video clip from a media asset using ffmpeg. Applies configurable pre/post-roll padding (clamped to file boundaries) and outputs the clip as a temporary file.
56
+
57
+ ### select_tracking_profile
58
+
59
+ Configure which event capabilities are enabled for a media asset. Capabilities are organized into tiers:
60
+ - **Ready**: Production-quality detection, included by default.
61
+ - **Beta**: Functional but may have accuracy gaps. Results include a confidence disclaimer.
62
+ - **Experimental**: Early-stage detection, expect noise. Results include a confidence disclaimer.
63
+
64
+ Call without capabilities to see available options; call with a capabilities array to set the profile.
65
+
66
+ ### submit_feedback
67
+
68
+ Submit feedback on a detected event. Supports four types:
69
+ - **correct** — Confirms the event is accurate.
70
+ - **incorrect** — Marks a false positive.
71
+ - **boundary_edit** — Adjusts start/end times.
72
+ - **missed** — Reports an event the system failed to detect.
73
+
74
+ ### recalibrate
75
+
76
+ Re-rank existing events based on accumulated feedback. Adjusts confidence scores using correction patterns (false positive rates, missed events, boundary adjustments).
77
+
78
+ ### media_diagnostics
79
+
80
+ Get a diagnostic report for a media asset. Returns:
81
+ - **Processing stats**: total keyframes, vision outputs, timeline segments, events detected.
82
+ - **Per-stage status and timing**: which stages have run, how long each took, current progress.
83
+ - **Failure reasons**: last error from any failed stage.
84
+ - **Cost estimation**: based on keyframe count and estimated API cost per frame.
85
+ - **Feedback summary**: precision/recall estimates per event type.
86
+
87
+ ## Services
88
+
89
+ ### Processing Pipeline (services/processing-pipeline.ts)
90
+
91
+ Orchestrates the full processing pipeline with reliability features:
92
+ - **Sequential execution**: keyframe_extraction, vision_analysis, timeline_generation, event_detection.
93
+ - **Retries**: Each stage is retried with exponential backoff and jitter (configurable max retries and base delay).
94
+ - **Resumability**: Checks processing_stages to find the last completed stage and resumes from there. Safe to restart after crashes.
95
+ - **Cancellation**: Cooperative cancellation via asset status. Set asset status to `cancelled` and the pipeline stops between stages.
96
+ - **Idempotency**: Re-ingesting the same file hash is a no-op. Re-running a fully completed pipeline is also a no-op.
97
+ - **Graceful degradation**: If a stage fails mid-batch (e.g., vision API errors), partial results are saved. The stage is marked as failed with the error details, and the pipeline stops without losing work.
98
+
99
+ ### Timeline Generation (services/timeline-service.ts)
100
+
101
+ Aggregates vision analysis outputs into coherent timeline segments. Groups adjacent keyframes that share similar scene characteristics into time ranges with merged attributes.
102
+
103
+ ### Event Detection (services/event-detection-service.ts)
104
+
105
+ Evaluates configurable detection rules against timeline segments. Produces scored event candidates with weighted confidence.
106
+
107
+ ### Feedback Aggregation (services/feedback-aggregation.ts)
108
+
109
+ Computes precision/recall estimates per event type from user feedback. Provides structured JSON export for offline analysis.
110
+
111
+ ### Capability Registry (services/capability-registry.ts)
112
+
113
+ Maintains an extensible, domain-agnostic catalog of available tracking capabilities with tier classification. Other domains can register their own capabilities by calling `registerCapability()`.
114
+
115
+ ## Operator Runbook
116
+
117
+ ### Monitoring Progress
118
+
119
+ Use `media_status` to check the current state of any asset:
120
+ - **registered** — Ingested but not yet processed.
121
+ - **processing** — Pipeline is running.
122
+ - **indexed** — All stages completed successfully.
123
+ - **failed** — A stage failed. Check stage details for the error.
124
+
125
+ The response includes per-stage progress (0-100%) so you can see exactly where processing stands.
126
+
127
+ ### Diagnosing Failures
128
+
129
+ Use `media_diagnostics` to get a full diagnostic report:
130
+ 1. Check the `stages` array for any stage with `status: "failed"`.
131
+ 2. Read the `lastError` field for that stage to understand what went wrong.
132
+ 3. Check `durationMs` to see if a stage timed out or ran unusually long.
133
+ 4. Common failure causes:
134
+ - **keyframe_extraction**: ffmpeg not installed, corrupt video file, disk full.
135
+ - **vision_analysis**: ANTHROPIC_API_KEY not set, API rate limits, network errors.
136
+ - **timeline_generation**: No keyframes or vision outputs exist (earlier stage skipped or failed).
137
+ - **event_detection**: No timeline segments exist.
138
+
139
+ After fixing the root cause, re-run the failed stage. The pipeline is resumable — it picks up from where it left off.
140
+
141
+ ### Configuring Tracking Profiles
142
+
143
+ 1. Call `select_tracking_profile` with just the `asset_id` to see available capabilities and their tiers.
144
+ 2. Call again with a `capabilities` array to enable the desired event types.
145
+ 3. Only enabled capabilities are returned by `query_media_events`.
146
+ 4. The capability registry is extensible — new domains can register capabilities via `registerCapability()` in `services/capability-registry.ts`.
147
+
148
+ ### Feedback and Recalibration
149
+
150
+ 1. Review detected events using `query_media_events`.
151
+ 2. For each event, submit feedback via `submit_feedback`:
152
+ - Mark correct detections as `correct` to build precision data.
153
+ - Mark false positives as `incorrect`.
154
+ - Adjust boundaries with `boundary_edit`.
155
+ - Report missed events with `missed` (creates a new event record).
156
+ 3. Run `recalibrate` to re-rank events based on accumulated feedback.
157
+ 4. Use `media_diagnostics` to check precision/recall estimates after feedback.
158
+
159
+ ### Cost Expectations
160
+
161
+ Vision analysis is the primary cost driver. Cost scales linearly with video duration and keyframe interval:
162
+
163
+ | Video Duration | Interval | Keyframes | Estimated Cost |
164
+ |----------------|----------|-----------|----------------|
165
+ | 30 min | 3s | ~600 | ~$1.80 |
166
+ | 60 min | 3s | ~1,200 | ~$3.60 |
167
+ | 90 min | 3s | ~1,800 | ~$5.40 |
168
+ | 90 min | 5s | ~1,080 | ~$3.24 |
169
+
170
+ Increasing the keyframe interval reduces cost proportionally but may miss short-duration events. The `media_diagnostics` tool provides per-asset cost estimates.
171
+
172
+ ### Known Limitations
173
+
174
+ - **ffmpeg required**: Keyframe extraction and clip generation require ffmpeg to be installed on the host.
175
+ - **Single-file ingestion**: Each `ingest_media` call processes one file. Batch ingestion is not yet supported.
176
+ - **Vision model latency**: Analyzing keyframes is the slowest stage. A 90-minute video at 3-second intervals requires ~1,800 API calls.
177
+ - **Scene similarity heuristic**: Timeline segmentation uses Jaccard similarity on subjects — it works well for distinct scenes but may over-merge visually similar but semantically different moments.
178
+ - **Detection rules are heuristic**: Event detection uses rule-based scoring, not ML. Accuracy depends on how well the rules match the target event patterns. Use feedback and recalibration to improve over time.
179
+ - **No real-time processing**: The pipeline processes pre-recorded media files. Live/streaming video is not supported.
180
+
181
+ ### Troubleshooting
182
+
183
+ | Symptom | Likely Cause | Fix |
184
+ |---------|-------------|-----|
185
+ | "No keyframes found" | extract_keyframes not run or failed | Check keyframe_extraction stage status; re-run if needed |
186
+ | "ANTHROPIC_API_KEY not set" | Missing env var | Set ANTHROPIC_API_KEY in the environment |
187
+ | Vision analysis very slow | Large video, small interval | Increase interval_seconds or use smaller batch_size |
188
+ | Low event confidence | Detection rules too broad | Tune rules: increase weights on high-signal rules, use tighter regex patterns |
189
+ | Many false positives | Rules overfitting on noise | Submit `incorrect` feedback, then run `recalibrate` |
190
+ | Pipeline stuck at "processing" | Stage crashed without updating status | Use `media_diagnostics` to find the stuck stage; re-run manually |
191
+
192
+ ## Usage Notes
193
+
194
+ - The `ingest_media` tool requires an absolute path to a local file.
195
+ - Supported media types: video (mp4, mov, avi, mkv, webm, etc.), audio (mp3, wav, m4a, etc.), and images (png, jpg, gif, webp, etc.).
196
+ - For video and audio files, duration is automatically extracted via ffprobe (requires ffmpeg to be installed).
197
+ - Duplicate files are detected by content hash and return the existing asset record.
198
+ - The `analyze_keyframes` tool is marked as medium risk because it makes external API calls to Claude VLM, which incur costs.
199
+ - All schema tables, services, and tool interfaces are media-generic. Domain-specific interpretation belongs in VLM prompt templates.
@@ -0,0 +1,320 @@
1
+ {
2
+ "version": 1,
3
+ "tools": [
4
+ {
5
+ "name": "ingest_media",
6
+ "description": "Ingest a media file (video, audio, or image) for processing. Validates the file, detects MIME type, extracts duration for video/audio, registers the asset with content-hash dedup, and enqueues an initial processing job.",
7
+ "category": "media",
8
+ "risk": "low",
9
+ "input_schema": {
10
+ "type": "object",
11
+ "properties": {
12
+ "file_path": {
13
+ "type": "string",
14
+ "description": "Absolute path to a local media file (video, audio, or image)"
15
+ },
16
+ "title": {
17
+ "type": "string",
18
+ "description": "Optional human-readable title for the media asset. Defaults to the filename."
19
+ },
20
+ "metadata": {
21
+ "type": "object",
22
+ "description": "Optional JSON metadata to attach to the asset (e.g., pipeline config, source info)"
23
+ }
24
+ },
25
+ "required": ["file_path"]
26
+ },
27
+ "executor": "tools/ingest-media.ts",
28
+ "execution_target": "host"
29
+ },
30
+ {
31
+ "name": "media_status",
32
+ "description": "Query the processing status of one or more media assets, including per-stage progress details.",
33
+ "category": "media",
34
+ "risk": "low",
35
+ "input_schema": {
36
+ "type": "object",
37
+ "properties": {
38
+ "asset_id": {
39
+ "type": "string",
40
+ "description": "ID of a specific media asset to query"
41
+ },
42
+ "file_path": {
43
+ "type": "string",
44
+ "description": "File path to look up a media asset by its original path"
45
+ },
46
+ "status_filter": {
47
+ "type": "string",
48
+ "enum": ["registered", "processing", "indexed", "failed"],
49
+ "description": "Filter assets by processing status"
50
+ }
51
+ }
52
+ },
53
+ "executor": "tools/media-status.ts",
54
+ "execution_target": "host"
55
+ },
56
+ {
57
+ "name": "extract_keyframes",
58
+ "description": "Extract keyframes from a video asset at regular intervals using ffmpeg. Stores frame images and registers each in the database for subsequent vision analysis.",
59
+ "category": "media",
60
+ "risk": "low",
61
+ "input_schema": {
62
+ "type": "object",
63
+ "properties": {
64
+ "asset_id": {
65
+ "type": "string",
66
+ "description": "ID of the media asset (must be a video)"
67
+ },
68
+ "interval_seconds": {
69
+ "type": "number",
70
+ "description": "Interval between keyframes in seconds. Default: 3"
71
+ }
72
+ },
73
+ "required": ["asset_id"]
74
+ },
75
+ "executor": "tools/extract-keyframes.ts",
76
+ "execution_target": "host"
77
+ },
78
+ {
79
+ "name": "analyze_keyframes",
80
+ "description": "Analyze extracted keyframes using Claude VLM (vision language model). Produces structured scene descriptions with subjects, actions, and context for each frame. Supports resumability — skips already-analyzed frames.",
81
+ "category": "media",
82
+ "risk": "medium",
83
+ "input_schema": {
84
+ "type": "object",
85
+ "properties": {
86
+ "asset_id": {
87
+ "type": "string",
88
+ "description": "ID of the media asset whose keyframes to analyze"
89
+ },
90
+ "analysis_type": {
91
+ "type": "string",
92
+ "description": "Type of analysis to perform. Default: 'scene_description'"
93
+ },
94
+ "batch_size": {
95
+ "type": "number",
96
+ "description": "Number of keyframes to process per batch. Default: 10"
97
+ }
98
+ },
99
+ "required": ["asset_id"]
100
+ },
101
+ "executor": "tools/analyze-keyframes.ts",
102
+ "execution_target": "host"
103
+ },
104
+ {
105
+ "name": "detect_events",
106
+ "description": "Detect events from a media asset's timeline segments using configurable detection rules with weighted confidence ranking. Supports built-in rule types: segment_transition (field changes between adjacent segments), short_segment (segments below a duration threshold), and attribute_match (regex matching on segment attributes). If no rules are provided, sensible defaults are used based on the event type.",
107
+ "category": "media",
108
+ "risk": "low",
109
+ "input_schema": {
110
+ "type": "object",
111
+ "properties": {
112
+ "asset_id": {
113
+ "type": "string",
114
+ "description": "ID of the media asset to detect events in"
115
+ },
116
+ "event_type": {
117
+ "type": "string",
118
+ "description": "Type label for detected events (e.g., 'turnover', 'scene_change', 'highlight')"
119
+ },
120
+ "detection_rules": {
121
+ "type": "array",
122
+ "description": "Optional array of detection rule objects. Each rule has: ruleType (string: 'segment_transition', 'short_segment', or 'attribute_match'), params (object with rule-specific parameters), and weight (number: contribution to confidence score). If omitted, defaults are used based on event_type.",
123
+ "items": {
124
+ "type": "object",
125
+ "properties": {
126
+ "ruleType": {
127
+ "type": "string",
128
+ "description": "Rule type: 'segment_transition', 'short_segment', or 'attribute_match'"
129
+ },
130
+ "params": {
131
+ "type": "object",
132
+ "description": "Rule-specific parameters (e.g., { field: 'subjects' }, { maxDurationSeconds: 5 }, { field: 'actions', pattern: 'steal|turnover' })"
133
+ },
134
+ "weight": {
135
+ "type": "number",
136
+ "description": "Weight for this rule's contribution to the confidence score"
137
+ }
138
+ },
139
+ "required": ["ruleType", "params", "weight"]
140
+ }
141
+ }
142
+ },
143
+ "required": ["asset_id", "event_type"]
144
+ },
145
+ "executor": "tools/detect-events.ts",
146
+ "execution_target": "host"
147
+ },
148
+ {
149
+ "name": "query_media_events",
150
+ "description": "Query media events using natural language. Parses the query into structured filters (event type, count, confidence threshold, time range) and retrieves matching events ranked by confidence. Supports domain-specific keyword mapping (e.g., 'turnovers' → eventType='turnover').",
151
+ "category": "media",
152
+ "risk": "low",
153
+ "input_schema": {
154
+ "type": "object",
155
+ "properties": {
156
+ "query": {
157
+ "type": "string",
158
+ "description": "Natural language query describing the events to find (e.g., 'top 5 turnovers', 'high confidence goals in the first half')"
159
+ },
160
+ "asset_id": {
161
+ "type": "string",
162
+ "description": "ID of the media asset to search within"
163
+ }
164
+ },
165
+ "required": ["query", "asset_id"]
166
+ },
167
+ "executor": "tools/query-media-events.ts",
168
+ "execution_target": "host"
169
+ },
170
+ {
171
+ "name": "select_tracking_profile",
172
+ "description": "Select and persist a tracking profile for a media asset. When called without capabilities, returns available capabilities organized by tier (ready, beta, experimental). When called with capabilities, validates them against the registry and stores the profile. The capability tier system is generic and extensible across domains.",
173
+ "category": "media",
174
+ "risk": "low",
175
+ "input_schema": {
176
+ "type": "object",
177
+ "properties": {
178
+ "asset_id": {
179
+ "type": "string",
180
+ "description": "ID of the media asset to configure tracking for"
181
+ },
182
+ "capabilities": {
183
+ "type": "array",
184
+ "items": { "type": "string" },
185
+ "description": "Optional array of capability names to enable (e.g., ['turnover', 'field_goal']). If omitted, returns the available capabilities for selection."
186
+ }
187
+ },
188
+ "required": ["asset_id"]
189
+ },
190
+ "executor": "tools/select-tracking-profile.ts",
191
+ "execution_target": "host"
192
+ },
193
+ {
194
+ "name": "generate_clip",
195
+ "description": "Extract a video clip from a media asset using ffmpeg. Applies configurable pre/post-roll padding (clamped to file boundaries), outputs the clip as a temporary file, and registers it as an attachment for in-chat delivery.",
196
+ "category": "media",
197
+ "risk": "low",
198
+ "input_schema": {
199
+ "type": "object",
200
+ "properties": {
201
+ "asset_id": {
202
+ "type": "string",
203
+ "description": "ID of the media asset (must be a video)"
204
+ },
205
+ "start_time": {
206
+ "type": "number",
207
+ "description": "Start time of the clip in seconds"
208
+ },
209
+ "end_time": {
210
+ "type": "number",
211
+ "description": "End time of the clip in seconds"
212
+ },
213
+ "pre_roll": {
214
+ "type": "number",
215
+ "description": "Seconds of padding before start_time. Default: 3"
216
+ },
217
+ "post_roll": {
218
+ "type": "number",
219
+ "description": "Seconds of padding after end_time. Default: 2"
220
+ },
221
+ "output_format": {
222
+ "type": "string",
223
+ "enum": ["mp4", "webm", "mov"],
224
+ "description": "Output video format. Default: 'mp4'"
225
+ }
226
+ },
227
+ "required": ["asset_id", "start_time", "end_time"]
228
+ },
229
+ "executor": "tools/generate-clip.ts",
230
+ "execution_target": "host"
231
+ },
232
+ {
233
+ "name": "submit_feedback",
234
+ "description": "Submit feedback on a detected media event. Supports four feedback types: 'correct' (confirms accuracy), 'incorrect' (marks false positive), 'boundary_edit' (adjusts start/end times), and 'missed' (reports an event the system failed to detect, creating a new event). Works for any event type.",
235
+ "category": "media",
236
+ "risk": "low",
237
+ "input_schema": {
238
+ "type": "object",
239
+ "properties": {
240
+ "event_id": {
241
+ "type": "string",
242
+ "description": "ID of the event to provide feedback on. Required for all types except 'missed'."
243
+ },
244
+ "feedback_type": {
245
+ "type": "string",
246
+ "enum": ["correct", "incorrect", "boundary_edit", "missed"],
247
+ "description": "Type of feedback: correct, incorrect, boundary_edit, or missed"
248
+ },
249
+ "corrected_start_time": {
250
+ "type": "number",
251
+ "description": "Corrected start time in seconds (for boundary_edit feedback)"
252
+ },
253
+ "corrected_end_time": {
254
+ "type": "number",
255
+ "description": "Corrected end time in seconds (for boundary_edit feedback)"
256
+ },
257
+ "notes": {
258
+ "type": "string",
259
+ "description": "Optional free-text notes about the feedback"
260
+ },
261
+ "asset_id": {
262
+ "type": "string",
263
+ "description": "ID of the media asset (required for 'missed' type)"
264
+ },
265
+ "event_type": {
266
+ "type": "string",
267
+ "description": "Event type label for the missed event (required for 'missed' type)"
268
+ },
269
+ "start_time": {
270
+ "type": "number",
271
+ "description": "Start time in seconds for the missed event (required for 'missed' type)"
272
+ },
273
+ "end_time": {
274
+ "type": "number",
275
+ "description": "End time in seconds for the missed event (required for 'missed' type)"
276
+ }
277
+ },
278
+ "required": ["feedback_type"]
279
+ },
280
+ "executor": "tools/submit-feedback.ts",
281
+ "execution_target": "host"
282
+ },
283
+ {
284
+ "name": "media_diagnostics",
285
+ "description": "Get a diagnostic report for a media asset including processing stats, per-stage timing, failure reasons, cost estimation, and feedback summary.",
286
+ "category": "media",
287
+ "risk": "low",
288
+ "input_schema": {
289
+ "type": "object",
290
+ "properties": {
291
+ "asset_id": {
292
+ "type": "string",
293
+ "description": "ID of the media asset to diagnose"
294
+ }
295
+ },
296
+ "required": ["asset_id"]
297
+ },
298
+ "executor": "tools/media-diagnostics.ts",
299
+ "execution_target": "host"
300
+ },
301
+ {
302
+ "name": "recalibrate",
303
+ "description": "Recalibrate event detection for a media asset based on accumulated feedback. Analyzes correction patterns (false positive rates, missed events, boundary adjustments), re-ranks existing events by adjusting confidence scores, and returns a summary of adjustments made. Works for any event type.",
304
+ "category": "media",
305
+ "risk": "low",
306
+ "input_schema": {
307
+ "type": "object",
308
+ "properties": {
309
+ "asset_id": {
310
+ "type": "string",
311
+ "description": "ID of the media asset to recalibrate"
312
+ }
313
+ },
314
+ "required": ["asset_id"]
315
+ },
316
+ "executor": "tools/recalibrate.ts",
317
+ "execution_target": "host"
318
+ }
319
+ ]
320
+ }
@@ -0,0 +1,137 @@
1
+ /**
2
+ * Generic capability registry with tier-based classification.
3
+ *
4
+ * The registry is domain-agnostic: any domain (sports, surveillance, lecture
5
+ * recording, etc.) can register its own capabilities. Basketball-specific
6
+ * capabilities are registered as one example via `registerDefaults()`.
7
+ */
8
+
9
+ import type { CapabilityTier } from '../../../../memory/media-store.js';
10
+
11
+ // ---------------------------------------------------------------------------
12
+ // Types
13
+ // ---------------------------------------------------------------------------
14
+
15
export interface Capability {
  /** Unique name used as the key in tracking profiles (e.g. 'turnover'); also the registry Map key, so names must be unique across domains. */
  name: string;
  /** Human-readable description of what this capability detects/tracks. */
  description: string;
  /** Maturity tier governing confidence disclaimers and default inclusion. */
  tier: CapabilityTier;
  /** Domain this capability belongs to (e.g. 'basketball', 'surveillance'). */
  domain: string;
  /** Granularity level (e.g. 'team', 'per-player'); optional — omit when the capability has no meaningful granularity distinction. */
  granularity?: string;
}
27
+
28
// ---------------------------------------------------------------------------
// Registry (singleton in-memory Map)
// ---------------------------------------------------------------------------

// Module-level singleton keyed by capability name. Because the key is the
// name alone (not domain+name), registering the same name again replaces the
// previous entry — see registerCapability().
const registry = new Map<string, Capability>();
33
+
34
+ /**
35
+ * Register a capability. Overwrites any existing capability with the same name.
36
+ */
37
+ export function registerCapability(cap: Capability): void {
38
+ registry.set(cap.name, cap);
39
+ }
40
+
41
+ /**
42
+ * Get all registered capabilities, optionally filtered by domain.
43
+ */
44
+ export function getCapabilities(domain?: string): Capability[] {
45
+ const all = Array.from(registry.values());
46
+ if (!domain) return all;
47
+ return all.filter((c) => c.domain === domain);
48
+ }
49
+
50
+ /**
51
+ * Get capabilities filtered by tier.
52
+ */
53
+ export function getCapabilitiesByTier(tier: CapabilityTier): Capability[] {
54
+ return Array.from(registry.values()).filter((c) => c.tier === tier);
55
+ }
56
+
57
+ /**
58
+ * Look up a single capability by name.
59
+ */
60
+ export function getCapabilityByName(name: string): Capability | undefined {
61
+ return registry.get(name);
62
+ }
63
+
64
+ /**
65
+ * Get all unique domain names in the registry.
66
+ */
67
+ export function getRegisteredDomains(): string[] {
68
+ const domains = new Set<string>();
69
+ for (const cap of registry.values()) {
70
+ domains.add(cap.domain);
71
+ }
72
+ return Array.from(domains);
73
+ }
74
+
75
+ // ---------------------------------------------------------------------------
76
+ // Default registrations — basketball as one example domain
77
+ // ---------------------------------------------------------------------------
78
+
79
+ /**
80
+ * Register the default basketball capabilities as an example domain.
81
+ * Other domains should call `registerCapability()` with their own entries.
82
+ */
83
+ export function registerDefaults(): void {
84
+ // Ready tier: production-quality detection
85
+ registerCapability({
86
+ name: 'turnover',
87
+ description: 'Team-level turnover detection',
88
+ tier: 'ready',
89
+ domain: 'basketball',
90
+ granularity: 'team',
91
+ });
92
+
93
+ // Beta tier: functional but may have accuracy gaps
94
+ registerCapability({
95
+ name: 'field_goal',
96
+ description: 'Team-level field goal detection',
97
+ tier: 'beta',
98
+ domain: 'basketball',
99
+ granularity: 'team',
100
+ });
101
+
102
+ registerCapability({
103
+ name: 'rebound',
104
+ description: 'Team-level rebound detection',
105
+ tier: 'beta',
106
+ domain: 'basketball',
107
+ granularity: 'team',
108
+ });
109
+
110
+ registerCapability({
111
+ name: 'turnover_per_player',
112
+ description: 'Per-player turnover attribution',
113
+ tier: 'beta',
114
+ domain: 'basketball',
115
+ granularity: 'per-player',
116
+ });
117
+
118
+ // Experimental tier: early-stage, expect noise
119
+ registerCapability({
120
+ name: 'field_goal_per_player',
121
+ description: 'Per-player field goal attribution',
122
+ tier: 'experimental',
123
+ domain: 'basketball',
124
+ granularity: 'per-player',
125
+ });
126
+
127
+ registerCapability({
128
+ name: 'rebound_per_player',
129
+ description: 'Per-player rebound attribution',
130
+ tier: 'experimental',
131
+ domain: 'basketball',
132
+ granularity: 'per-player',
133
+ });
134
+ }
135

// Auto-register defaults on first import.
// NOTE: module-level side effect — merely importing this module populates the
// registry with the basketball example capabilities.
registerDefaults();