@vellumai/assistant 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/Dockerfile +2 -0
  2. package/README.md +45 -18
  3. package/package.json +1 -1
  4. package/scripts/ipc/generate-swift.ts +13 -0
  5. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +100 -0
  6. package/src/__tests__/approval-hardcoded-copy-guard.test.ts +41 -0
  7. package/src/__tests__/approval-message-composer.test.ts +253 -0
  8. package/src/__tests__/call-domain.test.ts +12 -2
  9. package/src/__tests__/call-orchestrator.test.ts +391 -1
  10. package/src/__tests__/call-routes-http.test.ts +27 -2
  11. package/src/__tests__/channel-approval-routes.test.ts +397 -135
  12. package/src/__tests__/channel-approvals.test.ts +99 -3
  13. package/src/__tests__/channel-delivery-store.test.ts +30 -4
  14. package/src/__tests__/channel-guardian.test.ts +261 -22
  15. package/src/__tests__/channel-readiness-service.test.ts +257 -0
  16. package/src/__tests__/config-schema.test.ts +2 -1
  17. package/src/__tests__/credential-security-invariants.test.ts +1 -0
  18. package/src/__tests__/daemon-lifecycle.test.ts +636 -0
  19. package/src/__tests__/dictation-mode-detection.test.ts +63 -0
  20. package/src/__tests__/entity-search.test.ts +615 -0
  21. package/src/__tests__/gateway-only-enforcement.test.ts +19 -13
  22. package/src/__tests__/handlers-twilio-config.test.ts +480 -0
  23. package/src/__tests__/ipc-snapshot.test.ts +63 -0
  24. package/src/__tests__/messaging-send-tool.test.ts +65 -0
  25. package/src/__tests__/run-orchestrator-assistant-events.test.ts +4 -0
  26. package/src/__tests__/run-orchestrator.test.ts +22 -0
  27. package/src/__tests__/secret-scanner.test.ts +223 -0
  28. package/src/__tests__/session-runtime-assembly.test.ts +85 -1
  29. package/src/__tests__/shell-parser-property.test.ts +357 -2
  30. package/src/__tests__/sms-messaging-provider.test.ts +125 -0
  31. package/src/__tests__/system-prompt.test.ts +25 -1
  32. package/src/__tests__/tool-executor-lifecycle-events.test.ts +34 -1
  33. package/src/__tests__/twilio-routes.test.ts +39 -3
  34. package/src/__tests__/twitter-cli-error-shaping.test.ts +2 -2
  35. package/src/__tests__/user-reference.test.ts +68 -0
  36. package/src/__tests__/web-search.test.ts +1 -1
  37. package/src/__tests__/work-item-output.test.ts +110 -0
  38. package/src/calls/call-domain.ts +8 -5
  39. package/src/calls/call-orchestrator.ts +85 -22
  40. package/src/calls/twilio-config.ts +17 -11
  41. package/src/calls/twilio-rest.ts +276 -0
  42. package/src/calls/twilio-routes.ts +39 -1
  43. package/src/cli/map.ts +6 -0
  44. package/src/commands/__tests__/cc-command-registry.test.ts +67 -0
  45. package/src/commands/cc-command-registry.ts +14 -1
  46. package/src/config/bundled-skills/claude-code/TOOLS.json +10 -3
  47. package/src/config/bundled-skills/knowledge-graph/SKILL.md +15 -0
  48. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +56 -0
  49. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +185 -0
  50. package/src/config/bundled-skills/media-processing/SKILL.md +199 -0
  51. package/src/config/bundled-skills/media-processing/TOOLS.json +320 -0
  52. package/src/config/bundled-skills/media-processing/services/capability-registry.ts +137 -0
  53. package/src/config/bundled-skills/media-processing/services/event-detection-service.ts +280 -0
  54. package/src/config/bundled-skills/media-processing/services/feedback-aggregation.ts +144 -0
  55. package/src/config/bundled-skills/media-processing/services/feedback-store.ts +136 -0
  56. package/src/config/bundled-skills/media-processing/services/processing-pipeline.ts +261 -0
  57. package/src/config/bundled-skills/media-processing/services/retrieval-service.ts +95 -0
  58. package/src/config/bundled-skills/media-processing/services/timeline-service.ts +267 -0
  59. package/src/config/bundled-skills/media-processing/tools/analyze-keyframes.ts +301 -0
  60. package/src/config/bundled-skills/media-processing/tools/detect-events.ts +110 -0
  61. package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +190 -0
  62. package/src/config/bundled-skills/media-processing/tools/generate-clip.ts +195 -0
  63. package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +197 -0
  64. package/src/config/bundled-skills/media-processing/tools/media-diagnostics.ts +166 -0
  65. package/src/config/bundled-skills/media-processing/tools/media-status.ts +75 -0
  66. package/src/config/bundled-skills/media-processing/tools/query-media-events.ts +300 -0
  67. package/src/config/bundled-skills/media-processing/tools/recalibrate.ts +235 -0
  68. package/src/config/bundled-skills/media-processing/tools/select-tracking-profile.ts +142 -0
  69. package/src/config/bundled-skills/media-processing/tools/submit-feedback.ts +150 -0
  70. package/src/config/bundled-skills/messaging/SKILL.md +24 -5
  71. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +5 -1
  72. package/src/config/bundled-skills/phone-calls/SKILL.md +2 -2
  73. package/src/config/bundled-skills/twitter/SKILL.md +19 -3
  74. package/src/config/defaults.ts +2 -1
  75. package/src/config/schema.ts +9 -3
  76. package/src/config/skills.ts +5 -32
  77. package/src/config/system-prompt.ts +40 -0
  78. package/src/config/templates/IDENTITY.md +2 -2
  79. package/src/config/user-reference.ts +29 -0
  80. package/src/config/vellum-skills/catalog.json +58 -0
  81. package/src/config/vellum-skills/google-oauth-setup/SKILL.md +3 -3
  82. package/src/config/vellum-skills/slack-oauth-setup/SKILL.md +3 -3
  83. package/src/config/vellum-skills/sms-setup/SKILL.md +118 -0
  84. package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -1
  85. package/src/config/vellum-skills/twilio-setup/SKILL.md +76 -6
  86. package/src/daemon/auth-manager.ts +103 -0
  87. package/src/daemon/computer-use-session.ts +8 -1
  88. package/src/daemon/config-watcher.ts +253 -0
  89. package/src/daemon/handlers/config.ts +819 -22
  90. package/src/daemon/handlers/dictation.ts +182 -0
  91. package/src/daemon/handlers/identity.ts +14 -23
  92. package/src/daemon/handlers/index.ts +2 -0
  93. package/src/daemon/handlers/sessions.ts +2 -0
  94. package/src/daemon/handlers/shared.ts +3 -0
  95. package/src/daemon/handlers/skills.ts +6 -7
  96. package/src/daemon/handlers/work-items.ts +15 -7
  97. package/src/daemon/ipc-contract-inventory.json +10 -0
  98. package/src/daemon/ipc-contract.ts +114 -4
  99. package/src/daemon/ipc-handler.ts +87 -0
  100. package/src/daemon/lifecycle.ts +18 -4
  101. package/src/daemon/ride-shotgun-handler.ts +11 -1
  102. package/src/daemon/server.ts +111 -504
  103. package/src/daemon/session-agent-loop.ts +10 -15
  104. package/src/daemon/session-runtime-assembly.ts +115 -44
  105. package/src/daemon/session-tool-setup.ts +2 -0
  106. package/src/daemon/session.ts +19 -2
  107. package/src/inbound/public-ingress-urls.ts +3 -3
  108. package/src/memory/channel-guardian-store.ts +2 -1
  109. package/src/memory/db-connection.ts +28 -0
  110. package/src/memory/db-init.ts +1163 -0
  111. package/src/memory/db.ts +2 -2007
  112. package/src/memory/embedding-backend.ts +79 -11
  113. package/src/memory/indexer.ts +2 -0
  114. package/src/memory/job-handlers/media-processing.ts +100 -0
  115. package/src/memory/job-utils.ts +64 -4
  116. package/src/memory/jobs-store.ts +2 -1
  117. package/src/memory/jobs-worker.ts +11 -1
  118. package/src/memory/media-store.ts +759 -0
  119. package/src/memory/recall-cache.ts +107 -0
  120. package/src/memory/retriever.ts +36 -2
  121. package/src/memory/schema-migration.ts +984 -0
  122. package/src/memory/schema.ts +99 -0
  123. package/src/memory/search/entity.ts +208 -25
  124. package/src/memory/search/ranking.ts +6 -1
  125. package/src/memory/search/types.ts +26 -0
  126. package/src/messaging/provider-types.ts +2 -0
  127. package/src/messaging/providers/sms/adapter.ts +204 -0
  128. package/src/messaging/providers/sms/client.ts +93 -0
  129. package/src/messaging/providers/sms/types.ts +7 -0
  130. package/src/permissions/checker.ts +16 -2
  131. package/src/permissions/prompter.ts +14 -3
  132. package/src/permissions/trust-store.ts +7 -0
  133. package/src/runtime/approval-message-composer.ts +143 -0
  134. package/src/runtime/channel-approvals.ts +29 -7
  135. package/src/runtime/channel-guardian-service.ts +44 -18
  136. package/src/runtime/channel-readiness-service.ts +292 -0
  137. package/src/runtime/channel-readiness-types.ts +29 -0
  138. package/src/runtime/gateway-client.ts +2 -1
  139. package/src/runtime/http-server.ts +65 -28
  140. package/src/runtime/http-types.ts +3 -0
  141. package/src/runtime/routes/call-routes.ts +2 -1
  142. package/src/runtime/routes/channel-routes.ts +237 -103
  143. package/src/runtime/routes/run-routes.ts +7 -1
  144. package/src/runtime/run-orchestrator.ts +43 -3
  145. package/src/security/secret-scanner.ts +218 -0
  146. package/src/skills/frontmatter.ts +63 -0
  147. package/src/skills/slash-commands.ts +23 -0
  148. package/src/skills/vellum-catalog-remote.ts +107 -0
  149. package/src/tools/assets/materialize.ts +2 -2
  150. package/src/tools/browser/auto-navigate.ts +132 -24
  151. package/src/tools/browser/browser-manager.ts +67 -61
  152. package/src/tools/calls/call-start.ts +1 -0
  153. package/src/tools/claude-code/claude-code.ts +55 -3
  154. package/src/tools/credentials/vault.ts +1 -1
  155. package/src/tools/execution-target.ts +11 -1
  156. package/src/tools/executor.ts +10 -2
  157. package/src/tools/network/web-search.ts +1 -1
  158. package/src/tools/skills/vellum-catalog.ts +61 -156
  159. package/src/tools/terminal/parser.ts +21 -5
  160. package/src/tools/types.ts +2 -0
  161. package/src/twitter/router.ts +1 -1
  162. package/src/util/platform.ts +43 -1
  163. package/src/util/retry.ts +4 -4
@@ -0,0 +1,261 @@
1
+ /**
2
+ * Processing pipeline service.
3
+ *
4
+ * Orchestrates the full media processing pipeline with reliability features:
5
+ * - Sequential stage execution: keyframe_extraction -> vision_analysis -> timeline_generation -> event_detection
6
+ * - Stage-level retries with exponential backoff
7
+ * - Resumability: checks processing_stages to find last completed stage
8
+ * - Cancellation support: cooperative cancellation via asset status = 'cancelled'
9
+ * - Idempotency: respects content-hash dedup from media-store
10
+ * - Graceful degradation: saves partial results on failure
11
+ *
12
+ * All reliability infrastructure is generic media-processing, not domain-specific.
13
+ */
14
+
15
+ import {
16
+ getMediaAssetById,
17
+ getProcessingStagesForAsset,
18
+ createProcessingStage,
19
+ updateProcessingStage,
20
+ updateMediaAssetStatus,
21
+ type ProcessingStage,
22
+ } from '../../../../memory/media-store.js';
23
+ import { computeRetryDelay, sleep } from '../../../../util/retry.js';
24
+
25
+ // ---------------------------------------------------------------------------
26
+ // Types
27
+ // ---------------------------------------------------------------------------
28
+
29
/**
 * Identifier of a single pipeline stage.
 *
 * The union only enumerates the valid names; the order in which stages run
 * is defined separately by STAGE_ORDER.
 */
export type PipelineStageName =
  | 'keyframe_extraction' // first stage in STAGE_ORDER
  | 'vision_analysis'
  | 'timeline_generation'
  | 'event_detection'; // last stage in STAGE_ORDER
34
+
35
/**
 * Implementation of one pipeline stage, supplied by the caller of
 * runPipeline (one handler per stage name).
 */
export interface StageHandler {
  /**
   * Run the stage for the given asset.
   *
   * Failure is signalled by throwing (rejecting the returned promise); the
   * pipeline treats a resolved promise as success. `onProgress`, when
   * provided, receives human-readable status messages.
   */
  execute: (assetId: string, onProgress?: (msg: string) => void) => Promise<void>;
}
39
+
40
/** Optional tuning knobs accepted by runPipeline. */
export interface PipelineOptions {
  /**
   * How many times a failing stage is retried after its first attempt.
   * Defaults to 3 when omitted.
   */
  maxRetries?: number;

  /**
   * Base delay, in milliseconds, fed to the exponential backoff between
   * retries. Defaults to 1000 when omitted.
   */
  baseDelayMs?: number;

  /** Receives streaming, human-readable status updates while the pipeline runs. */
  onProgress?: (message: string) => void;
}
48
+
49
/** Summary returned by runPipeline describing how far the run got. */
export interface PipelineResult {
  /** Asset the pipeline was run for. */
  assetId: string;

  /**
   * Stages that are complete, in execution order. Includes stages that were
   * already completed by an earlier run as well as those finished by this one.
   */
  completedStages: PipelineStageName[];

  /** Stage that exhausted its attempts, or null when no stage failed. */
  failedStage: PipelineStageName | null;

  /** Human-readable explanation accompanying `failedStage`, otherwise null. */
  failureReason: string | null;

  /** True when the run stopped because the asset was found to be cancelled. */
  cancelled: boolean;

  /**
   * First stage executed by this run when earlier stages had already been
   * completed; null for a fresh run or when nothing needed to run.
   */
  resumedFrom: PipelineStageName | null;
}
57
+
58
+ // ---------------------------------------------------------------------------
59
+ // Pipeline stage ordering
60
+ // ---------------------------------------------------------------------------
61
+
62
/**
 * Execution order of the pipeline stages. runPipeline walks this list from
 * the front both when looking for the resume point and when executing.
 */
const STAGE_ORDER: PipelineStageName[] = [
  'keyframe_extraction', // 1
  'vision_analysis', // 2
  'timeline_generation', // 3
  'event_detection', // 4
];
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // Helpers
71
+ // ---------------------------------------------------------------------------
72
+
73
/**
 * Return the processing-stage record with the given name for an asset,
 * creating a new record through the media store when none exists yet.
 *
 * @param assetId   Asset whose stage records are searched.
 * @param stageName Stage name to look up (matched against `stage`).
 * @returns The first matching record, or the freshly created one.
 */
function findOrCreateStage(assetId: string, stageName: string): ProcessingStage {
  for (const record of getProcessingStagesForAsset(assetId)) {
    if (record.stage === stageName) {
      return record;
    }
  }
  // Nothing recorded for this stage yet.
  return createProcessingStage({ assetId, stage: stageName });
}
79
+
80
/** True when the stage record's status is exactly 'completed'. */
function isStageCompleted(stage: ProcessingStage): boolean {
  const { status } = stage;
  return status === 'completed';
}
83
+
84
/**
 * Cooperative cancellation probe.
 *
 * Re-reads the asset from the store and reports whether its status is
 * 'cancelled'. An asset that can no longer be found is also reported as
 * cancelled, so the pipeline stops instead of working on a missing record.
 * The pipeline calls this between stages and before each retry.
 */
function isAssetCancelled(assetId: string): boolean {
  const asset = getMediaAssetById(assetId);
  return asset ? (asset.status as string) === 'cancelled' : true;
}
93
+
94
+ // ---------------------------------------------------------------------------
95
+ // Main pipeline
96
+ // ---------------------------------------------------------------------------
97
+
98
/**
 * Turn whatever a stage handler threw into a message string capped at 500
 * characters. Handlers are caller-supplied, so the thrown value is not
 * guaranteed to be an Error instance.
 */
function describeStageFailure(err: unknown): string {
  let text: string;
  if (err instanceof Error) {
    text = err.message;
  } else if (
    typeof err === 'object' &&
    err !== null &&
    typeof (err as { message?: unknown }).message === 'string'
  ) {
    // Error-like object (e.g. an error that is not an instance of this realm's Error).
    text = (err as { message: string }).message;
  } else {
    text = String(err);
  }
  return text.slice(0, 500);
}

/**
 * Run the full processing pipeline for a media asset.
 *
 * Stages run sequentially in STAGE_ORDER. The run is resumable: the leading
 * run of stages whose records are already 'completed' is skipped and
 * execution starts at the first incomplete stage. Each stage is attempted up
 * to `maxRetries + 1` times with a backoff delay between attempts. When a
 * stage exhausts its attempts the pipeline stops; records written by earlier
 * stages are left in place.
 *
 * Cancellation is cooperative: the asset status is re-read before every
 * stage and before every retry, and the run stops once it is 'cancelled'.
 *
 * @param assetId  Asset to process.
 * @param handlers One handler per stage name.
 * @param options  Retry tuning and an optional progress callback.
 * @returns A summary of completed stages, the failure (if any) and whether
 *          the run was cancelled or resumed.
 * @throws Error when the asset does not exist at the start of the run.
 */
export async function runPipeline(
  assetId: string,
  handlers: Record<PipelineStageName, StageHandler>,
  options?: PipelineOptions,
): Promise<PipelineResult> {
  const maxRetries = options?.maxRetries ?? 3;
  const baseDelayMs = options?.baseDelayMs ?? 1000;
  const onProgress = options?.onProgress;

  const asset = getMediaAssetById(assetId);
  if (!asset) {
    throw new Error(`Media asset not found: ${assetId}`);
  }

  // Check if asset is already cancelled before forcing it into 'processing'.
  if ((asset.status as string) === 'cancelled') {
    return {
      assetId,
      completedStages: [],
      failedStage: null,
      failureReason: null,
      cancelled: true,
      resumedFrom: null,
    };
  }

  // Mark asset as processing
  updateMediaAssetStatus(assetId, 'processing');

  const completedStages: PipelineStageName[] = [];
  let failedStage: PipelineStageName | null = null;
  let failureReason: string | null = null;
  let cancelled = false;
  let resumedFrom: PipelineStageName | null = null;

  // Find where to resume from: skip the leading run of completed stages.
  let startIndex = 0;
  for (let i = 0; i < STAGE_ORDER.length; i++) {
    const stage = findOrCreateStage(assetId, STAGE_ORDER[i]);
    if (isStageCompleted(stage)) {
      completedStages.push(STAGE_ORDER[i]);
      startIndex = i + 1;
    } else {
      break;
    }
  }

  if (startIndex > 0 && startIndex < STAGE_ORDER.length) {
    resumedFrom = STAGE_ORDER[startIndex];
    onProgress?.(`Resuming pipeline from stage: ${resumedFrom}`);
  } else if (startIndex >= STAGE_ORDER.length) {
    // All stages already completed — idempotent no-op
    onProgress?.('All pipeline stages already completed.');
    updateMediaAssetStatus(assetId, 'indexed');
    return {
      assetId,
      completedStages,
      failedStage: null,
      failureReason: null,
      cancelled: false,
      resumedFrom: null,
    };
  }

  // Execute stages sequentially from the resume point
  for (let i = startIndex; i < STAGE_ORDER.length; i++) {
    const stageName = STAGE_ORDER[i];

    // Cooperative cancellation check between stages
    if (isAssetCancelled(assetId)) {
      onProgress?.(`Pipeline cancelled before stage: ${stageName}`);
      cancelled = true;
      break;
    }

    const stageRecord = findOrCreateStage(assetId, stageName);
    const handler = handlers[stageName];

    onProgress?.(`Starting stage: ${stageName}`);
    updateProcessingStage(stageRecord.id, {
      status: 'running',
      startedAt: Date.now(),
      lastError: null,
    });

    let succeeded = false;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        if (attempt > 0) {
          // Backoff is computed from the number of retries already made.
          const delay = computeRetryDelay(attempt - 1, baseDelayMs);
          onProgress?.(`Retrying stage ${stageName} (attempt ${attempt + 1}/${maxRetries + 1}) after ${Math.round(delay)}ms...`);
          await sleep(delay);

          // Re-check cancellation before retry
          if (isAssetCancelled(assetId)) {
            onProgress?.(`Pipeline cancelled during retry of stage: ${stageName}`);
            cancelled = true;
            break;
          }
        }

        await handler.execute(assetId, onProgress);

        // Mark stage as completed
        updateProcessingStage(stageRecord.id, {
          status: 'completed',
          progress: 100,
          completedAt: Date.now(),
        });

        completedStages.push(stageName);
        succeeded = true;
        onProgress?.(`Completed stage: ${stageName}`);
        break;
      } catch (err) {
        // The thrown value may be anything; never dereference it blindly, or a
        // TypeError raised here would escape the retry loop and abort the run.
        const errorMsg = describeStageFailure(err);
        onProgress?.(`Stage ${stageName} failed (attempt ${attempt + 1}/${maxRetries + 1}): ${errorMsg}`);

        // Save partial progress — the stage handler should have already
        // persisted any partial results before throwing
        updateProcessingStage(stageRecord.id, {
          status: attempt >= maxRetries ? 'failed' : 'running',
          lastError: errorMsg,
        });
      }
    }

    if (cancelled) break;

    if (!succeeded) {
      failedStage = stageName;
      failureReason = `Stage ${stageName} failed after ${maxRetries + 1} attempts`;
      onProgress?.(`Pipeline stopped: ${failureReason}`);
      break;
    }
  }

  // Update final asset status.
  // NOTE(review): cancellation is only observed before a stage/retry starts, so
  // a cancel that lands while the final attempt is running is overwritten by
  // 'indexed' / 'failed' below — confirm whether that is intended.
  if (cancelled) {
    // Leave status as-is (already 'cancelled')
  } else if (failedStage) {
    updateMediaAssetStatus(assetId, 'failed');
  } else {
    updateMediaAssetStatus(assetId, 'indexed');
  }

  return {
    assetId,
    completedStages,
    failedStage,
    failureReason,
    cancelled,
    resumedFrom,
  };
}
@@ -0,0 +1,95 @@
1
+ /**
2
+ * Generic media event retrieval service.
3
+ *
4
+ * Pure data retrieval with configurable filters and ranking — no
5
+ * domain-specific logic. Callers (e.g. a query tool) are responsible
6
+ * for translating domain concepts into filter parameters.
7
+ */
8
+
9
+ import {
10
+ getEventsForAsset,
11
+ type MediaEvent,
12
+ } from '../../../../memory/media-store.js';
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Types
16
+ // ---------------------------------------------------------------------------
17
+
18
/** Filter, ranking and paging parameters accepted by retrieveEvents. */
export interface RetrievalFilters {
  /** Media asset to search within. Without it retrieveEvents returns nothing. */
  assetId?: string;

  /** Restrict results to events carrying this type label. */
  eventType?: string;

  /** Lower bound on event confidence (0–1). */
  minConfidence?: number;

  /** Cap on the number of events returned (retrieveEvents defaults to 10). */
  limit?: number;

  /** Ranking key for the results (retrieveEvents defaults to 'confidence'). */
  sortBy?: 'confidence' | 'startTime';

  /** Keep only events whose start time (seconds) is at or after this value. */
  startTimeMin?: number;

  /** Keep only events whose start time (seconds) is at or before this value. */
  startTimeMax?: number;
}
34
+
35
/** Outcome of a retrieveEvents call. */
export interface RetrievalResult {
  /** Events that passed every filter, after the limit was applied. */
  events: MediaEvent[];

  /** Number of entries in `events`. */
  totalReturned: number;

  /** The filter object the caller passed in, echoed back unchanged. */
  filters: RetrievalFilters;
}
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // Retrieval
43
+ // ---------------------------------------------------------------------------
44
+
45
/**
 * Query the media events of one asset with the given filters and return the
 * ranked results with full event metadata.
 *
 * When `assetId` is not provided the function returns an empty result set
 * because the underlying store requires an asset scope. Callers that want
 * cross-asset queries should iterate over asset IDs externally.
 *
 * Type, confidence and sort order are delegated to the store. Start-time
 * bounds are applied locally; when either bound is set the store is queried
 * without a limit so the cap can be applied after range filtering.
 *
 * @param filters Filter and ranking parameters; `limit` defaults to 10 and
 *                `sortBy` to 'confidence'. A non-positive `limit` applies no
 *                local cap.
 * @returns The matching events, their count, and the filters as passed in.
 */
export function retrieveEvents(filters: RetrievalFilters): RetrievalResult {
  const {
    assetId,
    eventType,
    minConfidence,
    limit = 10,
    sortBy = 'confidence',
    startTimeMin,
    startTimeMax,
  } = filters;

  if (!assetId) {
    return { events: [], totalReturned: 0, filters };
  }

  const hasTimeRange = startTimeMin !== undefined || startTimeMax !== undefined;

  // Fetch from the store with the subset of filters it supports natively.
  // With a time range, fetch everything so the limit is applied after the
  // local range filter rather than before it.
  let events = getEventsForAsset(assetId, {
    eventType,
    minConfidence,
    sortBy,
    limit: hasTimeRange ? undefined : limit,
  });

  // Apply time-range filters that the store doesn't support directly
  if (startTimeMin !== undefined) {
    events = events.filter((e) => e.startTime >= startTimeMin);
  }
  if (startTimeMax !== undefined) {
    events = events.filter((e) => e.startTime <= startTimeMax);
  }

  // Re-apply limit after local filtering. Only a positive limit is a cap:
  // slice(0, negative) would count from the end and silently drop trailing
  // events instead of limiting the result.
  if (limit > 0 && events.length > limit) {
    events = events.slice(0, limit);
  }

  return {
    events,
    totalReturned: events.length,
    filters,
  };
}
@@ -0,0 +1,267 @@
1
+ /**
2
+ * Timeline generation service.
3
+ *
4
+ * Aggregates sequential vision outputs into coherent timeline segments.
5
+ * Each segment groups adjacent keyframes that share similar scene characteristics
6
+ * into a single time range with merged attributes.
7
+ */
8
+
9
+ import {
10
+ getMediaAssetById,
11
+ getKeyframesForAsset,
12
+ getVisionOutputsForAsset,
13
+ deleteTimelineForAsset,
14
+ insertTimelineSegmentsBatch,
15
+ createProcessingStage,
16
+ updateProcessingStage,
17
+ getProcessingStagesForAsset,
18
+ type MediaVisionOutput,
19
+ type MediaKeyframe,
20
+ type MediaTimeline,
21
+ } from '../../../../memory/media-store.js';
22
+
23
/** Value returned by generateTimeline. */
export interface TimelineGenerationResult {
  /** Asset the timeline was generated for. */
  assetId: string;

  /** Number of entries in `segments`. */
  segmentCount: number;

  /** Timeline segments as returned by the store after insertion. */
  segments: MediaTimeline[];
}
28
+
29
/**
 * Generate a timeline for a media asset from its vision analysis outputs.
 *
 * Keyframes that have a vision output of the requested analysis type are
 * walked in timestamp order; consecutive frames judged similar by
 * shouldMergeIntoSegment are folded into one segment (time range extended,
 * confidence averaged, subjects/actions unioned). If a timeline already
 * exists for this asset, it is replaced. Progress and the final outcome are
 * recorded on the asset's 'timeline_generation' processing stage.
 *
 * @param assetId Asset to build the timeline for.
 * @param options `analysisType` selects which vision outputs to use
 *                (default 'scene_description'); `onProgress` receives
 *                status messages.
 * @returns The inserted segments and their count.
 * @throws Error when the asset, its keyframes, or matching vision outputs
 *         are missing; errors raised while aggregating or storing are
 *         recorded on the stage and rethrown.
 */
export function generateTimeline(
  assetId: string,
  options?: {
    analysisType?: string;
    onProgress?: (message: string) => void;
  },
): TimelineGenerationResult {
  const analysisType = options?.analysisType ?? 'scene_description';
  const onProgress = options?.onProgress;

  const asset = getMediaAssetById(assetId);
  if (!asset) {
    throw new Error(`Media asset not found: ${assetId}`);
  }

  const keyframes = getKeyframesForAsset(assetId);
  if (keyframes.length === 0) {
    throw new Error('No keyframes found for this asset. Run extract_keyframes first.');
  }

  const visionOutputs = getVisionOutputsForAsset(assetId, analysisType);
  if (visionOutputs.length === 0) {
    throw new Error(`No vision outputs found for analysis type "${analysisType}". Run analyze_keyframes first.`);
  }

  // Find or create the timeline_generation processing stage
  const stage =
    getProcessingStagesForAsset(assetId).find((s) => s.stage === 'timeline_generation') ??
    createProcessingStage({ assetId, stage: 'timeline_generation' });
  updateProcessingStage(stage.id, { status: 'running', startedAt: Date.now() });

  try {
    // Index vision outputs by keyframe id (a later output for the same
    // keyframe replaces an earlier one).
    const outputByKeyframe = new Map<string, MediaVisionOutput>();
    for (const vo of visionOutputs) {
      outputByKeyframe.set(vo.keyframeId, vo);
    }

    // Pair every analysed keyframe with its output, then order by timestamp.
    // Keyframes without an output are skipped.
    const frames: Array<{ keyframe: MediaKeyframe; output: MediaVisionOutput }> = [];
    for (const keyframe of keyframes) {
      const output = outputByKeyframe.get(keyframe.id);
      if (output) {
        frames.push({ keyframe, output });
      }
    }
    frames.sort((a, b) => a.keyframe.timestamp - b.keyframe.timestamp);

    const [firstFrame, ...remainingFrames] = frames;
    if (!firstFrame) {
      // No keyframe has an output of this analysis type: nothing to build.
      updateProcessingStage(stage.id, {
        status: 'completed',
        progress: 100,
        completedAt: Date.now(),
      });
      return { assetId, segmentCount: 0, segments: [] };
    }

    onProgress?.('Aggregating vision outputs into timeline segments...');

    // Aggregate consecutive frames into segments based on scene similarity
    const pendingSegments: PendingSegment[] = [];
    let currentSegment = createSegmentFromOutput(assetId, firstFrame.keyframe, firstFrame.output);
    let lastProgress = -1;

    for (const [offset, { keyframe, output }] of remainingFrames.entries()) {
      if (shouldMergeIntoSegment(currentSegment, output)) {
        // Extend the current segment and fold the new confidence into the
        // running mean over its frames.
        currentSegment.endTime = keyframe.timestamp;
        const newConfidence = output.confidence ?? 0.5;
        currentSegment.confidence =
          (currentSegment.confidence * currentSegment.frameCount + newConfidence) /
          (currentSegment.frameCount + 1);
        currentSegment.frameCount++;
        mergeSubjects(currentSegment.attributes, output.output);
        mergeActions(currentSegment.attributes, output.output);
      } else {
        // Finalize current segment and start a new one
        pendingSegments.push(currentSegment);
        currentSegment = createSegmentFromOutput(assetId, keyframe, output);
      }

      // Persist progress only when the rounded percentage actually changes,
      // instead of issuing one store write per keyframe.
      const progress = Math.round(((offset + 1) / frames.length) * 100);
      if (progress !== lastProgress) {
        updateProcessingStage(stage.id, { progress });
        lastProgress = progress;
      }
    }

    // Don't forget the last segment
    pendingSegments.push(currentSegment);

    // frameCount is aggregation bookkeeping only; hand the store just the
    // segment fields.
    const segmentRows = pendingSegments.map((segment) => ({
      assetId: segment.assetId,
      startTime: segment.startTime,
      endTime: segment.endTime,
      segmentType: segment.segmentType,
      attributes: segment.attributes,
      confidence: segment.confidence,
    }));

    // Clear existing timeline and insert new segments
    deleteTimelineForAsset(assetId);
    const segments = insertTimelineSegmentsBatch(segmentRows);

    updateProcessingStage(stage.id, {
      status: 'completed',
      progress: 100,
      completedAt: Date.now(),
    });

    onProgress?.(`Generated ${segments.length} timeline segments.`);

    return { assetId, segmentCount: segments.length, segments };
  } catch (err) {
    // The thrown value is not guaranteed to be an Error; derive the message
    // defensively so recording the failure cannot itself throw and mask it.
    const message = err instanceof Error ? err.message : String(err);
    updateProcessingStage(stage.id, {
      status: 'failed',
      lastError: message.slice(0, 500),
    });
    throw err;
  }
}
161
+
162
+ // ---------------------------------------------------------------------------
163
+ // Internal helpers
164
+ // ---------------------------------------------------------------------------
165
+
166
/**
 * A timeline segment while it is still being aggregated: the fields that
 * are handed to the store plus a frame counter used for the running
 * confidence average.
 */
interface PendingSegment {
  /** Asset the segment belongs to. */
  assetId: string;
  /** Timestamp of the first keyframe in the segment. */
  startTime: number;
  /** Timestamp of the most recent keyframe merged into the segment. */
  endTime: number;
  /** Type label produced by deriveSegmentType for the segment's first frame. */
  segmentType: string;
  /** Merged attributes (scene description, subjects, actions, context). */
  attributes: Record<string, unknown>;
  /** Mean confidence over the frames merged so far. */
  confidence: number;
  /** Number of keyframes merged into this segment. */
  frameCount: number;
}
175
+
176
/**
 * Start a new single-frame segment from one keyframe and its vision output.
 *
 * The segment spans only the keyframe's timestamp (start === end), takes its
 * type from deriveSegmentType, copies `subjects` / `actions` when they are
 * arrays (otherwise uses empty lists), and falls back to a confidence of 0.5
 * when the output has none.
 */
function createSegmentFromOutput(
  assetId: string,
  keyframe: MediaKeyframe,
  vo: MediaVisionOutput,
): PendingSegment {
  const { output } = vo;
  const at = keyframe.timestamp;

  // Copy list-valued fields so later merges never mutate the vision output.
  const copyList = (value: unknown): string[] =>
    Array.isArray(value) ? [...(value as string[])] : [];

  const attributes: Record<string, unknown> = {
    sceneDescription: (output.sceneDescription as string) ?? '',
    subjects: copyList(output.subjects),
    actions: copyList(output.actions),
    context: (output.context as string) ?? '',
  };

  return {
    assetId,
    startTime: at,
    endTime: at,
    segmentType: deriveSegmentType(output),
    attributes,
    confidence: vo.confidence ?? 0.5,
    frameCount: 1,
  };
}
198
+
199
/**
 * Derive a generic segment type from a vision output.
 *
 * Looks only at the `actions` and `subjects` lists — domain-specific
 * interpretation belongs in the VLM prompt, not here:
 * - 'activity' when `actions` is a non-empty array,
 * - 'scene' when `subjects` is a non-empty array,
 * - 'static' otherwise.
 *
 * Values that are not arrays are treated as absent, matching how
 * createSegmentFromOutput stores them (as empty lists); otherwise a string
 * such as "running" would be typed 'activity' while the segment records no
 * actions at all.
 */
function deriveSegmentType(output: Record<string, unknown>): string {
  const { actions, subjects } = output;
  if (Array.isArray(actions) && actions.length > 0) {
    return 'activity';
  }
  if (Array.isArray(subjects) && subjects.length > 0) {
    return 'scene';
  }
  return 'static';
}
215
+
216
/**
 * Decide whether a new vision output is similar enough to merge
 * into the current segment.
 *
 * Heuristic: the output must derive to the same segment type, and then
 * - both subject lists empty            -> merge,
 * - exactly one subject list empty      -> do not merge,
 * - otherwise merge when the Jaccard similarity of the two subject sets
 *   (|intersection| / |union|) is at least 0.3.
 *
 * Subject values that are not arrays are treated as empty lists, and
 * repeated incoming subjects are counted once so duplicates cannot inflate
 * the similarity.
 */
function shouldMergeIntoSegment(
  segment: PendingSegment,
  vo: MediaVisionOutput,
): boolean {
  const newType = deriveSegmentType(vo.output);
  if (newType !== segment.segmentType) return false;

  // Normalise both sides to sets of distinct subjects.
  const segmentSubjects = segment.attributes.subjects;
  const outputSubjects = vo.output.subjects;
  const existingSubjects = new Set<string>(
    Array.isArray(segmentSubjects) ? (segmentSubjects as string[]) : [],
  );
  const newSubjects = new Set<string>(
    Array.isArray(outputSubjects) ? (outputSubjects as string[]) : [],
  );

  if (existingSubjects.size === 0 && newSubjects.size === 0) return true;
  if (existingSubjects.size === 0 || newSubjects.size === 0) return false;

  let overlap = 0;
  for (const subject of newSubjects) {
    if (existingSubjects.has(subject)) overlap++;
  }
  // |A ∪ B| = |A| + |B| - |A ∩ B|; both sets are non-empty here, so this is > 0.
  const unionSize = existingSubjects.size + newSubjects.size - overlap;

  // Merge if at least 30% overlap (Jaccard similarity)
  return overlap / unionSize >= 0.3;
}
244
+
245
/**
 * Union the `subjects` of a new vision output into a segment's attributes.
 *
 * `attributes.subjects` is replaced with a de-duplicated array that keeps the
 * existing entries first, followed by new ones in the order they appear.
 * Values that are not arrays (on either side) are treated as empty lists so
 * that a string is never split into characters and a non-iterable value
 * never throws.
 *
 * @param attributes Segment attributes; mutated in place.
 * @param newOutput  Vision output whose `subjects` are merged in.
 */
function mergeSubjects(
  attributes: Record<string, unknown>,
  newOutput: Record<string, unknown>,
): void {
  const current = attributes.subjects;
  const incoming = newOutput.subjects;
  const merged = new Set<string>(Array.isArray(current) ? (current as string[]) : []);
  if (Array.isArray(incoming)) {
    for (const subject of incoming as string[]) {
      merged.add(subject);
    }
  }
  attributes.subjects = [...merged];
}
256
+
257
/**
 * Union the `actions` of a new vision output into a segment's attributes.
 *
 * `attributes.actions` is replaced with a de-duplicated array that keeps the
 * existing entries first, followed by new ones in the order they appear.
 * Values that are not arrays (on either side) are treated as empty lists so
 * that a string is never split into characters and a non-iterable value
 * never throws.
 *
 * @param attributes Segment attributes; mutated in place.
 * @param newOutput  Vision output whose `actions` are merged in.
 */
function mergeActions(
  attributes: Record<string, unknown>,
  newOutput: Record<string, unknown>,
): void {
  const current = attributes.actions;
  const incoming = newOutput.actions;
  const merged = new Set<string>(Array.isArray(current) ? (current as string[]) : []);
  if (Array.isArray(incoming)) {
    for (const action of incoming as string[]) {
      merged.add(action);
    }
  }
  attributes.actions = [...merged];
}