escribano 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +297 -0
  3. package/dist/0_types.js +279 -0
  4. package/dist/actions/classify-session.js +77 -0
  5. package/dist/actions/create-contexts.js +44 -0
  6. package/dist/actions/create-topic-blocks.js +68 -0
  7. package/dist/actions/extract-metadata.js +24 -0
  8. package/dist/actions/generate-artifact-v3.js +296 -0
  9. package/dist/actions/generate-artifact.js +61 -0
  10. package/dist/actions/generate-summary-v3.js +260 -0
  11. package/dist/actions/outline-index.js +204 -0
  12. package/dist/actions/process-recording-v2.js +494 -0
  13. package/dist/actions/process-recording-v3.js +412 -0
  14. package/dist/actions/process-session.js +183 -0
  15. package/dist/actions/publish-summary-v3.js +303 -0
  16. package/dist/actions/sync-to-outline.js +196 -0
  17. package/dist/adapters/audio.silero.adapter.js +69 -0
  18. package/dist/adapters/cap.adapter.js +94 -0
  19. package/dist/adapters/capture.cap.adapter.js +107 -0
  20. package/dist/adapters/capture.filesystem.adapter.js +124 -0
  21. package/dist/adapters/embedding.ollama.adapter.js +141 -0
  22. package/dist/adapters/intelligence.adapter.js +202 -0
  23. package/dist/adapters/intelligence.mlx.adapter.js +395 -0
  24. package/dist/adapters/intelligence.ollama.adapter.js +741 -0
  25. package/dist/adapters/publishing.outline.adapter.js +75 -0
  26. package/dist/adapters/storage.adapter.js +81 -0
  27. package/dist/adapters/storage.fs.adapter.js +83 -0
  28. package/dist/adapters/transcription.whisper.adapter.js +206 -0
  29. package/dist/adapters/video.ffmpeg.adapter.js +405 -0
  30. package/dist/adapters/whisper.adapter.js +168 -0
  31. package/dist/batch-context.js +329 -0
  32. package/dist/db/helpers.js +50 -0
  33. package/dist/db/index.js +95 -0
  34. package/dist/db/migrate.js +80 -0
  35. package/dist/db/repositories/artifact.sqlite.js +77 -0
  36. package/dist/db/repositories/cluster.sqlite.js +92 -0
  37. package/dist/db/repositories/context.sqlite.js +75 -0
  38. package/dist/db/repositories/index.js +10 -0
  39. package/dist/db/repositories/observation.sqlite.js +70 -0
  40. package/dist/db/repositories/recording.sqlite.js +56 -0
  41. package/dist/db/repositories/subject.sqlite.js +64 -0
  42. package/dist/db/repositories/topic-block.sqlite.js +45 -0
  43. package/dist/db/types.js +4 -0
  44. package/dist/domain/classification.js +60 -0
  45. package/dist/domain/context.js +97 -0
  46. package/dist/domain/index.js +2 -0
  47. package/dist/domain/observation.js +17 -0
  48. package/dist/domain/recording.js +41 -0
  49. package/dist/domain/segment.js +93 -0
  50. package/dist/domain/session.js +93 -0
  51. package/dist/domain/time-range.js +38 -0
  52. package/dist/domain/transcript.js +79 -0
  53. package/dist/index.js +173 -0
  54. package/dist/pipeline/context.js +162 -0
  55. package/dist/pipeline/events.js +2 -0
  56. package/dist/prerequisites.js +226 -0
  57. package/dist/scripts/rebuild-index.js +53 -0
  58. package/dist/scripts/seed-fixtures.js +290 -0
  59. package/dist/services/activity-segmentation.js +333 -0
  60. package/dist/services/activity-segmentation.test.js +191 -0
  61. package/dist/services/app-normalization.js +212 -0
  62. package/dist/services/cluster-merge.js +69 -0
  63. package/dist/services/clustering.js +237 -0
  64. package/dist/services/debug.js +58 -0
  65. package/dist/services/frame-sampling.js +318 -0
  66. package/dist/services/signal-extraction.js +106 -0
  67. package/dist/services/subject-grouping.js +342 -0
  68. package/dist/services/temporal-alignment.js +99 -0
  69. package/dist/services/vlm-enrichment.js +84 -0
  70. package/dist/services/vlm-service.js +130 -0
  71. package/dist/stats/index.js +3 -0
  72. package/dist/stats/observer.js +65 -0
  73. package/dist/stats/repository.js +36 -0
  74. package/dist/stats/resource-tracker.js +86 -0
  75. package/dist/stats/types.js +1 -0
  76. package/dist/test-classification-prompts.js +181 -0
  77. package/dist/tests/cap.adapter.test.js +75 -0
  78. package/dist/tests/capture.cap.adapter.test.js +69 -0
  79. package/dist/tests/classify-session.test.js +140 -0
  80. package/dist/tests/db/repositories.test.js +243 -0
  81. package/dist/tests/domain/time-range.test.js +31 -0
  82. package/dist/tests/integration.test.js +84 -0
  83. package/dist/tests/intelligence.adapter.test.js +102 -0
  84. package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
  85. package/dist/tests/process-v2.test.js +90 -0
  86. package/dist/tests/services/clustering.test.js +112 -0
  87. package/dist/tests/services/frame-sampling.test.js +152 -0
  88. package/dist/tests/utils/ocr.test.js +76 -0
  89. package/dist/tests/utils/parallel.test.js +57 -0
  90. package/dist/tests/visual-observer.test.js +175 -0
  91. package/dist/utils/id-normalization.js +15 -0
  92. package/dist/utils/index.js +9 -0
  93. package/dist/utils/model-detector.js +154 -0
  94. package/dist/utils/ocr.js +80 -0
  95. package/dist/utils/parallel.js +32 -0
  96. package/migrations/001_initial.sql +109 -0
  97. package/migrations/002_clusters.sql +41 -0
  98. package/migrations/003_observations_vlm_fields.sql +14 -0
  99. package/migrations/004_observations_unique.sql +18 -0
  100. package/migrations/005_processing_stats.sql +29 -0
  101. package/migrations/006_vlm_raw_response.sql +6 -0
  102. package/migrations/007_subjects.sql +23 -0
  103. package/migrations/008_artifacts_recording.sql +6 -0
  104. package/migrations/009_artifact_subjects.sql +10 -0
  105. package/package.json +82 -0
  106. package/prompts/action-items.md +55 -0
  107. package/prompts/blog-draft.md +54 -0
  108. package/prompts/blog-research.md +87 -0
  109. package/prompts/card.md +54 -0
  110. package/prompts/classify-segment.md +38 -0
  111. package/prompts/classify.md +37 -0
  112. package/prompts/code-snippets.md +163 -0
  113. package/prompts/extract-metadata.md +149 -0
  114. package/prompts/notes.md +83 -0
  115. package/prompts/runbook.md +123 -0
  116. package/prompts/standup.md +50 -0
  117. package/prompts/step-by-step.md +125 -0
  118. package/prompts/subject-grouping.md +31 -0
  119. package/prompts/summary-v3.md +89 -0
  120. package/prompts/summary.md +77 -0
  121. package/prompts/topic-classifier.md +24 -0
  122. package/prompts/topic-extract.md +13 -0
  123. package/prompts/vlm-batch.md +21 -0
  124. package/prompts/vlm-single.md +19 -0
@@ -0,0 +1,303 @@
1
+ /**
2
+ * Escribano - Publish Summary V3
3
+ *
4
+ * Publishes V3 session summaries to Outline wiki.
5
+ */
6
+ import { log } from '../pipeline/context.js';
7
/**
 * Publish the V3 summary of a single recording to Outline.
 *
 * One document is written per recording (and per title, see below). If a
 * document with the computed title already exists in the collection it is
 * updated in place; otherwise a new one is created, nested under the
 * "Session Summaries Index" document when that document exists.
 *
 * @param recordingId - ID looked up through `repos.recordings.findById`
 * @param content - Summary markdown; this (not the full document) is what gets hashed
 * @param topicBlocks - V3 topic blocks used for the title and the metadata header
 * @param repos - Database repositories (only `recordings` is used here)
 * @param publishing - Outline publishing service
 * @param options - `collectionName`, `format` and `publish` are read
 * @returns `{ url, documentId, collectionId, syncState, contentHash }`
 * @throws Error when the recording does not exist
 */
export async function publishSummaryV3(recordingId, content, topicBlocks, repos, publishing, options = {}) {
    const INDEX_TITLE = '📋 Session Summaries Index';
    const targetCollection = options.collectionName ?? 'Escribano Sessions';
    log('info', `[Publish V3] Publishing summary for ${recordingId}...`);

    // Fail early, before touching Outline, if the recording is unknown.
    const recording = repos.recordings.findById(recordingId);
    if (!recording) {
        throw new Error(`Recording ${recordingId} not found`);
    }

    const collection = await publishing.ensureCollection(targetCollection);
    log('info', `[Publish V3] Using collection: ${targetCollection} (${collection.id})`);

    // The index document is optional; when present it becomes the parent of new documents.
    const parentIndex = await publishing.findDocumentByTitle(collection.id, INDEX_TITLE);
    if (parentIndex) {
        log('info', `[Publish V3] Nesting under index: ${INDEX_TITLE}`);
    }

    const title = buildDocumentTitle(recording, topicBlocks, options.format);
    const body = buildDocumentContent(recording, content, topicBlocks);

    // The title doubles as the identity of the document inside the collection.
    const previous = await publishing.findDocumentByTitle(collection.id, title);
    const writeDocument = async () => {
        if (!previous) {
            log('info', `[Publish V3] Creating new document: ${title}`);
            return publishing.createDocument({
                collectionId: collection.id,
                parentDocumentId: parentIndex?.id,
                title,
                content: body,
                publish: options.publish ?? true,
            });
        }
        log('info', `[Publish V3] Updating existing document: ${title}`);
        await publishing.updateDocument(previous.id, { title, content: body });
        // updateDocument's result is not used; the previously found document is reported.
        return previous;
    };
    const published = await writeDocument();

    const contentHash = hashContent(content);
    const syncState = {
        collectionId: collection.id,
        sessionDocumentId: published.id,
        sessionDocumentUrl: published.url,
        artifacts: [], // V3 publishes one flat document, no child artifacts
        lastSyncedAt: new Date(),
    };
    log('info', `[Publish V3] Published to: ${published.url}`);
    return {
        url: published.url,
        documentId: published.id,
        collectionId: collection.id,
        syncState,
        contentHash,
    };
}
80
/**
 * Compose the Outline document title for a recording:
 * `[YYYY-MM-DD HH:MM] <Most frequent activity> (<duration>)` plus an optional
 * ` [<format>]` suffix.
 *
 * NOTE: the date part comes from `toISOString()` (UTC) while the time part
 * comes from `toTimeString()` (local time zone), so the two can disagree
 * around midnight. The title is also used to look up existing documents,
 * so changing this format would orphan previously published ones.
 */
function buildDocumentTitle(recording, topicBlocks, format) {
    const capturedAt = new Date(recording.captured_at);
    const [day] = capturedAt.toISOString().split('T');
    const clock = capturedAt.toTimeString().split(' ')[0].substring(0, 5);
    // Falls back to a generic label when no block carries an activity.
    const headline = extractActivities(topicBlocks)[0] ?? 'Session';
    const suffix = format ? ` [${format}]` : '';
    const pieces = [
        `[${day} ${clock}]`,
        `${headline}`,
        `(${formatDuration(recording.duration)})`,
    ];
    return pieces.join(' ') + suffix;
}
94
/**
 * Collect the distinct activity types found in the topic blocks, most
 * frequent first, each with its first letter upper-cased.
 *
 * Each block's `classification` is expected to be a JSON string with an
 * `activity_type` field. Blocks whose classification is missing, is not valid
 * JSON, or whose `activity_type` is not a non-empty string are ignored, so a
 * single malformed block can never make the caller fail.
 *
 * @param topicBlocks - Iterable of blocks carrying a `classification` JSON string
 * @returns Capitalised activity names ordered by descending occurrence count
 */
function extractActivities(topicBlocks) {
    const counts = new Map();
    for (const block of topicBlocks) {
        let activity;
        try {
            activity = JSON.parse(block.classification || '{}').activity_type;
        }
        catch {
            // Invalid JSON (or a JSON `null`) — skip this block.
            continue;
        }
        // Only strings can be capitalised below; anything else is treated as absent.
        if (typeof activity !== 'string' || activity === '') {
            continue;
        }
        counts.set(activity, (counts.get(activity) ?? 0) + 1);
    }
    return [...counts.entries()]
        .sort(([, countA], [, countB]) => countB - countA)
        .map(([activity]) => activity.charAt(0).toUpperCase() + activity.slice(1));
}
116
/**
 * Render a duration given in seconds as `Xm`, `Xh` or `Xh Ym`.
 * The value is rounded to the nearest whole minute first.
 */
function formatDuration(seconds) {
    const totalMinutes = Math.round(seconds / 60);
    if (totalMinutes < 60) {
        return `${totalMinutes}m`;
    }
    const wholeHours = Math.floor(totalMinutes / 60);
    const leftoverMinutes = totalMinutes % 60;
    // Drop the minutes part entirely for exact hours.
    return leftoverMinutes === 0
        ? `${wholeHours}h`
        : `${wholeHours}h ${leftoverMinutes}m`;
}
131
/**
 * Assemble the markdown body of the published document: a metadata header,
 * an optional timeline section, then the summary under a
 * "Session Summary" heading.
 */
function buildDocumentContent(recording, summary, topicBlocks) {
    const capturedAt = new Date(recording.captured_at);
    const activityList = extractActivities(topicBlocks);

    // Metadata header, fenced by horizontal rules.
    const sections = [
        `---\n`,
        `**Date:** ${capturedAt.toLocaleString()}\n\n`,
        `**Duration:** ${formatDuration(recording.duration)}\n\n`,
        `**Activities:** ${activityList.join(', ') || 'Unknown'}\n\n`,
        `**Recording ID:** \`${recording.id}\`\n\n`,
    ];
    if (recording.source_type) {
        sections.push(`**Source:** ${recording.source_type}\n\n`);
    }
    sections.push(`---\n\n`);

    // The timeline section is only emitted when at least one line was produced.
    const timeline = buildTimeline(topicBlocks);
    if (timeline) {
        sections.push(`## Timeline\n\n`, `${timeline}`, `\n---\n\n`);
    }

    sections.push(`# Session Summary\n\n`);
    return sections.join('') + summary;
}
159
/**
 * Build a markdown bullet list describing the topic blocks in chronological
 * order, one line per block:
 * `- **M:SS** (<duration>): <activity> — <apps>`.
 *
 * Each block's `classification` JSON is parsed exactly once. Blocks whose
 * classification cannot be parsed or read are skipped, both when ordering and
 * when rendering, so one malformed block cannot abort the whole publish.
 *
 * @param topicBlocks - Array of blocks carrying a `classification` JSON string
 * @returns The timeline markdown, or an empty string when there is nothing to show
 */
function buildTimeline(topicBlocks) {
    if (topicBlocks.length === 0)
        return '';
    // Parse first, so sorting works on plain numbers and never throws.
    const entries = [];
    for (const block of topicBlocks) {
        try {
            const classification = JSON.parse(block.classification || '{}');
            entries.push({
                activity: classification.activity_type ?? 'unknown',
                startTime: classification.start_time ?? 0,
                endTime: classification.end_time ?? 0,
                apps: (classification.apps ?? []).join(', '),
            });
        }
        catch {
            // Skip invalid blocks
        }
    }
    entries.sort((a, b) => a.startTime - b.startTime);
    let timeline = '';
    for (const { activity, startTime, endTime, apps } of entries) {
        timeline += `- **${formatTime(startTime)}** (${formatDuration(endTime - startTime)}): ${activity}`;
        // The app list is appended only when the block recorded any apps.
        if (apps) {
            timeline += ` — ${apps}`;
        }
        timeline += `\n`;
    }
    return timeline;
}
194
/**
 * Render an offset in seconds as `M:SS` (minutes are not capped at 59,
 * seconds are zero-padded to two digits).
 */
function formatTime(seconds) {
    const wholeMinutes = Math.floor(seconds / 60);
    const paddedSeconds = Math.floor(seconds % 60).toString().padStart(2, '0');
    return `${wholeMinutes}:${paddedSeconds}`;
}
202
/**
 * Cheap, non-cryptographic 32-bit string hash used for change detection.
 * Iterates UTF-16 code units (`hash * 31 + unit`, wrapped to a signed
 * 32-bit integer) and returns the result in hexadecimal; negative values
 * keep their leading minus sign.
 */
function hashContent(content) {
    let digest = 0;
    let position = 0;
    while (position < content.length) {
        // `| 0` truncates to a signed 32-bit integer after every step.
        digest = ((digest << 5) - digest + content.charCodeAt(position)) | 0;
        position += 1;
    }
    return digest.toString(16);
}
214
/**
 * Persist Outline publishing info into the recording's `source_metadata` JSON.
 * Intended to be called after a successful publish.
 *
 * With a `format`, the info is stored as an entry of `outline_formats`
 * (replacing any previous entry for the same format). Without one it is
 * stored under the single legacy `outline` key.
 *
 * @param recordingId - ID of the recording to update
 * @param outlineInfo - Publishing info to store (spread into the format entry)
 * @param repos - Database repositories (`recordings.findById` / `updateMetadata`)
 * @param format - Optional artifact format the info belongs to
 * @throws Error when the recording does not exist
 * @throws SyntaxError when the stored `source_metadata` is not valid JSON
 */
export function updateRecordingOutlineMetadata(recordingId, outlineInfo, repos, format) {
    const recording = repos.recordings.findById(recordingId);
    if (!recording) {
        throw new Error(`Recording ${recordingId} not found`);
    }
    // Existing metadata is extended, never replaced wholesale.
    const currentMetadata = recording.source_metadata
        ? JSON.parse(recording.source_metadata)
        : {};
    if (format) {
        // Same shape check the reader (getOutlineMetadataForFormat) applies:
        // anything that is not an array is treated as "no entries yet".
        const previousEntries = Array.isArray(currentMetadata.outline_formats)
            ? currentMetadata.outline_formats
            : [];
        currentMetadata.outline_formats = [
            ...previousEntries.filter((entry) => entry?.format !== format),
            { format, ...outlineInfo },
        ];
    }
    else {
        // Backward compatibility: store as single outline object if no format specified
        currentMetadata.outline = outlineInfo;
    }
    repos.recordings.updateMetadata(recordingId, JSON.stringify(currentMetadata));
    log('info', `[Publish V3] Updated metadata for ${recordingId}${format ? ` (${format})` : ''}`);
}
247
/**
 * Read the legacy single `outline` entry from a recording's
 * `source_metadata` JSON.
 *
 * @returns The stored `outline` value, or `null` when it is absent or the
 *          metadata cannot be read/parsed
 */
export function getOutlineMetadata(recording) {
    try {
        const raw = recording.source_metadata;
        if (!raw) {
            return null;
        }
        const { outline } = JSON.parse(raw);
        return outline ?? null;
    }
    catch {
        // Unreadable metadata is treated the same as "never published".
        return null;
    }
}
261
/**
 * Look up Outline publishing info for one artifact format.
 *
 * The `outline_formats` array is consulted first; when no entry matches (or
 * no format is given) the legacy single `outline` value is returned instead.
 *
 * @returns The matching entry reduced to its known fields, the legacy
 *          `outline` value, or `null` when nothing is stored or the metadata
 *          cannot be read/parsed
 */
export function getOutlineMetadataForFormat(recording, format) {
    try {
        const metadata = recording.source_metadata
            ? JSON.parse(recording.source_metadata)
            : {};
        const perFormat = metadata.outline_formats;
        if (format && Array.isArray(perFormat)) {
            const match = perFormat.find((entry) => entry.format === format);
            if (match) {
                // Copy only the known fields; the `format` key itself is not returned.
                const { url, documentId, collectionId, publishedAt, contentHash, error, failedAt } = match;
                return { url, documentId, collectionId, publishedAt, contentHash, error, failedAt };
            }
        }
        // Fallback to legacy single-outline
        return metadata.outline ?? null;
    }
    catch {
        return null;
    }
}
294
/**
 * Tell whether `currentContent` differs from what was last published for the
 * given format, by comparing content hashes.
 *
 * @returns `true` when nothing was published yet or the hashes differ
 */
export function hasContentChanged(recording, currentContent, format) {
    const lastPublished = getOutlineMetadataForFormat(recording, format);
    // No stored publishing info means the content counts as changed.
    return !lastPublished || hashContent(currentContent) !== lastPublished.contentHash;
}
@@ -0,0 +1,196 @@
1
+ /**
2
+ * Sync Session to Outline Action
3
+ *
4
+ * Orchestrates publishing a session and its artifacts to Outline.
5
+ */
6
/**
 * Publish a session to Outline: one parent document for the session, one
 * child document per artifact, then persist the resulting sync state on the
 * session and refresh the global session index.
 *
 * @param session - Session to publish; `outlineSyncState` is overwritten on it
 * @param publishing - Outline publishing service
 * @param storage - Session storage (`saveSession`, and `listSessions` for the index)
 * @param collectionName - Target collection, created on demand
 * @returns `{ url }` of the session's parent document
 */
export async function syncSessionToOutline(session, publishing, storage, collectionName = 'Escribano Sessions') {
    const collection = await publishing.ensureCollection(collectionName);

    // Parent document: looked up by title so re-syncs update instead of duplicating.
    const parentTitle = formatSessionTitle(session);
    const parentBody = generateSessionDocument(session);
    const previousParent = await publishing.findDocumentByTitle(collection.id, parentTitle);
    const parentDoc = await upsertDocument(publishing, collection.id, parentTitle, parentBody, previousParent?.id);

    // Children are written one after another, in artifact order.
    const artifactStates = [];
    for (const { type, content } of session.artifacts) {
        const childTitle = formatArtifactType(type);
        const previousChild = await findChildDocumentByTitle(publishing, collection.id, parentDoc.id, childTitle);
        const childDoc = await upsertDocument(publishing, collection.id, childTitle, content, previousChild?.id, parentDoc.id);
        artifactStates.push({
            type,
            documentId: childDoc.id,
            documentUrl: childDoc.url,
            syncedAt: new Date(),
            contentHash: hashContent(content),
        });
    }

    session.outlineSyncState = {
        collectionId: collection.id,
        sessionDocumentId: parentDoc.id,
        sessionDocumentUrl: parentDoc.url,
        artifacts: artifactStates,
        lastSyncedAt: new Date(),
    };
    await storage.saveSession(session);

    // The index is rebuilt after saving so it can link to this session's document.
    await updateGlobalIndex(publishing, storage, collection.id);
    return { url: parentDoc.url };
}
44
/**
 * Create a document, or update the content of an existing one, and return
 * the resulting document.
 *
 * `updateDocument`'s result is not relied on, so after an update the document
 * is fetched again. A lookup by title alone is ambiguous — artifact children
 * of different sessions share titles such as "Summary" — so the title hit is
 * only accepted when its id matches `existingId`; otherwise the document is
 * located by id in the collection listing.
 *
 * @param publishing - Publishing service
 * @param collectionId - Collection the document lives in
 * @param title - Document title
 * @param content - New document content
 * @param existingId - ID of the document to update; falsy to create a new one
 * @param parentDocumentId - Parent for newly created documents (ignored on update)
 * @returns The created or re-fetched document
 * @throws Error when the updated document cannot be found afterwards
 */
async function upsertDocument(publishing, collectionId, title, content, existingId, parentDocumentId) {
    if (!existingId) {
        return publishing.createDocument({
            collectionId,
            title,
            content,
            parentDocumentId,
            publish: true,
        });
    }
    await publishing.updateDocument(existingId, { content });
    // Fast path: the title is unique enough and resolves to the document we just updated.
    const byTitle = await publishing.findDocumentByTitle(collectionId, title);
    if (byTitle?.id === existingId) {
        return byTitle;
    }
    // Another document shares this title (or none was found): resolve by id.
    // NOTE(review): assumes listDocuments returns entries carrying `id` and `url` — confirm.
    const documents = await publishing.listDocuments(collectionId);
    const byId = documents.find((doc) => doc.id === existingId);
    if (!byId) {
        throw new Error(`Failed to find updated document: ${title}`);
    }
    return byId;
}
66
/**
 * Compose the Outline title of a session document:
 * `[TYPE] YYYY-MM-DD HH:MM - <session id>`, where the `[TYPE] ` prefix is
 * only present when the session has a primary classification.
 *
 * NOTE: the date part comes from `toISOString()` (UTC) while the time part
 * comes from `toTimeString()` (local time zone).
 */
function formatSessionTitle(session) {
    const createdAt = new Date(session.createdAt);
    const [day] = createdAt.toISOString().split('T');
    const clock = createdAt.toTimeString().split(' ')[0].substring(0, 5);
    const primary = getPrimaryType(session);
    const prefix = primary ? `[${primary.toUpperCase()}] ` : '';
    return `${prefix}${day} ${clock} - ${session.id}`;
}
77
/**
 * Return the session's highest-scoring classification type, provided its
 * score is at least 25.
 *
 * @param session - Object whose `classification` maps type name -> numeric score
 * @returns The type name, or `null` when the session is unclassified, the
 *          classification is empty, or no score reaches 25
 */
function getPrimaryType(session) {
    if (!session.classification)
        return null;
    const ranked = Object.entries(session.classification).sort(([, a], [, b]) => b - a);
    // An empty classification object has no top entry to inspect.
    if (ranked.length === 0)
        return null;
    const [type, score] = ranked[0];
    return score >= 25 ? type : null;
}
83
/**
 * Turn a hyphenated artifact type into a display label by capitalising each
 * hyphen-separated word and joining the words with spaces.
 */
function formatArtifactType(type) {
    const words = [];
    for (const word of type.split('-')) {
        words.push(word.charAt(0).toUpperCase() + word.slice(1));
    }
    return words.join(' ');
}
92
/**
 * Find the document with the given title directly under a specific parent.
 * Lists every document of the collection and returns the first one whose
 * `parentDocumentId` and `title` both match, or `null` when none does.
 */
async function findChildDocumentByTitle(publishing, collectionId, parentId, title) {
    const candidates = await publishing.listDocuments(collectionId);
    for (const doc of candidates) {
        if (doc.parentDocumentId === parentId && doc.title === title) {
            return doc;
        }
    }
    return null;
}
100
/**
 * Render the markdown body of a session's parent document: heading, date,
 * classification line, then optional "Artifacts" and "Metadata" sections
 * (speakers and key moments).
 *
 * Only classification entries scoring at least 25 are listed, highest first.
 */
function generateSessionDocument(session) {
    const createdLabel = new Date(session.createdAt).toLocaleString();
    let classificationLine = 'Not classified';
    if (session.classification) {
        classificationLine = Object.entries(session.classification)
            .filter(([, score]) => score >= 25)
            .sort(([, scoreA], [, scoreB]) => scoreB - scoreA)
            .map(([type, score]) => `${type} (${score}%)`)
            .join(' | ');
    }

    const chunks = [
        `# Session: ${session.id}\n\n`,
        `**Date:** ${createdLabel}\n`,
        `**Classification:** ${classificationLine}\n\n`,
    ];

    if (session.artifacts.length > 0) {
        chunks.push(`## Artifacts\n\n`);
        for (const artifact of session.artifacts) {
            chunks.push(`- ${formatArtifactType(artifact.type)}\n`);
        }
        chunks.push(`\n`);
    }

    const { metadata } = session;
    if (metadata) {
        chunks.push(`## Metadata\n\n`);
        if (metadata.speakers?.length) {
            chunks.push(`### Speakers\n`);
            for (const speaker of metadata.speakers) {
                // The role is shown in parentheses only when one is set.
                chunks.push(`- ${speaker.name}${speaker.role ? ` (${speaker.role})` : ''}\n`);
            }
            chunks.push(`\n`);
        }
        if (metadata.keyMoments?.length) {
            chunks.push(`### Key Moments\n`);
            for (const moment of metadata.keyMoments) {
                chunks.push(`- [${formatTime(moment.timestamp)}] ${moment.description}\n`);
            }
            chunks.push(`\n`);
        }
    }
    return chunks.join('');
}
141
/**
 * Render an offset in seconds as `M:SS`, with the seconds zero-padded to
 * two digits.
 */
function formatTime(seconds) {
    const minutePart = Math.floor(seconds / 60);
    const secondPart = Math.floor(seconds % 60);
    return [minutePart, secondPart.toString().padStart(2, '0')].join(':');
}
146
/**
 * Simple non-cryptographic content hash (placeholder).
 * Folds the string's UTF-16 code units as `hash * 31 + unit`, wrapped to a
 * signed 32-bit integer, and returns the result as a decimal string.
 */
function hashContent(content) {
    let digest = 0;
    for (let pos = 0, len = content.length; pos < len; pos += 1) {
        // `| 0` keeps the running value inside the signed 32-bit range.
        digest = ((digest << 5) - digest + content.charCodeAt(pos)) | 0;
    }
    return String(digest);
}
158
/**
 * Rebuild the "Session Index" document in Outline from all stored sessions.
 *
 * Sessions are grouped by month (in the order the months are first
 * encountered) and rendered as one markdown table per month; the document is
 * then created or updated by title.
 */
async function updateGlobalIndex(publishing, storage, collectionId) {
    const sessions = await storage.listSessions();
    const title = '📋 Session Index';
    const chunks = [
        `# 📋 Escribano Session Index\n\n`,
        `*Last updated: ${new Date().toLocaleString()}*\n\n`,
    ];

    // Bucket sessions by "<Month> <Year>" label, keeping first-seen order.
    const byMonth = new Map();
    for (const session of sessions) {
        const monthLabel = new Date(session.createdAt).toLocaleString('default', {
            month: 'long',
            year: 'numeric',
        });
        const bucket = byMonth.get(monthLabel);
        if (bucket) {
            bucket.push(session);
        }
        else {
            byMonth.set(monthLabel, [session]);
        }
    }

    for (const [monthLabel, monthSessions] of byMonth) {
        chunks.push(`## ${monthLabel}\n\n`);
        chunks.push(`| Date | Type | Artifacts | Link |\n`);
        chunks.push(`|------|------|-----------|------|\n`);
        for (const session of monthSessions) {
            const dateCell = new Date(session.createdAt).toLocaleString();
            const typeCell = getPrimaryType(session) || 'Unknown';
            const artifactCell = session.artifacts
                .map((artifact) => formatArtifactType(artifact.type))
                .join(', ');
            // Sessions that were never synced have no document URL to link to.
            const url = session.outlineSyncState?.sessionDocumentUrl;
            const linkCell = url ? `[View](${session.outlineSyncState.sessionDocumentUrl})` : 'N/A';
            chunks.push(`| ${dateCell} | ${typeCell} | ${artifactCell} | ${linkCell} |\n`);
        }
        chunks.push(`\n`);
    }

    const content = chunks.join('');
    const existing = await publishing.findDocumentByTitle(collectionId, title);
    await upsertDocument(publishing, collectionId, title, content, existing?.id);
}
@@ -0,0 +1,69 @@
1
import { exec, execFile, spawn } from 'node:child_process';
import { mkdir, readFile, rm, rmdir } from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import { promisify } from 'node:util';
6
+ const execAsync = promisify(exec);
7
/**
 * Create the Silero VAD audio preprocessor.
 *
 * The returned object exposes:
 * - `extractSpeechSegments(audioPath, recordingId)`: converts the audio to
 *   16 kHz mono WAV with ffmpeg, runs the Python VAD script through `uv run`,
 *   and resolves to `{ segments, tempDir }` where `segments` is the parsed
 *   manifest JSON written by the script.
 * - `cleanup(tempDir)`: removes the segment directory (best effort).
 * - `getResourceName()`: the constant `'silero-python'`.
 * - `getPid()`: PID of the VAD child process while it runs, otherwise `null`.
 *
 * Both external programs are started with argument arrays (no shell), so
 * paths containing quotes, spaces or `$` are passed through verbatim.
 */
export function createSileroPreprocessor() {
    // Upper bound on how much of the child's stderr is kept for error messages.
    const STDERR_TAIL_CHARS = 2000;
    const execFileAsync = promisify(execFile);
    let currentProcess = null;

    /** Run `uv` with the given arguments; settles when the child exits. */
    const runVad = (args) => new Promise((resolve, reject) => {
        // stdout is discarded and stderr is drained: an unread pipe would
        // eventually fill up and block the child.
        const child = spawn('uv', args, { stdio: ['ignore', 'ignore', 'pipe'] });
        currentProcess = child;
        let stderrTail = '';
        child.stderr?.setEncoding('utf8');
        child.stderr?.on('data', (chunk) => {
            stderrTail = (stderrTail + chunk).slice(-STDERR_TAIL_CHARS);
        });
        child.on('error', (err) => {
            currentProcess = null;
            reject(err);
        });
        child.on('close', (code) => {
            currentProcess = null;
            if (code === 0) {
                resolve();
                return;
            }
            const detail = stderrTail.trim();
            reject(new Error(`Silero VAD failed with code ${code}${detail ? `: ${detail}` : ''}`));
        });
    });

    return {
        extractSpeechSegments: async (audioPath, recordingId) => {
            const tempDir = path.join(os.tmpdir(), 'escribano', recordingId, 'segments');
            const manifestPath = path.join(tempDir, 'manifest.json');
            await mkdir(tempDir, { recursive: true });
            const inputWavPath = path.join(tempDir, 'input_16k.wav');
            try {
                console.log(`Converting ${audioPath} to 16kHz mono WAV...`);
                // `-y` (overwrite) is a global option, so it goes before the inputs.
                await execFileAsync('ffmpeg', ['-y', '-i', audioPath, '-ar', '16000', '-ac', '1', inputWavPath]);
            }
            catch (error) {
                throw new Error(`Failed to pre-convert audio for VAD: ${error.message}`, { cause: error });
            }
            // NOTE(review): resolved against the current working directory and
            // the `src` tree — confirm this matches how the package is run.
            const scriptPath = path.join(process.cwd(), 'src', 'scripts', 'audio_preprocessor.py');
            try {
                console.log(`Running Silero VAD on ${inputWavPath}...`);
                await runVad([
                    'run', scriptPath,
                    '--audio', inputWavPath,
                    '--output-dir', tempDir,
                    '--output-json', manifestPath,
                ]);
                const manifestContent = await readFile(manifestPath, 'utf-8');
                const segments = JSON.parse(manifestContent);
                return { segments, tempDir };
            }
            catch (error) {
                currentProcess = null;
                console.error(`Silero VAD failed: ${error.message}`);
                throw new Error(`Failed to extract speech segments: ${error.message}`, { cause: error });
            }
        },
        cleanup: async (tempDir) => {
            try {
                await rm(tempDir, { recursive: true, force: true });
                // Remove the per-recording parent only if it is now empty:
                // rmdir refuses non-empty directories, and that refusal is ignored.
                await rmdir(path.dirname(tempDir)).catch(() => { });
            }
            catch (error) {
                console.warn(`Failed to cleanup temp segments: ${error.message}`);
            }
        },
        getResourceName() {
            return 'silero-python';
        },
        getPid() {
            return currentProcess?.pid ?? null;
        },
    };
}
@@ -0,0 +1,94 @@
1
+ import { readdir, readFile, stat } from 'node:fs/promises';
2
+ import { homedir } from 'node:os';
3
+ import { join } from 'node:path';
4
+ import { capConfigSchema } from '../0_types.js';
5
/**
 * Expand a leading `~/` to the current user's home directory.
 * Any other path is returned unchanged.
 */
function expandPath(path) {
    return path.startsWith('~/') ? join(homedir(), path.slice(2)) : path;
}
11
/**
 * Read a `.cap` recording directory and build a recording descriptor from
 * its `recording-meta.json`.
 *
 * Only the first entry of `meta.segments` is inspected. The capture time is
 * taken from the modification time of the microphone track, or of the
 * system-audio track when there is no microphone track.
 *
 * @param capDirPath - Path of the recording directory
 * @returns The recording descriptor, or `null` when the first segment has no
 *          audio track at all
 * @throws Error when the directory/files are missing, the metadata is not
 *         valid JSON, or it has no segments; the underlying error is attached
 *         as `cause`
 */
async function parseCapRecording(capDirPath) {
    try {
        const metaPath = join(capDirPath, 'recording-meta.json');
        const metaContent = await readFile(metaPath, 'utf-8');
        const meta = JSON.parse(metaContent);
        if (!meta.segments ||
            !Array.isArray(meta.segments) ||
            meta.segments.length === 0) {
            throw new Error(`Invalid metadata in ${capDirPath}: missing or empty segments array`);
        }
        const firstSegment = meta.segments[0];
        const videoPath = firstSegment.display?.path
            ? join(capDirPath, firstSegment.display.path)
            : null;
        // A segment may carry a microphone track, a system-audio track, or both.
        const micAudio = firstSegment.mic?.path
            ? join(capDirPath, firstSegment.mic.path)
            : null;
        const systemAudio = firstSegment.system_audio?.path
            ? join(capDirPath, firstSegment.system_audio.path)
            : null;
        const audioToStat = micAudio || systemAudio;
        if (!audioToStat) {
            console.log(`Skipping ${capDirPath}: none audio track found`);
            return null;
        }
        const stats = await stat(audioToStat);
        const capturedAt = stats.mtime;
        // The directory name doubles as the recording ID.
        const recordingId = capDirPath.split('/').pop() || 'unknown';
        return {
            id: recordingId,
            source: {
                type: 'cap',
                originalPath: capDirPath,
                metadata: meta,
            },
            videoPath,
            audioMicPath: micAudio,
            audioSystemPath: systemAudio,
            duration: 0,
            capturedAt,
        };
    }
    catch (error) {
        // Translate low-level failures into descriptive messages while keeping
        // the original error (code, stack) reachable through `cause`.
        if (error.code === 'ENOENT') {
            throw new Error(`Recording directory or files not found: ${capDirPath}`, { cause: error });
        }
        if (error.name === 'SyntaxError') {
            throw new Error(`Invalid JSON in recording-meta.json at ${capDirPath}`, { cause: error });
        }
        throw new Error(`Failed to parse recording at ${capDirPath}: ${error.message}`, { cause: error });
    }
}
64
/**
 * Create a capture source backed by a directory of Cap (`*.cap`) recordings.
 *
 * @param config - Validated through `capConfigSchema`; `recordingsPath` may
 *                 start with `~/`
 * @returns An object with `listRecordings(limit = 10)` — recordings sorted
 *          newest first by capture time — and `getLatestRecording()`, which
 *          resolves to the newest recording or `null`
 */
export function createCapSource(config = {}) {
    const parsedConfig = capConfigSchema.parse(config);
    const recordingsPath = expandPath(parsedConfig.recordingsPath);
    const innerList = async (limit = 10) => {
        try {
            const entries = await readdir(recordingsPath, { withFileTypes: true });
            const capDirs = entries.filter((entry) => entry.isDirectory() && entry.name.endsWith('.cap'));
            // Every directory is parsed, even for small limits, because the
            // ordering depends on each recording's capture time.
            const outcomes = await Promise.allSettled(capDirs.map((dir) => parseCapRecording(join(recordingsPath, dir.name))));
            const recordings = [];
            for (const outcome of outcomes) {
                if (outcome.status === 'rejected') {
                    // One broken recording is reported but does not hide the others.
                    console.warn(`Skipping Cap recording: ${outcome.reason?.message ?? outcome.reason}`);
                    continue;
                }
                // parseCapRecording resolves to null for recordings without audio.
                if (outcome.value !== null) {
                    recordings.push(outcome.value);
                }
            }
            return recordings
                .sort((a, b) => b.capturedAt.getTime() - a.capturedAt.getTime())
                .slice(0, limit);
        }
        catch (error) {
            // Listing is best effort: an unreadable directory yields an empty list.
            console.error('Failed to list Cap recordings:', error);
            return [];
        }
    };
    return {
        getLatestRecording: () => innerList(1).then((recordings) => recordings[0] ?? null),
        listRecordings: innerList,
    };
}