@nathanvale/chatline 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. package/CHANGELOG.md +1 -0
  2. package/LICENSE +21 -0
  3. package/README.md +1535 -0
  4. package/dist/bin/index.js +5121 -0
  5. package/dist/cli/commands/clean.d.ts +17 -0
  6. package/dist/cli/commands/clean.d.ts.map +1 -0
  7. package/dist/cli/commands/clean.js +142 -0
  8. package/dist/cli/commands/clean.js.map +1 -0
  9. package/dist/cli/commands/doctor.d.ts +17 -0
  10. package/dist/cli/commands/doctor.d.ts.map +1 -0
  11. package/dist/cli/commands/doctor.js +202 -0
  12. package/dist/cli/commands/doctor.js.map +1 -0
  13. package/dist/cli/commands/enrich-ai.d.ts +17 -0
  14. package/dist/cli/commands/enrich-ai.d.ts.map +1 -0
  15. package/dist/cli/commands/enrich-ai.js +371 -0
  16. package/dist/cli/commands/enrich-ai.js.map +1 -0
  17. package/dist/cli/commands/index.d.ts +16 -0
  18. package/dist/cli/commands/index.d.ts.map +1 -0
  19. package/dist/cli/commands/index.js +16 -0
  20. package/dist/cli/commands/index.js.map +1 -0
  21. package/dist/cli/commands/ingest-csv.d.ts +17 -0
  22. package/dist/cli/commands/ingest-csv.d.ts.map +1 -0
  23. package/dist/cli/commands/ingest-csv.js +138 -0
  24. package/dist/cli/commands/ingest-csv.js.map +1 -0
  25. package/dist/cli/commands/ingest-db.d.ts +17 -0
  26. package/dist/cli/commands/ingest-db.d.ts.map +1 -0
  27. package/dist/cli/commands/ingest-db.js +159 -0
  28. package/dist/cli/commands/ingest-db.js.map +1 -0
  29. package/dist/cli/commands/init.d.ts +17 -0
  30. package/dist/cli/commands/init.d.ts.map +1 -0
  31. package/dist/cli/commands/init.js +110 -0
  32. package/dist/cli/commands/init.js.map +1 -0
  33. package/dist/cli/commands/normalize-link.d.ts +16 -0
  34. package/dist/cli/commands/normalize-link.d.ts.map +1 -0
  35. package/dist/cli/commands/normalize-link.js +144 -0
  36. package/dist/cli/commands/normalize-link.js.map +1 -0
  37. package/dist/cli/commands/render-markdown.d.ts +17 -0
  38. package/dist/cli/commands/render-markdown.d.ts.map +1 -0
  39. package/dist/cli/commands/render-markdown.js +218 -0
  40. package/dist/cli/commands/render-markdown.js.map +1 -0
  41. package/dist/cli/commands/stats.d.ts +17 -0
  42. package/dist/cli/commands/stats.d.ts.map +1 -0
  43. package/dist/cli/commands/stats.js +175 -0
  44. package/dist/cli/commands/stats.js.map +1 -0
  45. package/dist/cli/commands/validate.d.ts +17 -0
  46. package/dist/cli/commands/validate.d.ts.map +1 -0
  47. package/dist/cli/commands/validate.js +152 -0
  48. package/dist/cli/commands/validate.js.map +1 -0
  49. package/dist/cli/index.d.ts +13 -0
  50. package/dist/cli/index.d.ts.map +1 -0
  51. package/dist/cli/index.js +121 -0
  52. package/dist/cli/index.js.map +1 -0
  53. package/dist/cli/types.d.ts +93 -0
  54. package/dist/cli/types.d.ts.map +1 -0
  55. package/dist/cli/types.js +7 -0
  56. package/dist/cli/types.js.map +1 -0
  57. package/dist/cli/utils.d.ts +29 -0
  58. package/dist/cli/utils.d.ts.map +1 -0
  59. package/dist/cli/utils.js +53 -0
  60. package/dist/cli/utils.js.map +1 -0
  61. package/dist/cli.d.ts +9 -0
  62. package/dist/cli.d.ts.map +1 -0
  63. package/dist/cli.js +1805 -0
  64. package/dist/config/generator.d.ts +90 -0
  65. package/dist/config/generator.d.ts.map +1 -0
  66. package/dist/config/generator.js +320 -0
  67. package/dist/config/generator.js.map +1 -0
  68. package/dist/config/loader.d.ts +107 -0
  69. package/dist/config/loader.d.ts.map +1 -0
  70. package/dist/config/loader.js +251 -0
  71. package/dist/config/loader.js.map +1 -0
  72. package/dist/config/schema.d.ts +107 -0
  73. package/dist/config/schema.d.ts.map +1 -0
  74. package/dist/config/schema.js +169 -0
  75. package/dist/config/schema.js.map +1 -0
  76. package/dist/enrich/audio-transcription.d.ts +77 -0
  77. package/dist/enrich/audio-transcription.d.ts.map +1 -0
  78. package/dist/enrich/audio-transcription.js +370 -0
  79. package/dist/enrich/audio-transcription.js.map +1 -0
  80. package/dist/enrich/checkpoint.d.ts +137 -0
  81. package/dist/enrich/checkpoint.d.ts.map +1 -0
  82. package/dist/enrich/checkpoint.js +205 -0
  83. package/dist/enrich/checkpoint.js.map +1 -0
  84. package/dist/enrich/idempotency.d.ts +90 -0
  85. package/dist/enrich/idempotency.d.ts.map +1 -0
  86. package/dist/enrich/idempotency.js +188 -0
  87. package/dist/enrich/idempotency.js.map +1 -0
  88. package/dist/enrich/image-analysis.d.ts +62 -0
  89. package/dist/enrich/image-analysis.d.ts.map +1 -0
  90. package/dist/enrich/image-analysis.js +264 -0
  91. package/dist/enrich/image-analysis.js.map +1 -0
  92. package/dist/enrich/index.d.ts +60 -0
  93. package/dist/enrich/index.d.ts.map +1 -0
  94. package/dist/enrich/index.js +74 -0
  95. package/dist/enrich/index.js.map +1 -0
  96. package/dist/enrich/link-enrichment.d.ts +37 -0
  97. package/dist/enrich/link-enrichment.d.ts.map +1 -0
  98. package/dist/enrich/link-enrichment.js +202 -0
  99. package/dist/enrich/link-enrichment.js.map +1 -0
  100. package/dist/enrich/pdf-video-handling.d.ts +49 -0
  101. package/dist/enrich/pdf-video-handling.d.ts.map +1 -0
  102. package/dist/enrich/pdf-video-handling.js +325 -0
  103. package/dist/enrich/pdf-video-handling.js.map +1 -0
  104. package/dist/enrich/progress-tracker.d.ts +120 -0
  105. package/dist/enrich/progress-tracker.d.ts.map +1 -0
  106. package/dist/enrich/progress-tracker.js +220 -0
  107. package/dist/enrich/progress-tracker.js.map +1 -0
  108. package/dist/enrich/providers/firecrawl.d.ts +18 -0
  109. package/dist/enrich/providers/firecrawl.d.ts.map +1 -0
  110. package/dist/enrich/providers/firecrawl.js +48 -0
  111. package/dist/enrich/providers/firecrawl.js.map +1 -0
  112. package/dist/enrich/providers/generic.d.ts +16 -0
  113. package/dist/enrich/providers/generic.d.ts.map +1 -0
  114. package/dist/enrich/providers/generic.js +36 -0
  115. package/dist/enrich/providers/generic.js.map +1 -0
  116. package/dist/enrich/providers/index.d.ts +14 -0
  117. package/dist/enrich/providers/index.d.ts.map +1 -0
  118. package/dist/enrich/providers/index.js +13 -0
  119. package/dist/enrich/providers/index.js.map +1 -0
  120. package/dist/enrich/providers/instagram.d.ts +16 -0
  121. package/dist/enrich/providers/instagram.d.ts.map +1 -0
  122. package/dist/enrich/providers/instagram.js +43 -0
  123. package/dist/enrich/providers/instagram.js.map +1 -0
  124. package/dist/enrich/providers/spotify.d.ts +16 -0
  125. package/dist/enrich/providers/spotify.d.ts.map +1 -0
  126. package/dist/enrich/providers/spotify.js +45 -0
  127. package/dist/enrich/providers/spotify.js.map +1 -0
  128. package/dist/enrich/providers/twitter.d.ts +16 -0
  129. package/dist/enrich/providers/twitter.d.ts.map +1 -0
  130. package/dist/enrich/providers/twitter.js +43 -0
  131. package/dist/enrich/providers/twitter.js.map +1 -0
  132. package/dist/enrich/providers/types.d.ts +47 -0
  133. package/dist/enrich/providers/types.d.ts.map +1 -0
  134. package/dist/enrich/providers/types.js +15 -0
  135. package/dist/enrich/providers/types.js.map +1 -0
  136. package/dist/enrich/providers/youtube.d.ts +16 -0
  137. package/dist/enrich/providers/youtube.d.ts.map +1 -0
  138. package/dist/enrich/providers/youtube.js +43 -0
  139. package/dist/enrich/providers/youtube.js.map +1 -0
  140. package/dist/enrich/rate-limiting.d.ts +118 -0
  141. package/dist/enrich/rate-limiting.d.ts.map +1 -0
  142. package/dist/enrich/rate-limiting.js +258 -0
  143. package/dist/enrich/rate-limiting.js.map +1 -0
  144. package/dist/index.d.ts +688 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +1729 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/ingest/dedup-merge.d.ts +82 -0
  149. package/dist/ingest/dedup-merge.d.ts.map +1 -0
  150. package/dist/ingest/dedup-merge.js +262 -0
  151. package/dist/ingest/dedup-merge.js.map +1 -0
  152. package/dist/ingest/ingest-csv.d.ts +62 -0
  153. package/dist/ingest/ingest-csv.d.ts.map +1 -0
  154. package/dist/ingest/ingest-csv.js +300 -0
  155. package/dist/ingest/ingest-csv.js.map +1 -0
  156. package/dist/ingest/ingest-db.d.ts +64 -0
  157. package/dist/ingest/ingest-db.d.ts.map +1 -0
  158. package/dist/ingest/ingest-db.js +172 -0
  159. package/dist/ingest/ingest-db.js.map +1 -0
  160. package/dist/ingest/link-replies-and-tapbacks.d.ts +53 -0
  161. package/dist/ingest/link-replies-and-tapbacks.d.ts.map +1 -0
  162. package/dist/ingest/link-replies-and-tapbacks.js +381 -0
  163. package/dist/ingest/link-replies-and-tapbacks.js.map +1 -0
  164. package/dist/normalize/date-converters.d.ts +45 -0
  165. package/dist/normalize/date-converters.d.ts.map +1 -0
  166. package/dist/normalize/date-converters.js +166 -0
  167. package/dist/normalize/date-converters.js.map +1 -0
  168. package/dist/normalize/path-validator.d.ts +65 -0
  169. package/dist/normalize/path-validator.d.ts.map +1 -0
  170. package/dist/normalize/path-validator.js +221 -0
  171. package/dist/normalize/path-validator.js.map +1 -0
  172. package/dist/normalize/validate-normalized.d.ts +45 -0
  173. package/dist/normalize/validate-normalized.d.ts.map +1 -0
  174. package/dist/normalize/validate-normalized.js +144 -0
  175. package/dist/normalize/validate-normalized.js.map +1 -0
  176. package/dist/render/embeds-blockquotes.d.ts +84 -0
  177. package/dist/render/embeds-blockquotes.d.ts.map +1 -0
  178. package/dist/render/embeds-blockquotes.js +204 -0
  179. package/dist/render/embeds-blockquotes.js.map +1 -0
  180. package/dist/render/grouping.d.ts +78 -0
  181. package/dist/render/grouping.d.ts.map +1 -0
  182. package/dist/render/grouping.js +134 -0
  183. package/dist/render/grouping.js.map +1 -0
  184. package/dist/render/index.d.ts +47 -0
  185. package/dist/render/index.d.ts.map +1 -0
  186. package/dist/render/index.js +245 -0
  187. package/dist/render/index.js.map +1 -0
  188. package/dist/render/reply-rendering.d.ts +88 -0
  189. package/dist/render/reply-rendering.d.ts.map +1 -0
  190. package/dist/render/reply-rendering.js +196 -0
  191. package/dist/render/reply-rendering.js.map +1 -0
  192. package/dist/schema/message.d.ts +125 -0
  193. package/dist/schema/message.d.ts.map +1 -0
  194. package/dist/schema/message.js +331 -0
  195. package/dist/schema/message.js.map +1 -0
  196. package/dist/utils/delta-detection.d.ts +107 -0
  197. package/dist/utils/delta-detection.d.ts.map +1 -0
  198. package/dist/utils/delta-detection.js +199 -0
  199. package/dist/utils/delta-detection.js.map +1 -0
  200. package/dist/utils/enrichment-merge.d.ts +135 -0
  201. package/dist/utils/enrichment-merge.d.ts.map +1 -0
  202. package/dist/utils/enrichment-merge.js +280 -0
  203. package/dist/utils/enrichment-merge.js.map +1 -0
  204. package/dist/utils/human.d.ts +15 -0
  205. package/dist/utils/human.d.ts.map +1 -0
  206. package/dist/utils/human.js +27 -0
  207. package/dist/utils/human.js.map +1 -0
  208. package/dist/utils/incremental-state.d.ts +133 -0
  209. package/dist/utils/incremental-state.d.ts.map +1 -0
  210. package/dist/utils/incremental-state.js +237 -0
  211. package/dist/utils/incremental-state.js.map +1 -0
  212. package/dist/utils/logger.d.ts +40 -0
  213. package/dist/utils/logger.d.ts.map +1 -0
  214. package/dist/utils/logger.js +176 -0
  215. package/dist/utils/logger.js.map +1 -0
  216. package/package.json +165 -0
@@ -0,0 +1,370 @@
1
+ /**
2
+ * Audio Transcription Module (ENRICH--T02)
3
+ *
4
+ * Implements audio transcription with structured output:
5
+ * - AC01: Structured prompt requesting timestamps and speaker identification
6
+ * - AC02: Extract speaker labels (Speaker 1, Speaker 2, etc.)
7
+ * - AC03: Generate short description (1-2 sentences)
8
+ * - AC04: Store under media.enrichment with kind='transcription'
9
+ * - AC05: Handle long audio files (>10min) with streaming/chunking
10
+ *
11
+ * Architecture:
12
+ * - transcribeAudioChunk: Transcribe single chunk with Gemini API
13
+ * - handleLongAudio: Split large files and process chunks
14
+ * - transcribeAudio: Estimate duration from file size, delegate to handleLongAudio, and wrap the result as an enrichment record
15
+ * - analyzeAudio: Main entry point, handles single message enrichment
16
+ * - analyzeAudios: Batch processing wrapper
17
+ *
18
+ * Error Handling:
19
+ * - Non-fatal errors are logged and original message is returned
20
+ * - Transcription failures don't block enrichment pipeline
21
+ * - Pipeline never crashes on enrichment errors
22
+ */
23
+ import { access, stat } from 'node:fs/promises';
24
+ import { GoogleGenerativeAI } from '@google/generative-ai';
25
+ import { createLogger } from '#utils/logger';
26
+ const logger = createLogger('enrich:audio-transcription');
27
+ /**
28
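+ * Structured prompt for the Gemini audio request: asks for a speaker-labelled transcription, timestamps, and a short summary.
29
+ * The "Transcription:", "Timestamps:" and "Short Description:" headings are what the regexes in transcribeAudioChunk match on, so keep the two in sync.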
30
+ */
31
+ const GEMINI_AUDIO_PROMPT = `You are an expert at transcribing audio. Please transcribe the audio and provide:
32
+
33
+ 1. Full Transcription:
34
+ Format with speaker labels as "Speaker 1: [text]", "Speaker 2: [text]", etc.
35
+ Keep the exact words spoken, preserving natural speech patterns.
36
+
37
+ 2. Timestamps:
38
+ Format as MM:SS - Speaker N: [brief content]
39
+ Include timestamp for each speaker change or major topic shift.
40
+
41
+ 3. Short Description:
42
+ Provide a 1-2 sentence summary of the audio content and main topics.
43
+
44
+ Format your response exactly as:
45
+
46
+ Transcription:
47
+ [full transcription with Speaker labels here]
48
+
49
+ Timestamps:
50
+ [timestamps here]
51
+
52
+ Short Description: [1-2 sentence summary here]`;
/**
 * Estimate an audio file's duration in seconds from its size on disk.
 *
 * This is a rough heuristic: it assumes a constant bitrate (128 kbps by
 * default) and ignores container overhead, so treat the result as an
 * approximation only.
 *
 * @param {number} fileSizeBytes - Size of the audio file in bytes.
 * @param {number} [bitrateKbps=128] - Assumed average bitrate in kilobits per second.
 * @returns {number} Estimated duration in whole seconds, rounded up.
 */
function estimateAudioDuration(fileSizeBytes, bitrateKbps = 128) {
  // kilobits/s -> bits/s (x1000) -> bytes/s (/8); 128 kbps is 16,000 bytes per second.
  // The previous divisor (128 * 1024) treated kilobits as bytes and under-estimated
  // durations by roughly a factor of eight.
  const bytesPerSecond = (bitrateKbps * 1000) / 8;
  return Math.ceil(fileSizeBytes / bytesPerSecond);
}
/**
 * AC05: Describe how an audio file of the given duration splits into
 * consecutive chunks of at most `maxChunkDuration` seconds.
 *
 * @param {number} durationSeconds - Total (estimated) duration in seconds.
 * @param {number} [maxChunkDuration=600] - Maximum chunk length in seconds; must be > 0.
 * @returns {Array<{index: number, startSec: number, endSec: number}>}
 *   One descriptor per chunk, in order; empty when the duration is 0.
 * @throws {RangeError} If `maxChunkDuration` is not a positive number.
 */
function getAudioChunks(durationSeconds, maxChunkDuration = 600) {
  // With a zero or negative chunk length the loop condition below never
  // becomes false, so reject it up front instead of growing the array forever.
  if (!(maxChunkDuration > 0)) {
    throw new RangeError(
      `maxChunkDuration must be a positive number of seconds, got ${maxChunkDuration}`,
    );
  }
  const chunks = [];
  for (let i = 0; i * maxChunkDuration < durationSeconds; i++) {
    chunks.push({
      index: i,
      startSec: i * maxChunkDuration,
      // The final chunk is clipped to the end of the audio.
      endSec: Math.min((i + 1) * maxChunkDuration, durationSeconds),
    });
  }
  return chunks;
}
/** File-extension to MIME-type lookup for the inline audio payload. */
const AUDIO_MIME_TYPES = new Map([
  ['m4a', 'audio/mp4'],
  ['mp3', 'audio/mpeg'],
  ['wav', 'audio/wav'],
  ['aac', 'audio/aac'],
  ['ogg', 'audio/ogg'],
  ['flac', 'audio/flac'],
]);

/**
 * AC02, AC03: Parse a model response laid out as requested by the prompt
 * ("Transcription:", "Timestamps:", "Short Description:" sections).
 *
 * @param {string} responseText - Raw text returned by the model.
 * @returns {{transcription: string, speakers: string[], timestamps: Array<{time: string, speaker: string, content: string}>, shortDescription: string}}
 */
function parseTranscriptionResponse(responseText) {
  // AC02: unique "Speaker N" labels, in order of first appearance anywhere in the response.
  const speakers = Array.from(new Set(responseText.match(/Speaker \d+/g) ?? []));

  // AC03: first line after the "Short Description:" heading.
  // The previous pattern used `\\s*`, which matches a literal backslash, so it
  // never matched a normal response and the default text was always returned.
  const shortDescriptionMatch = responseText.match(/Short Description:\s*(.+?)(?=\n|$)/is);
  const shortDescription = shortDescriptionMatch?.[1]?.trim() || 'Audio transcription available';

  // Everything between "Transcription:" and the blank line before "Timestamps:";
  // falls back to the whole response when the heading is missing.
  const transcriptionMatch = responseText.match(/Transcription:\s*([\s\S]+?)(?=\n\nTimestamps:|$)/i);
  const transcription = transcriptionMatch?.[1]?.trim() || responseText;

  // Everything between "Timestamps:" and the blank line before "Short Description:".
  const timestampsMatch = responseText.match(/Timestamps:\s*([\s\S]+?)(?=\n\nShort Description:|$)/i);
  const timestampsText = timestampsMatch?.[1]?.trim() || '';

  const timestamps = timestampsText
    .split('\n')
    .filter((line) => line.trim())
    .map((line) => {
      const match = line.match(/(\d{2}:\d{2})\s*-\s*Speaker (\d+):\s*(.+)/);
      // Lines that do not follow "MM:SS - Speaker N: text" are kept with
      // placeholder time/speaker values and the raw line as content.
      return {
        time: match?.[1] || '00:00',
        speaker: `Speaker ${match?.[2] || '1'}`,
        content: match?.[3] || line,
      };
    });

  return { transcription, speakers, timestamps, shortDescription };
}

/**
 * AC01: Call the Gemini API with the structured audio prompt.
 * AC02, AC03: Parse the response into transcription, speakers, timestamps and description.
 *
 * The complete file at `audioPath` is read, base64-encoded and sent inline;
 * `chunkIndex` is used only to label log messages.
 *
 * @param {string} audioPath - Path of the audio file to send.
 * @param {number} chunkIndex - Chunk number, used in log output.
 * @param {object} config - Reads `geminiApiKey` (required) and `geminiModel` (defaults to 'gemini-1.5-pro').
 * @returns {Promise<{transcription: string, speakers: string[], timestamps: Array<{time: string, speaker: string, content: string}>, shortDescription: string}>}
 * @throws {Error} If `config.geminiApiKey` is missing; file-read and API errors are logged and rethrown.
 */
export async function transcribeAudioChunk(audioPath, chunkIndex, config) {
  const apiKey = config.geminiApiKey;
  const modelName = config.geminiModel || 'gemini-1.5-pro';
  if (!apiKey) {
    throw new Error('GEMINI_API_KEY is required for audio transcription');
  }
  try {
    // AC01: Create Gemini client and call with structured prompt
    const genAI = new GoogleGenerativeAI(apiKey);
    const model = genAI.getGenerativeModel({ model: modelName });

    // Read and encode the actual audio file
    const { readFile } = await import('node:fs/promises');
    const audioBuffer = await readFile(audioPath);
    const audioBase64 = audioBuffer.toString('base64');

    // Determine MIME type from file extension; unknown extensions are sent as audio/mp4.
    const ext = audioPath.toLowerCase().split('.').pop() || 'm4a';
    const mimeType = AUDIO_MIME_TYPES.get(ext) ?? 'audio/mp4';

    const response = await model.generateContent([
      {
        inlineData: {
          mimeType,
          data: audioBase64,
        },
      },
      GEMINI_AUDIO_PROMPT,
    ]);
    const responseText = response.response.text();
    logger.debug(`Gemini response received (chunk ${chunkIndex}): ${responseText.substring(0, 200)}...`);

    return parseTranscriptionResponse(responseText);
  } catch (error) {
    logger.error(`Gemini API error for ${audioPath} (chunk ${chunkIndex})`, {
      error,
    });
    throw error;
  }
}
/**
 * AC05: Transcribe an audio file, issuing one request per chunk when the
 * estimated duration exceeds the configured chunk length, and merge the results.
 *
 * NOTE(review): each chunk request passes the same `audioPath` and only the
 * chunk index to transcribeAudioChunk; the start/end offsets produced by
 * getAudioChunks are not forwarded. As written, every "chunk" call therefore
 * operates on the whole file - confirm whether real audio slicing is intended.
 *
 * @param {string} audioPath - Path of the audio file.
 * @param {number} durationSeconds - Estimated duration in seconds.
 * @param {object} config - Reads `maxAudioChunkDuration` (minutes, default 10) and
 *   `rateLimitDelay` (milliseconds to wait between chunk requests, optional).
 * @returns {Promise<{transcription: string, speakers: string[], timestamps: Array, shortDescription: string}>}
 * @throws {Error} If no chunk could be transcribed. In the single-chunk case
 *   errors from transcribeAudioChunk propagate unchanged.
 */
export async function handleLongAudio(audioPath, durationSeconds, config) {
  const maxChunkDuration = (config.maxAudioChunkDuration || 10) * 60; // Convert to seconds
  if (durationSeconds <= maxChunkDuration) {
    // Single chunk - call directly
    return transcribeAudioChunk(audioPath, 0, config);
  }

  // AC05: Split into chunks
  const chunks = getAudioChunks(durationSeconds, maxChunkDuration);
  logger.info(`Processing ${chunks.length} audio chunks for ${audioPath}`, {
    duration: durationSeconds,
    chunkDuration: maxChunkDuration,
  });

  const chunkResults = [];
  for (const chunk of chunks) {
    try {
      chunkResults.push(await transcribeAudioChunk(audioPath, chunk.index, config));
    } catch (err) {
      // Best effort: a failed chunk is logged and skipped, the rest still run.
      logger.warn(`Failed to transcribe chunk ${chunk.index}, continuing with others`, {
        error: err instanceof Error ? err.message : String(err),
      });
    }
    // AC05: Respect rate limiting between chunks. The wait runs after failures
    // as well, so an errored request is not followed immediately by another one.
    const isLastChunk = chunk.index >= chunks.length - 1;
    if (!isLastChunk && config.rateLimitDelay) {
      await new Promise((resolve) => setTimeout(resolve, config.rateLimitDelay));
    }
  }

  if (chunkResults.length === 0) {
    throw new Error(`Failed to transcribe any chunks for ${audioPath}`);
  }

  // AC05: Merge all chunk transcriptions
  const mergedTranscription = chunkResults.map((r) => r.transcription).join('\n\n');
  const allSpeakers = [...new Set(chunkResults.flatMap((r) => r.speakers))];
  const mergedTimestamps = chunkResults.flatMap((r) => r.timestamps);
  // AC03: The last successful chunk's description stands in for the whole file.
  const shortDescription =
    chunkResults[chunkResults.length - 1]?.shortDescription || 'Audio transcription available';

  return {
    transcription: mergedTranscription,
    speakers: allSpeakers,
    timestamps: mergedTimestamps,
    shortDescription,
  };
}
/**
 * AC01-AC05: Transcription orchestrator for a single audio file.
 *
 * Estimates the duration from the file size, hands the file to
 * handleLongAudio, and wraps the returned data in an enrichment record
 * (AC04) carrying provider, model, a YYYY-MM-DD version and a creation time.
 *
 * @param {string} audioPath - Path of the audio file.
 * @param {object} config - Passed through to handleLongAudio; `geminiModel`
 *   is also recorded on the enrichment (default 'gemini-1.5-pro').
 * @returns {Promise<object>} Enrichment record with kind 'transcription'.
 * @throws Any error raised while reading file stats or transcribing; it is
 *   logged here and rethrown.
 */
export async function transcribeAudio(audioPath, config) {
  try {
    // Estimate audio duration from file size
    const { size: fileSizeBytes } = await stat(audioPath);
    const durationSeconds = estimateAudioDuration(fileSizeBytes);
    const startFields = {
      fileSizeKB: Math.round(fileSizeBytes / 1024),
      estimatedDuration: Math.round(durationSeconds / 60),
    };
    logger.info(`Transcribing audio: ${audioPath}`, startFields);

    // AC05: chunking (when needed) is decided inside handleLongAudio
    const result = await handleLongAudio(audioPath, durationSeconds, config);

    // AC04: enrichment entry with provenance
    const model = config.geminiModel || 'gemini-1.5-pro';
    const [datePart] = new Date().toISOString().split('T');
    const enrichment = {
      kind: 'transcription',
      provider: 'gemini',
      model,
      version: datePart || 'unknown', // YYYY-MM-DD
      createdAt: new Date().toISOString(),
      transcription: result.transcription,
      speakers: result.speakers,
      timestamps: result.timestamps,
      shortDescription: result.shortDescription,
    };

    const doneFields = {
      kind: enrichment.kind,
      speakerCount: enrichment.speakers?.length,
      duration: Math.round(durationSeconds / 60),
    };
    logger.info(`Audio transcription complete for ${audioPath}`, doneFields);
    return enrichment;
  } catch (error) {
    logger.error(`Transcription error for ${audioPath}`, { error });
    throw error;
  }
}
/**
 * Main entry point - enrich a single message with an audio transcription.
 *
 * The message is returned unchanged when transcription is disabled, the
 * message is not audio media, it has no path, it already carries a gemini
 * transcription, the file cannot be accessed, or transcription fails.
 * Otherwise a copy is returned whose `media.enrichment` has the new
 * transcription appended; the input message is not mutated.
 *
 * The "already transcribed" check runs before any file access or API call,
 * so re-running the pipeline does not pay for a transcription that is then
 * discarded.
 *
 * @param {object} message - Message to enrich; reads `messageKind`, `guid` and `media`.
 * @param {object} config - Reads `enableAudioTranscription`; passed on to transcribeAudio.
 * @returns {Promise<object>} The original message or an enriched copy.
 */
export async function analyzeAudio(message, config) {
  // Skip if not enabled
  if (!config.enableAudioTranscription) {
    logger.debug('Audio transcription disabled in config');
    return message;
  }
  // Skip if not a media message
  if (message.messageKind !== 'media' || !message.media) {
    return message;
  }
  const { media } = message;
  // Skip if media is not audio
  if (media.mediaKind !== 'audio') {
    logger.debug('Skipping non-audio media', {
      mediaKind: media.mediaKind,
    });
    return message;
  }
  // Skip if path is missing
  if (!media.path) {
    logger.warn('Skipping audio with missing path', {
      filename: media.filename,
    });
    return message;
  }
  // Idempotency: don't re-transcribe if already done. This is checked before
  // touching the file or the API.
  const existingTranscription = Array.isArray(media.enrichment)
    ? media.enrichment.find((e) => e.kind === 'transcription' && e.provider === 'gemini')
    : undefined;
  if (existingTranscription) {
    logger.debug('Transcription already exists, skipping re-analysis', {
      model: existingTranscription.model,
      guid: message.guid,
    });
    return message;
  }
  // Check if audio file exists
  try {
    await access(media.path);
  } catch {
    logger.warn('Audio file not found at path', { path: media.path });
    return message;
  }
  try {
    // AC01-AC05: Transcribe audio (handles chunking, API calls, parsing)
    const enrichment = await transcribeAudio(media.path, config);
    // Build an updated copy rather than mutating the caller's message
    const updatedMedia = {
      ...media,
      enrichment: [...(media.enrichment || []), enrichment],
    };
    logger.info('Audio enriched', {
      filename: media.filename,
      guid: message.guid,
    });
    return {
      ...message,
      media: updatedMedia,
    };
  } catch (error) {
    logger.error('Error analyzing audio', {
      filename: message.media?.filename,
      guid: message.guid,
      error: error instanceof Error ? error.message : String(error),
    });
    // Don't crash pipeline - return original message
    return message;
  }
}
/**
 * Batch wrapper around analyzeAudio.
 *
 * Messages are processed one after another. A message counts as a success
 * when the returned message has more `media.enrichment` entries than the
 * input, as skipped otherwise, and as an error when analyzeAudio throws (in
 * which case the original message is kept). A summary is logged at the end.
 *
 * @param {object[]} messages - Messages to process.
 * @param {object} config - Passed through to analyzeAudio.
 * @returns {Promise<object[]>} One output message per input message, in order.
 */
export async function analyzeAudios(messages, config) {
  const tally = { successCount: 0, skipCount: 0, errorCount: 0 };
  const results = [];

  for (const message of messages) {
    let outcome;
    try {
      outcome = await analyzeAudio(message, config);
      // Enrichment was added if the list grew relative to the input message
      const enrichments = outcome.media?.enrichment;
      const grew =
        Boolean(enrichments) &&
        outcome.media.enrichment.length > (message.media?.enrichment?.length || 0);
      if (grew) {
        tally.successCount++;
      } else {
        tally.skipCount++;
      }
    } catch (err) {
      tally.errorCount++;
      logger.error('Failed to analyze message', {
        guid: message.guid,
        error: err instanceof Error ? err.message : String(err),
      });
      // Keep original message if analysis fails
      outcome = message;
    }
    results.push(outcome);
  }

  logger.info('Batch audio transcription complete', {
    ...tally,
    total: messages.length,
  });
  return results;
}
370
+ //# sourceMappingURL=audio-transcription.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"audio-transcription.js","sourceRoot":"","sources":["../../src/enrich/audio-transcription.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAA;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAI1D,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAA;AAkB5C,MAAM,MAAM,GAAG,YAAY,CAAC,4BAA4B,CAAC,CAAA;AAEzD;;;GAGG;AACH,MAAM,mBAAmB,GAAG;;;;;;;;;;;;;;;;;;;;;+CAqBmB,CAAA;AAE/C;;;GAGG;AACH,SAAS,qBAAqB,CAAC,aAAqB;IACnD,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,CAAA,CAAC,+BAA+B;IAC1D,OAAO,IAAI,CAAC,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,CAAA;AAC1C,CAAC;AAED;;;GAGG;AACH,SAAS,cAAc,CACtB,eAAuB,EACvB,gBAAgB,GAAG,GAAG;IAEtB,MAAM,MAAM,GAA+D,EAAE,CAAA;IAE7E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,GAAG,eAAe,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC;YACX,KAAK,EAAE,CAAC;YACR,QAAQ,EAAE,CAAC,GAAG,gBAAgB;YAC9B,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,gBAAgB,EAAE,eAAe,CAAC;SAC7D,CAAC,CAAA;IACH,CAAC;IAED,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACzC,SAAiB,EACjB,UAAkB,EAClB,MAAyC;IAEzC,MAAM,MAAM,GAAG,MAAM,CAAC,YAAY,CAAA;IAClC,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,IAAI,gBAAgB,CAAA;IAExD,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAA;IACtE,CAAC;IAED,IAAI,CAAC;QACJ,6DAA6D;QAC7D,MAAM,KAAK,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAA;QAC5C,MAAM,KAAK,GAAG,KAAK,CAAC,kBAAkB,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAA;QAE5D,wCAAwC;QACxC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAA;QACrD,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,SAAS,CAAC,CAAA;QAC7C,MAAM,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAA;QAElD,0CAA0C;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,KAAK,CAAA;QAC7D,MAAM,WAAW,GAA2B;YAC3C,GAAG,EAAE,WAAW;YAChB,GAAG,EAAE,YAAY;YACjB,GAAG,EAAE,WAAW;YAChB,GAAG,EAAE,WAAW;YAChB,GAAG,EAAE,WAAW;YAChB,IAAI,EAAE,YAAY;SAClB,CAAA;QACD,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,WAAW,CAAA;QAEhD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,eAAe,CAAC;YAC5C;gBACC,UAAU,EAAE;oBACX,QAAQ;oBACR,I
AAI,EAAE,WAAW;iBACjB;aACD;YACD,mBAAmB;SACnB,CAAC,CAAA;QAEF,MAAM,YAAY,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;QAC7C,MAAM,CAAC,KAAK,CACX,mCAAmC,UAAU,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CACtF,CAAA;QAED,2CAA2C;QAC3C,MAAM,cAAc,GAAG,YAAY,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAA;QAC/D,MAAM,QAAQ,GAAa,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAS,cAAc,CAAC,CAAC,CAAA,CAAC,2BAA2B;QAElG,kCAAkC;QAClC,MAAM,qBAAqB,GAAG,YAAY,CAAC,KAAK,CAC/C,uCAAuC,CACvC,CAAA;QACD,MAAM,gBAAgB,GACrB,qBAAqB,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,+BAA+B,CAAA;QAEtE,qCAAqC;QACrC,MAAM,kBAAkB,GAAG,YAAY,CAAC,KAAK,CAC5C,mDAAmD,CACnD,CAAA;QACD,MAAM,aAAa,GAAG,kBAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,YAAY,CAAA;QAErE,6BAA6B;QAC7B,MAAM,eAAe,GAAG,YAAY,CAAC,KAAK,CACzC,uDAAuD,CACvD,CAAA;QACD,MAAM,cAAc,GAAG,eAAe,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;QAEzD,8BAA8B;QAC9B,MAAM,UAAU,GAAG,cAAc;aAC/B,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aACrC,GAAG,CAAC,CAAC,IAAY,EAAE,EAAE;YACrB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAA;YACrE,OAAO;gBACN,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,OAAO;gBAC3B,OAAO,EAAE,WAAW,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE;gBACvC,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI;aAC3B,CAAA;QACF,CAAC,CAAC,CAAA;QAEH,OAAO;YACN,aAAa;YACb,QAAQ;YACR,UAAU;YACV,gBAAgB;SAChB,CAAA;IACF,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,MAAM,CAAC,KAAK,CAAC,wBAAwB,SAAS,WAAW,UAAU,GAAG,EAAE;YACvE,KAAK;SACL,CAAC,CAAA;QACF,MAAM,KAAK,CAAA;IACZ,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACpC,SAAiB,EACjB,eAAuB,EACvB,MAAyC;IAEzC,MAAM,gBAAgB,GAAG,CAAC,MAAM,CAAC,qBAAqB,IAAI,EAAE,CAAC,GAAG,EAAE,CAAA,CAAC,qBAAqB;IAExF,IAAI,eAAe,IAAI,gBAAgB,EAAE,CAAC;QACzC,+BAA+B;QAC/B,OAAO,oBAAoB,CAAC,SAAS,EAAE,CAAC,EAAE,MAAM,CAAC,CAAA;IAClD,CAAC;IAED,0BAA0B;IAC1B,MAAM,MAAM,GAAG,cAAc,CAAC,eAAe,EAAE,gBAAgB,CAAC,CAAA;IAChE,MAAM,CAAC,IAAI,CAAC,cAAc,MAAM,CAAC,MAAM,qBAAqB,SAAS,EAAE,EAAE;QACxE,QAAQ,EAAE,eAAe;QACzB,aAAa,EAAE,gBAAgB;KAC/B,CAAC,CAAA;IAEF,MAAM,YAAY,GAAwB,EAAE,CAAA;IA
E5C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC5B,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,SAAS,EAAE,KAAK,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA;YACzE,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAEzB,6CAA6C;YAC7C,IAAI,KAAK,CAAC,KAAK,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;gBAC9D,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAC7B,UAAU,CAAC,OAAO,EAAE,MAAM,CAAC,cAAc,CAAC,CAC1C,CAAA;YACF,CAAC;QACF,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,MAAM,CAAC,IAAI,CACV,8BAA8B,KAAK,CAAC,KAAK,0BAA0B,EACnE;gBACC,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACvD,CACD,CAAA;YACD,kDAAkD;QACnD,CAAC;IACF,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,uCAAuC,SAAS,EAAE,CAAC,CAAA;IACpE,CAAC;IAED,uCAAuC;IACvC,MAAM,mBAAmB,GAAG,YAAY;SACtC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC;SAC3B,IAAI,CAAC,MAAM,CAAC,CAAA;IACd,MAAM,WAAW,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;IACzE,MAAM,gBAAgB,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAA;IAElE,oFAAoF;IACpF,MAAM,gBAAgB,GACrB,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,gBAAgB;QACvD,+BAA+B,CAAA;IAEhC,OAAO;QACN,aAAa,EAAE,mBAAmB;QAClC,QAAQ,EAAE,WAAW;QACrB,UAAU,EAAE,gBAAgB;QAC5B,gBAAgB;KAChB,CAAA;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACpC,SAAiB,EACjB,MAAyC;IAEzC,IAAI,CAAC;QACJ,yCAAyC;QACzC,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,CAAA;QACvC,MAAM,eAAe,GAAG,qBAAqB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAE7D,MAAM,CAAC,IAAI,CAAC,uBAAuB,SAAS,EAAE,EAAE;YAC/C,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,GAAG,IAAI,CAAC;YAC7C,iBAAiB,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,EAAE,CAAC;SACnD,CAAC,CAAA;QAEF,kDAAkD;QAClD,MAAM,iBAAiB,GAAG,MAAM,eAAe,CAC9C,SAAS,EACT,eAAe,EACf,MAAM,CACN,CAAA;QAED,qDAAqD;QACrD,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,IAAI,gBAAgB,CAAA;QACxD,MAAM,OAAO,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,SAAS,CAAA;QACnE,MAAM,UAAU,GAAoB;YACnC,IAAI,EAAE,eAAe;Y
ACrB,QAAQ,EAAE,QAAQ;YAClB,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,aAAa;YACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,aAAa,EAAE,iBAAiB,CAAC,aAAa;YAC9C,QAAQ,EAAE,iBAAiB,CAAC,QAAQ;YACpC,UAAU,EAAE,iBAAiB,CAAC,UAAU;YACxC,gBAAgB,EAAE,iBAAiB,CAAC,gBAAgB;SACpD,CAAA;QAED,MAAM,CAAC,IAAI,CAAC,oCAAoC,SAAS,EAAE,EAAE;YAC5D,IAAI,EAAE,UAAU,CAAC,IAAI;YACrB,YAAY,EAAE,UAAU,CAAC,QAAQ,EAAE,MAAM;YACzC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,EAAE,CAAC;SAC1C,CAAC,CAAA;QAEF,OAAO,UAAU,CAAA;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,MAAM,CAAC,KAAK,CAAC,2BAA2B,SAAS,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAA;QAC/D,MAAM,KAAK,CAAA;IACZ,CAAC;AACF,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CACjC,OAAgB,EAChB,MAAyC;IAEzC,sBAAsB;IACtB,IAAI,CAAC,MAAM,CAAC,wBAAwB,EAAE,CAAC;QACtC,MAAM,CAAC,KAAK,CAAC,wCAAwC,CAAC,CAAA;QACtD,OAAO,OAAO,CAAA;IACf,CAAC;IAED,8BAA8B;IAC9B,IAAI,OAAO,CAAC,WAAW,KAAK,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QACvD,OAAO,OAAO,CAAA;IACf,CAAC;IAED,6BAA6B;IAC7B,IAAI,OAAO,CAAC,KAAK,CAAC,SAAS,KAAK,OAAO,EAAE,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,0BAA0B,EAAE;YACxC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,SAAS;SAClC,CAAC,CAAA;QACF,OAAO,OAAO,CAAA;IACf,CAAC;IAED,0BAA0B;IAC1B,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,kCAAkC,EAAE;YAC/C,QAAQ,EAAE,OAAO,CAAC,KAAK,CAAC,QAAQ;SAChC,CAAC,CAAA;QACF,OAAO,OAAO,CAAA;IACf,CAAC;IAED,6BAA6B;IAC7B,IAAI,CAAC;QACJ,MAAM,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IACjC,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,CAAC,IAAI,CAAC,8BAA8B,EAAE,EAAE,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAA;QACzE,OAAO,OAAO,CAAA;IACf,CAAC;IAED,IAAI,CAAC;QACJ,qEAAqE;QACrE,MAAM,UAAU,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;QAEpE,yDAAyD;QACzD,MAAM,qBAAqB,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,CAC3D,CAAC,CAAC,EAAE,EAAE,CACL,CAAC,CAAC,IAAI,KAAK,eAAe;YAC1B,CAAC,CAAC,QAAQ,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAC1D,CAAA;QAED,IAAI,qBAAqB,EAAE,CAAC;YAC3B,MAAM,CAAC,KAAK,CAAC,oDAAoD,EAAE;gBAClE,KAAK,EAAE,qBAAqB,CAAC,KAAK;gBAClC,IAAI,EAAE,O
AAO,CAAC,IAAI;aAClB,CAAC,CAAA;YACF,OAAO,OAAO,CAAA;QACf,CAAC;QAED,iCAAiC;QACjC,MAAM,YAAY,GAAc;YAC/B,GAAG,OAAO,CAAC,KAAK;YAChB,UAAU,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,UAAU,CAAC;SAC7D,CAAA;QAED,MAAM,CAAC,IAAI,CAAC,gBAAgB,EAAE;YAC7B,QAAQ,EAAE,OAAO,CAAC,KAAK,CAAC,QAAQ;YAChC,IAAI,EAAE,OAAO,CAAC,IAAI;SAClB,CAAC,CAAA;QAEF,OAAO;YACN,GAAG,OAAO;YACV,KAAK,EAAE,YAAY;SACnB,CAAA;IACF,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE;YACrC,QAAQ,EAAE,OAAO,CAAC,KAAK,EAAE,QAAQ;YACjC,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC7D,CAAC,CAAA;QACF,iDAAiD;QACjD,OAAO,OAAO,CAAA;IACf,CAAC;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAClC,QAAmB,EACnB,MAAyC;IAEzC,MAAM,OAAO,GAAc,EAAE,CAAA;IAC7B,IAAI,YAAY,GAAG,CAAC,CAAA;IACpB,IAAI,SAAS,GAAG,CAAC,CAAA;IACjB,IAAI,UAAU,GAAG,CAAC,CAAA;IAElB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAChC,IAAI,CAAC;YACJ,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;YACpD,gCAAgC;YAChC,IACC,QAAQ,CAAC,KAAK,EAAE,UAAU;gBAC1B,QAAQ,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM;oBAC/B,CAAC,OAAO,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC,EACxC,CAAC;gBACF,YAAY,EAAE,CAAA;YACf,CAAC;iBAAM,CAAC;gBACP,SAAS,EAAE,CAAA;YACZ,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QACvB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,UAAU,EAAE,CAAA;YACZ,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE;gBACzC,IAAI,EAAE,OAAO,CAAC,IAAI;gBAClB,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACvD,CAAC,CAAA;YACF,0CAA0C;YAC1C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACtB,CAAC;IACF,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,oCAAoC,EAAE;QACjD,YAAY;QACZ,SAAS;QACT,UAAU;QACV,KAAK,EAAE,QAAQ,CAAC,MAAM;KACtB,CAAC,CAAA;IACF,OAAO,OAAO,CAAA;AACf,CAAC"}
@@ -0,0 +1,137 @@
1
+ /**
2
+ * Checkpoint and Resume Module (ENRICH--T06)
3
+ *
4
+ * Implements resumable enrichment with:
5
+ * - AC01: Checkpoint writes after N items (configurable, default 100)
6
+ * - AC02: Full checkpoint schema with stats and failed items
7
+ * - AC03: Atomic writes using temp file + rename pattern
8
+ * - AC04: Resume within ≤1 item of last checkpoint
9
+ * - AC05: Config consistency verification with hash comparison
10
+ *
11
+ * Architecture:
12
+ * - createCheckpoint: Create new checkpoint with schema
13
+ * - shouldWriteCheckpoint: Determine if checkpoint should be written
14
+ * - getResumeIndex: Calculate resume position from checkpoint
15
+ * - verifyConfigHash: Validate config hasn't changed
16
+ * - getCheckpointPath: Generate deterministic checkpoint file path
17
+ * - loadCheckpoint: Load checkpoint from disk
18
+ * - saveCheckpoint: Write checkpoint atomically
19
+ */
20
+ export type FailedItem = {
21
+ index: number;
22
+ guid: string;
23
+ kind: string;
24
+ error: string;
25
+ };
26
+ export type CheckpointStats = {
27
+ processedCount: number;
28
+ failedCount: number;
29
+ enrichmentsByKind: Record<string, number>;
30
+ };
31
+ export type EnrichCheckpoint = {
32
+ version: string;
33
+ configHash: string;
34
+ lastProcessedIndex: number;
35
+ totalProcessed: number;
36
+ totalFailed: number;
37
+ stats: CheckpointStats;
38
+ failedItems: FailedItem[];
39
+ createdAt: string;
40
+ };
41
+ export type CheckpointInput = {
42
+ lastProcessedIndex: number;
43
+ totalProcessed: number;
44
+ totalFailed: number;
45
+ stats: CheckpointStats;
46
+ failedItems: FailedItem[];
47
+ configHash: string;
48
+ };
49
+ /**
50
+ * AC01: Determine if checkpoint should be written after N items
51
+ *
52
+ * @param itemIndex - Current item index (0-based)
53
+ * @param checkpointInterval - Checkpoint interval (default 100)
54
+ * @returns true if checkpoint should be written
55
+ */
56
+ export declare function shouldWriteCheckpoint(itemIndex: number, checkpointInterval?: number): boolean;
57
+ /**
58
+ * AC02: Create checkpoint with full schema
59
+ *
60
+ * @param input - Checkpoint input data
61
+ * @returns EnrichCheckpoint with all required fields
62
+ */
63
+ export declare function createCheckpoint(input: CheckpointInput): EnrichCheckpoint;
64
+ /**
65
+ * AC03: Generate deterministic checkpoint file path
66
+ *
67
+ * @param checkpointDir - Directory for checkpoints
68
+ * @param configHash - Config hash for uniqueness
69
+ * @returns Path to checkpoint file
70
+ */
71
+ export declare function getCheckpointPath(checkpointDir: string, configHash: string): string;
72
+ /**
73
+ * AC03: Save checkpoint atomically using temp file + rename
74
+ *
75
+ * @param checkpoint - Checkpoint to save
76
+ * @param checkpointPath - Path to save checkpoint
77
+ */
78
+ export declare function saveCheckpoint(checkpoint: EnrichCheckpoint, checkpointPath: string): Promise<void>;
79
+ /**
80
+ * AC03: Load checkpoint from disk
81
+ *
82
+ * @param checkpointPath - Path to checkpoint file
83
+ * @returns Loaded checkpoint or null if not found
84
+ */
85
+ export declare function loadCheckpoint(checkpointPath: string): Promise<EnrichCheckpoint | null>;
86
+ /**
87
+ * AC04: Calculate resume index from checkpoint
88
+ *
89
+ * Resume at lastProcessedIndex + 1 to ensure we don't re-process
90
+ * the last item that was in the previous checkpoint.
91
+ *
92
+ * @param checkpoint - Checkpoint to resume from
93
+ * @returns Resume index: lastProcessedIndex + 1, i.e. within one item of the last checkpoint
94
+ */
95
+ export declare function getResumeIndex(checkpoint: EnrichCheckpoint): number;
96
+ /**
97
+ * AC05: Compute config hash for consistency checking
98
+ *
99
+ * @param config - Configuration object
100
+ * @returns SHA-256 hash of config
101
+ */
102
+ export declare function computeConfigHash(config: Record<string, unknown>): string;
103
+ /**
104
+ * AC05: Verify config hasn't changed by comparing hashes
105
+ *
106
+ * @param checkpointHash - Hash from checkpoint
107
+ * @param currentHash - Hash of current config
108
+ * @returns true if hashes match (config unchanged)
109
+ */
110
+ export declare function verifyConfigHash(checkpointHash: string, currentHash: string): boolean;
111
+ export type CheckpointState = {
112
+ isResuming: boolean;
113
+ lastCheckpointIndex: number;
114
+ configHash: string;
115
+ failedItemsInCheckpoint: FailedItem[];
116
+ };
117
+ /**
118
+ * Initialize checkpoint state for enrichment run
119
+ *
120
+ * @param checkpoint - Loaded checkpoint or null
121
+ * @param currentConfigHash - Hash of current config
122
+ * @returns Checkpoint state, or an Error value (returned rather than thrown, per the declared return type)
123
+ */
124
+ export declare function initializeCheckpointState(checkpoint: EnrichCheckpoint | null, currentConfigHash: string): CheckpointState | Error;
125
+ /**
126
+ * Create a new checkpoint to save after processing a batch
127
+ *
128
+ * @param lastProcessedIndex - Index of last processed item
129
+ * @param totalProcessed - Total items processed so far
130
+ * @param totalFailed - Total failed items so far
131
+ * @param batchStats - Stats for this batch
132
+ * @param failedItems - Failed items in this batch
133
+ * @param configHash - Hash of current config
134
+ * @returns Checkpoint ready to save
135
+ */
136
+ export declare function prepareCheckpoint(lastProcessedIndex: number, totalProcessed: number, totalFailed: number, batchStats: CheckpointStats, failedItems: FailedItem[], configHash: string): EnrichCheckpoint;
137
+ //# sourceMappingURL=checkpoint.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"checkpoint.d.ts","sourceRoot":"","sources":["../../src/enrich/checkpoint.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAUH,MAAM,MAAM,UAAU,GAAG;IACxB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;CACb,CAAA;AAED,MAAM,MAAM,eAAe,GAAG;IAC7B,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,EAAE,MAAM,CAAA;IACnB,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CACzC,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC9B,OAAO,EAAE,MAAM,CAAA;IACf,UAAU,EAAE,MAAM,CAAA;IAClB,kBAAkB,EAAE,MAAM,CAAA;IAC1B,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,eAAe,CAAA;IACtB,WAAW,EAAE,UAAU,EAAE,CAAA;IACzB,SAAS,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,MAAM,MAAM,eAAe,GAAG;IAC7B,kBAAkB,EAAE,MAAM,CAAA;IAC1B,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,eAAe,CAAA;IACtB,WAAW,EAAE,UAAU,EAAE,CAAA;IACzB,UAAU,EAAE,MAAM,CAAA;CAClB,CAAA;AAMD;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CACpC,SAAS,EAAE,MAAM,EACjB,kBAAkB,SAAM,GACtB,OAAO,CAGT;AAMD;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,eAAe,GAAG,gBAAgB,CAWzE;AAMD;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAChC,aAAa,EAAE,MAAM,EACrB,UAAU,EAAE,MAAM,GAChB,MAAM,CAER;AAED;;;;;GAKG;AACH,wBAAsB,cAAc,CACnC,UAAU,EAAE,gBAAgB,EAC5B,cAAc,EAAE,MAAM,GACpB,OAAO,CAAC,IAAI,CAAC,CAqBf;AAED;;;;;GAKG;AACH,wBAAsB,cAAc,CACnC,cAAc,EAAE,MAAM,GACpB,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CASlC;AAMD;;;;;;;;GAQG;AACH,wBAAgB,cAAc,CAAC,UAAU,EAAE,gBAAgB,GAAG,MAAM,CAGnE;AAMD;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAGzE;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC/B,cAAc,EAAE,MAAM,EACtB,WAAW,EAAE,MAAM,GACjB,OAAO,CAET;AAMD,MAAM,MAAM,eAAe,GAAG;IAC7B,UAAU,EAAE,OAAO,CAAA;IACnB,mBAAmB,EAAE,MAAM,CAAA;IAC3B,UAAU,EAAE,MAAM,CAAA;IAClB,uBAAuB,EAAE,UAAU,EAAE,CAAA;CACrC,CAAA;AAED;;;;;;GAMG;AACH,wBAAgB,yBAAyB,CACxC,UAAU,EAAE,gBAAgB,GAAG,IAAI,EACnC,iBAAiB,EAAE,MAAM,GACvB,eAAe,GAAG,KAAK,CAyBzB;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,iBAAiB,CAChC,kBAAkB,EAAE,MAAM,EAC1B,cAAc,EAAE,MAAM,EACtB,WAAW,EAAE,MAAM,EACnB,UAAU,EAAE,eAAe,EAC3B,WAAW,EAAE,
UAAU,EAAE,EACzB,UAAU,EAAE,MAAM,GAChB,gBAAgB,CASlB"}