@nathanvale/chatline 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -0
- package/LICENSE +21 -0
- package/README.md +1535 -0
- package/dist/bin/index.js +5121 -0
- package/dist/cli/commands/clean.d.ts +17 -0
- package/dist/cli/commands/clean.d.ts.map +1 -0
- package/dist/cli/commands/clean.js +142 -0
- package/dist/cli/commands/clean.js.map +1 -0
- package/dist/cli/commands/doctor.d.ts +17 -0
- package/dist/cli/commands/doctor.d.ts.map +1 -0
- package/dist/cli/commands/doctor.js +202 -0
- package/dist/cli/commands/doctor.js.map +1 -0
- package/dist/cli/commands/enrich-ai.d.ts +17 -0
- package/dist/cli/commands/enrich-ai.d.ts.map +1 -0
- package/dist/cli/commands/enrich-ai.js +371 -0
- package/dist/cli/commands/enrich-ai.js.map +1 -0
- package/dist/cli/commands/index.d.ts +16 -0
- package/dist/cli/commands/index.d.ts.map +1 -0
- package/dist/cli/commands/index.js +16 -0
- package/dist/cli/commands/index.js.map +1 -0
- package/dist/cli/commands/ingest-csv.d.ts +17 -0
- package/dist/cli/commands/ingest-csv.d.ts.map +1 -0
- package/dist/cli/commands/ingest-csv.js +138 -0
- package/dist/cli/commands/ingest-csv.js.map +1 -0
- package/dist/cli/commands/ingest-db.d.ts +17 -0
- package/dist/cli/commands/ingest-db.d.ts.map +1 -0
- package/dist/cli/commands/ingest-db.js +159 -0
- package/dist/cli/commands/ingest-db.js.map +1 -0
- package/dist/cli/commands/init.d.ts +17 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +110 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/normalize-link.d.ts +16 -0
- package/dist/cli/commands/normalize-link.d.ts.map +1 -0
- package/dist/cli/commands/normalize-link.js +144 -0
- package/dist/cli/commands/normalize-link.js.map +1 -0
- package/dist/cli/commands/render-markdown.d.ts +17 -0
- package/dist/cli/commands/render-markdown.d.ts.map +1 -0
- package/dist/cli/commands/render-markdown.js +218 -0
- package/dist/cli/commands/render-markdown.js.map +1 -0
- package/dist/cli/commands/stats.d.ts +17 -0
- package/dist/cli/commands/stats.d.ts.map +1 -0
- package/dist/cli/commands/stats.js +175 -0
- package/dist/cli/commands/stats.js.map +1 -0
- package/dist/cli/commands/validate.d.ts +17 -0
- package/dist/cli/commands/validate.d.ts.map +1 -0
- package/dist/cli/commands/validate.js +152 -0
- package/dist/cli/commands/validate.js.map +1 -0
- package/dist/cli/index.d.ts +13 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +121 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/types.d.ts +93 -0
- package/dist/cli/types.d.ts.map +1 -0
- package/dist/cli/types.js +7 -0
- package/dist/cli/types.js.map +1 -0
- package/dist/cli/utils.d.ts +29 -0
- package/dist/cli/utils.d.ts.map +1 -0
- package/dist/cli/utils.js +53 -0
- package/dist/cli/utils.js.map +1 -0
- package/dist/cli.d.ts +9 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +1805 -0
- package/dist/config/generator.d.ts +90 -0
- package/dist/config/generator.d.ts.map +1 -0
- package/dist/config/generator.js +320 -0
- package/dist/config/generator.js.map +1 -0
- package/dist/config/loader.d.ts +107 -0
- package/dist/config/loader.d.ts.map +1 -0
- package/dist/config/loader.js +251 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/config/schema.d.ts +107 -0
- package/dist/config/schema.d.ts.map +1 -0
- package/dist/config/schema.js +169 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/enrich/audio-transcription.d.ts +77 -0
- package/dist/enrich/audio-transcription.d.ts.map +1 -0
- package/dist/enrich/audio-transcription.js +370 -0
- package/dist/enrich/audio-transcription.js.map +1 -0
- package/dist/enrich/checkpoint.d.ts +137 -0
- package/dist/enrich/checkpoint.d.ts.map +1 -0
- package/dist/enrich/checkpoint.js +205 -0
- package/dist/enrich/checkpoint.js.map +1 -0
- package/dist/enrich/idempotency.d.ts +90 -0
- package/dist/enrich/idempotency.d.ts.map +1 -0
- package/dist/enrich/idempotency.js +188 -0
- package/dist/enrich/idempotency.js.map +1 -0
- package/dist/enrich/image-analysis.d.ts +62 -0
- package/dist/enrich/image-analysis.d.ts.map +1 -0
- package/dist/enrich/image-analysis.js +264 -0
- package/dist/enrich/image-analysis.js.map +1 -0
- package/dist/enrich/index.d.ts +60 -0
- package/dist/enrich/index.d.ts.map +1 -0
- package/dist/enrich/index.js +74 -0
- package/dist/enrich/index.js.map +1 -0
- package/dist/enrich/link-enrichment.d.ts +37 -0
- package/dist/enrich/link-enrichment.d.ts.map +1 -0
- package/dist/enrich/link-enrichment.js +202 -0
- package/dist/enrich/link-enrichment.js.map +1 -0
- package/dist/enrich/pdf-video-handling.d.ts +49 -0
- package/dist/enrich/pdf-video-handling.d.ts.map +1 -0
- package/dist/enrich/pdf-video-handling.js +325 -0
- package/dist/enrich/pdf-video-handling.js.map +1 -0
- package/dist/enrich/progress-tracker.d.ts +120 -0
- package/dist/enrich/progress-tracker.d.ts.map +1 -0
- package/dist/enrich/progress-tracker.js +220 -0
- package/dist/enrich/progress-tracker.js.map +1 -0
- package/dist/enrich/providers/firecrawl.d.ts +18 -0
- package/dist/enrich/providers/firecrawl.d.ts.map +1 -0
- package/dist/enrich/providers/firecrawl.js +48 -0
- package/dist/enrich/providers/firecrawl.js.map +1 -0
- package/dist/enrich/providers/generic.d.ts +16 -0
- package/dist/enrich/providers/generic.d.ts.map +1 -0
- package/dist/enrich/providers/generic.js +36 -0
- package/dist/enrich/providers/generic.js.map +1 -0
- package/dist/enrich/providers/index.d.ts +14 -0
- package/dist/enrich/providers/index.d.ts.map +1 -0
- package/dist/enrich/providers/index.js +13 -0
- package/dist/enrich/providers/index.js.map +1 -0
- package/dist/enrich/providers/instagram.d.ts +16 -0
- package/dist/enrich/providers/instagram.d.ts.map +1 -0
- package/dist/enrich/providers/instagram.js +43 -0
- package/dist/enrich/providers/instagram.js.map +1 -0
- package/dist/enrich/providers/spotify.d.ts +16 -0
- package/dist/enrich/providers/spotify.d.ts.map +1 -0
- package/dist/enrich/providers/spotify.js +45 -0
- package/dist/enrich/providers/spotify.js.map +1 -0
- package/dist/enrich/providers/twitter.d.ts +16 -0
- package/dist/enrich/providers/twitter.d.ts.map +1 -0
- package/dist/enrich/providers/twitter.js +43 -0
- package/dist/enrich/providers/twitter.js.map +1 -0
- package/dist/enrich/providers/types.d.ts +47 -0
- package/dist/enrich/providers/types.d.ts.map +1 -0
- package/dist/enrich/providers/types.js +15 -0
- package/dist/enrich/providers/types.js.map +1 -0
- package/dist/enrich/providers/youtube.d.ts +16 -0
- package/dist/enrich/providers/youtube.d.ts.map +1 -0
- package/dist/enrich/providers/youtube.js +43 -0
- package/dist/enrich/providers/youtube.js.map +1 -0
- package/dist/enrich/rate-limiting.d.ts +118 -0
- package/dist/enrich/rate-limiting.d.ts.map +1 -0
- package/dist/enrich/rate-limiting.js +258 -0
- package/dist/enrich/rate-limiting.js.map +1 -0
- package/dist/index.d.ts +688 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1729 -0
- package/dist/index.js.map +1 -0
- package/dist/ingest/dedup-merge.d.ts +82 -0
- package/dist/ingest/dedup-merge.d.ts.map +1 -0
- package/dist/ingest/dedup-merge.js +262 -0
- package/dist/ingest/dedup-merge.js.map +1 -0
- package/dist/ingest/ingest-csv.d.ts +62 -0
- package/dist/ingest/ingest-csv.d.ts.map +1 -0
- package/dist/ingest/ingest-csv.js +300 -0
- package/dist/ingest/ingest-csv.js.map +1 -0
- package/dist/ingest/ingest-db.d.ts +64 -0
- package/dist/ingest/ingest-db.d.ts.map +1 -0
- package/dist/ingest/ingest-db.js +172 -0
- package/dist/ingest/ingest-db.js.map +1 -0
- package/dist/ingest/link-replies-and-tapbacks.d.ts +53 -0
- package/dist/ingest/link-replies-and-tapbacks.d.ts.map +1 -0
- package/dist/ingest/link-replies-and-tapbacks.js +381 -0
- package/dist/ingest/link-replies-and-tapbacks.js.map +1 -0
- package/dist/normalize/date-converters.d.ts +45 -0
- package/dist/normalize/date-converters.d.ts.map +1 -0
- package/dist/normalize/date-converters.js +166 -0
- package/dist/normalize/date-converters.js.map +1 -0
- package/dist/normalize/path-validator.d.ts +65 -0
- package/dist/normalize/path-validator.d.ts.map +1 -0
- package/dist/normalize/path-validator.js +221 -0
- package/dist/normalize/path-validator.js.map +1 -0
- package/dist/normalize/validate-normalized.d.ts +45 -0
- package/dist/normalize/validate-normalized.d.ts.map +1 -0
- package/dist/normalize/validate-normalized.js +144 -0
- package/dist/normalize/validate-normalized.js.map +1 -0
- package/dist/render/embeds-blockquotes.d.ts +84 -0
- package/dist/render/embeds-blockquotes.d.ts.map +1 -0
- package/dist/render/embeds-blockquotes.js +204 -0
- package/dist/render/embeds-blockquotes.js.map +1 -0
- package/dist/render/grouping.d.ts +78 -0
- package/dist/render/grouping.d.ts.map +1 -0
- package/dist/render/grouping.js +134 -0
- package/dist/render/grouping.js.map +1 -0
- package/dist/render/index.d.ts +47 -0
- package/dist/render/index.d.ts.map +1 -0
- package/dist/render/index.js +245 -0
- package/dist/render/index.js.map +1 -0
- package/dist/render/reply-rendering.d.ts +88 -0
- package/dist/render/reply-rendering.d.ts.map +1 -0
- package/dist/render/reply-rendering.js +196 -0
- package/dist/render/reply-rendering.js.map +1 -0
- package/dist/schema/message.d.ts +125 -0
- package/dist/schema/message.d.ts.map +1 -0
- package/dist/schema/message.js +331 -0
- package/dist/schema/message.js.map +1 -0
- package/dist/utils/delta-detection.d.ts +107 -0
- package/dist/utils/delta-detection.d.ts.map +1 -0
- package/dist/utils/delta-detection.js +199 -0
- package/dist/utils/delta-detection.js.map +1 -0
- package/dist/utils/enrichment-merge.d.ts +135 -0
- package/dist/utils/enrichment-merge.d.ts.map +1 -0
- package/dist/utils/enrichment-merge.js +280 -0
- package/dist/utils/enrichment-merge.js.map +1 -0
- package/dist/utils/human.d.ts +15 -0
- package/dist/utils/human.d.ts.map +1 -0
- package/dist/utils/human.js +27 -0
- package/dist/utils/human.js.map +1 -0
- package/dist/utils/incremental-state.d.ts +133 -0
- package/dist/utils/incremental-state.d.ts.map +1 -0
- package/dist/utils/incremental-state.js +237 -0
- package/dist/utils/incremental-state.js.map +1 -0
- package/dist/utils/logger.d.ts +40 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +176 -0
- package/dist/utils/logger.js.map +1 -0
- package/package.json +165 -0
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audio Transcription Module (ENRICH--T02)
|
|
3
|
+
*
|
|
4
|
+
* Implements audio transcription with structured output:
|
|
5
|
+
* - AC01: Structured prompt requesting timestamps and speaker identification
|
|
6
|
+
* - AC02: Extract speaker labels (Speaker 1, Speaker 2, etc.)
|
|
7
|
+
* - AC03: Generate short description (1-2 sentences)
|
|
8
|
+
* - AC04: Store under media.enrichment with kind='transcription'
|
|
9
|
+
* - AC05: Handle long audio files (>10min) with streaming/chunking
|
|
10
|
+
*
|
|
11
|
+
* Architecture:
|
|
12
|
+
* - transcribeAudioChunk: Transcribe single chunk with Gemini API
|
|
13
|
+
* - handleLongAudio: Split large files and process chunks
|
|
14
|
+
* - transcribeAudio: Estimate duration, delegate to handleLongAudio, and build the enrichment record
|
|
15
|
+
* - analyzeAudio: Main entry point, handles single message enrichment
|
|
16
|
+
* - analyzeAudios: Batch processing wrapper
|
|
17
|
+
*
|
|
18
|
+
* Error Handling:
|
|
19
|
+
* - Non-fatal errors are logged and original message is returned
|
|
20
|
+
* - Transcription failures don't block enrichment pipeline
|
|
21
|
+
* - Pipeline never crashes on enrichment errors
|
|
22
|
+
*/
|
|
23
|
+
import { access, stat } from 'node:fs/promises';
|
|
24
|
+
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
25
|
+
import { createLogger } from '#utils/logger';
|
|
26
|
+
const logger = createLogger('enrich:audio-transcription');
|
|
27
|
+
/**
|
|
28
|
+
* Structured prompt for Gemini Audio API
|
|
29
|
+
* Requests transcription with speaker identification, timestamps, and summary
|
|
30
|
+
*/
|
|
31
|
+
const GEMINI_AUDIO_PROMPT = `You are an expert at transcribing audio. Please transcribe the audio and provide:
|
|
32
|
+
|
|
33
|
+
1. Full Transcription:
|
|
34
|
+
Format with speaker labels as "Speaker 1: [text]", "Speaker 2: [text]", etc.
|
|
35
|
+
Keep the exact words spoken, preserving natural speech patterns.
|
|
36
|
+
|
|
37
|
+
2. Timestamps:
|
|
38
|
+
Format as MM:SS - Speaker N: [brief content]
|
|
39
|
+
Include timestamp for each speaker change or major topic shift.
|
|
40
|
+
|
|
41
|
+
3. Short Description:
|
|
42
|
+
Provide a 1-2 sentence summary of the audio content and main topics.
|
|
43
|
+
|
|
44
|
+
Format your response exactly as:
|
|
45
|
+
|
|
46
|
+
Transcription:
|
|
47
|
+
[full transcription with Speaker labels here]
|
|
48
|
+
|
|
49
|
+
Timestamps:
|
|
50
|
+
[timestamps here]
|
|
51
|
+
|
|
52
|
+
Short Description: [1-2 sentence summary here]`;
|
|
53
|
+
/**
 * Estimate audio duration in seconds from the file size alone.
 *
 * This is a rough estimate that assumes a constant 128 kbps bitrate
 * (typical for M4A/AAC); files encoded at other bitrates will be off
 * proportionally.
 *
 * @param {number} fileSizeBytes - Size of the audio file in bytes.
 * @returns {number} Estimated duration in whole seconds, rounded up.
 */
function estimateAudioDuration(fileSizeBytes) {
    // 128 kbps = 128,000 bits per second = 16,000 bytes per second.
    // (Dividing by 128 * 1024 treated kilobits as kibibytes and
    // underestimated the duration by roughly 8x.)
    const bytesPerSecond = (128 * 1000) / 8;
    return Math.ceil(fileSizeBytes / bytesPerSecond);
}
|
|
61
|
+
/**
 * AC05: Plan consecutive time ranges that cover an audio file.
 *
 * @param {number} durationSeconds - Total duration to cover, in seconds.
 * @param {number} [maxChunkDuration=600] - Maximum length of one chunk, in seconds.
 * @returns {Array<{index: number, startSec: number, endSec: number}>}
 *   Chunks in order; the last chunk is clamped to `durationSeconds`.
 *   Empty when `durationSeconds` is 0 or negative.
 * @throws {RangeError} When `maxChunkDuration` is zero or negative.
 */
function getAudioChunks(durationSeconds, maxChunkDuration = 600) {
    // A non-positive chunk length would never advance past the start of the
    // audio, so the loop below would never terminate.
    if (maxChunkDuration <= 0) {
        throw new RangeError(`maxChunkDuration must be greater than 0, got ${maxChunkDuration}`);
    }
    const chunks = [];
    for (let i = 0; i * maxChunkDuration < durationSeconds; i++) {
        chunks.push({
            index: i,
            startSec: i * maxChunkDuration,
            endSec: Math.min((i + 1) * maxChunkDuration, durationSeconds),
        });
    }
    return chunks;
}
|
|
76
|
+
/**
 * AC01-AC03: Send an audio file to Gemini with the structured prompt and
 * parse the reply into transcription, speakers, timestamps and summary.
 *
 * The file at `audioPath` is read in full and sent inline as base64;
 * `chunkIndex` is only used to label log messages.
 *
 * @param {string} audioPath - Path of the audio file to transcribe.
 * @param {number} chunkIndex - Chunk number, used in log output.
 * @param {object} config - Reads `geminiApiKey` (required) and `geminiModel`
 *   (defaults to 'gemini-1.5-pro').
 * @returns {Promise<{transcription: string, speakers: string[], timestamps: Array<{time: string, speaker: string, content: string}>, shortDescription: string}>}
 *   Parsed response. Sections that cannot be located fall back to defaults:
 *   the whole response text for `transcription`, an empty list for
 *   `timestamps`, and 'Audio transcription available' for `shortDescription`.
 * @throws {Error} When `config.geminiApiKey` is missing. Errors raised while
 *   reading the file or calling the API are logged and rethrown unchanged.
 */
export async function transcribeAudioChunk(audioPath, chunkIndex, config) {
    const apiKey = config.geminiApiKey;
    const modelName = config.geminiModel || 'gemini-1.5-pro';
    if (!apiKey) {
        throw new Error('GEMINI_API_KEY is required for audio transcription');
    }
    try {
        // AC01: Create Gemini client and call with structured prompt
        const genAI = new GoogleGenerativeAI(apiKey);
        const model = genAI.getGenerativeModel({ model: modelName });
        // Read and encode the actual audio file
        const { readFile } = await import('node:fs/promises');
        const audioBuffer = await readFile(audioPath);
        const audioBase64 = audioBuffer.toString('base64');
        // Determine MIME type from file extension (unknown extensions are sent as audio/mp4)
        const ext = audioPath.toLowerCase().split('.').pop() || 'm4a';
        const mimeTypeMap = {
            m4a: 'audio/mp4',
            mp3: 'audio/mpeg',
            wav: 'audio/wav',
            aac: 'audio/aac',
            ogg: 'audio/ogg',
            flac: 'audio/flac',
        };
        const mimeType = mimeTypeMap[ext] || 'audio/mp4';
        const response = await model.generateContent([
            {
                inlineData: {
                    mimeType,
                    data: audioBase64,
                },
            },
            GEMINI_AUDIO_PROMPT,
        ]);
        const responseText = response.response.text();
        logger.debug(`Gemini response received (chunk ${chunkIndex}): ${responseText.substring(0, 200)}...`);
        // AC02: Parse speaker labels from response
        const speakerMatches = responseText.match(/Speaker \d+/g) ?? [];
        const speakers = Array.from(new Set(speakerMatches)); // Unique speakers in order
        // AC03: Extract short description (rest of the line after the label).
        // Note: `\s*` here, not `\\s*` - the doubled backslash matched a literal
        // backslash, so the description was never found in real responses.
        const shortDescriptionMatch = responseText.match(/Short Description:\s*(.+?)(?=\n|$)/is);
        const shortDescription = shortDescriptionMatch?.[1]?.trim() || 'Audio transcription available';
        // Extract full transcription section (everything up to the Timestamps heading)
        const transcriptionMatch = responseText.match(/Transcription:\s*([\s\S]+?)(?=\n\nTimestamps:|$)/i);
        const transcription = transcriptionMatch?.[1]?.trim() || responseText;
        // Extract timestamps section (everything up to the Short Description heading)
        const timestampsMatch = responseText.match(/Timestamps:\s*([\s\S]+?)(?=\n\nShort Description:|$)/i);
        const timestampsText = timestampsMatch?.[1]?.trim() || '';
        // Parse individual timestamps; lines that do not follow the
        // "MM:SS - Speaker N: text" shape are kept with default time/speaker.
        const timestamps = timestampsText
            .split('\n')
            .filter((line) => line.trim())
            .map((line) => {
            const match = line.match(/(\d{2}:\d{2})\s*-\s*Speaker (\d+):\s*(.+)/);
            return {
                time: match?.[1] || '00:00',
                speaker: `Speaker ${match?.[2] || '1'}`,
                content: match?.[3] || line,
            };
        });
        return {
            transcription,
            speakers,
            timestamps,
            shortDescription,
        };
    }
    catch (error) {
        logger.error(`Gemini API error for ${audioPath} (chunk ${chunkIndex})`, {
            error,
        });
        throw error;
    }
}
|
|
155
|
+
/**
 * AC05: Transcribe an audio file, issuing one request per planned chunk when
 * the estimated duration exceeds the configured chunk length.
 *
 * Chunks are processed one after another. A failed chunk is logged and
 * skipped; the call only fails when no chunk succeeds.
 *
 * NOTE(review): only the file path and the chunk index are passed to
 * transcribeAudioChunk - the chunk's startSec/endSec are not forwarded, so
 * whether each request covers just its own time range depends on that
 * function. Verify against its implementation.
 *
 * @param {string} audioPath - Path of the audio file.
 * @param {number} durationSeconds - Estimated duration in seconds.
 * @param {object} config - Reads `maxAudioChunkDuration` (minutes, default 10)
 *   and `rateLimitDelay` (milliseconds to wait between chunks, optional).
 * @returns {Promise<object>} Merged `{ transcription, speakers, timestamps,
 *   shortDescription }`; the description is taken from the last successful chunk.
 * @throws {Error} When every chunk fails to transcribe.
 */
export async function handleLongAudio(audioPath, durationSeconds, config) {
    const chunkMinutes = config.maxAudioChunkDuration || 10;
    const chunkSeconds = chunkMinutes * 60;
    // Short audio: a single request, no merging needed.
    if (durationSeconds <= chunkSeconds) {
        return transcribeAudioChunk(audioPath, 0, config);
    }
    const plan = getAudioChunks(durationSeconds, chunkSeconds);
    logger.info(`Processing ${plan.length} audio chunks for ${audioPath}`, {
        duration: durationSeconds,
        chunkDuration: chunkSeconds,
    });
    const finalIndex = plan.length - 1;
    const completed = [];
    for (const piece of plan) {
        try {
            completed.push(await transcribeAudioChunk(audioPath, piece.index, config));
            // Pause between chunks (never after the last one) when a delay is configured.
            if (piece.index < finalIndex && config.rateLimitDelay) {
                await new Promise((wake) => setTimeout(wake, config.rateLimitDelay));
            }
        }
        catch (failure) {
            // Best effort: report the failure and move on to the next chunk.
            const reason = failure instanceof Error ? failure.message : String(failure);
            logger.warn(`Failed to transcribe chunk ${piece.index}, continuing with others`, {
                error: reason,
            });
        }
    }
    if (completed.length === 0) {
        throw new Error(`Failed to transcribe any chunks for ${audioPath}`);
    }
    // Merge the per-chunk results in processing order.
    const transcriptParts = completed.map((part) => part.transcription);
    const uniqueSpeakers = new Set(completed.flatMap((part) => part.speakers));
    const finalPart = completed[completed.length - 1];
    return {
        transcription: transcriptParts.join('\n\n'),
        speakers: Array.from(uniqueSpeakers),
        timestamps: completed.flatMap((part) => part.timestamps),
        shortDescription: finalPart?.shortDescription || 'Audio transcription available',
    };
}
|
|
207
|
+
/**
 * AC01-AC05: Transcription orchestrator for one audio file.
 *
 * Estimates the duration from the file size, delegates the actual
 * transcription (and any chunking) to handleLongAudio, and wraps the result
 * in an enrichment record with provenance fields (AC04).
 *
 * @param {string} audioPath - Path of the audio file.
 * @param {object} config - Reads `geminiModel` (defaults to 'gemini-1.5-pro');
 *   also passed through to handleLongAudio.
 * @returns {Promise<object>} Enrichment with `kind: 'transcription'`,
 *   `provider: 'gemini'`, `model`, `version` (YYYY-MM-DD), `createdAt`
 *   (ISO timestamp) and the transcription data.
 * @throws Any error from stat or from the transcription step, after logging it.
 */
export async function transcribeAudio(audioPath, config) {
    try {
        // Duration is estimated from the size on disk
        const { size: sizeBytes } = await stat(audioPath);
        const durationSeconds = estimateAudioDuration(sizeBytes);
        const durationMinutes = Math.round(durationSeconds / 60);
        logger.info(`Transcribing audio: ${audioPath}`, {
            fileSizeKB: Math.round(sizeBytes / 1024),
            estimatedDuration: durationMinutes,
        });
        // AC05: chunking (if any) is decided downstream
        const result = await handleLongAudio(audioPath, durationSeconds, config);
        // AC04: provenance - which model produced this, and when
        const [datePart] = new Date().toISOString().split('T');
        const enrichment = {
            kind: 'transcription',
            provider: 'gemini',
            model: config.geminiModel || 'gemini-1.5-pro',
            version: datePart || 'unknown', // YYYY-MM-DD
            createdAt: new Date().toISOString(),
            transcription: result.transcription,
            speakers: result.speakers,
            timestamps: result.timestamps,
            shortDescription: result.shortDescription,
        };
        logger.info(`Audio transcription complete for ${audioPath}`, {
            kind: enrichment.kind,
            speakerCount: enrichment.speakers?.length,
            duration: durationMinutes,
        });
        return enrichment;
    }
    catch (error) {
        logger.error(`Transcription error for ${audioPath}`, { error });
        throw error;
    }
}
|
|
248
|
+
/**
 * Main entry point - enrich a single audio media message with a transcription.
 *
 * The original message object is returned unchanged when:
 *  - audio transcription is disabled in config,
 *  - the message is not a media message or its media is not audio,
 *  - the media path is missing or the file cannot be accessed,
 *  - a gemini transcription enrichment already exists (idempotency), or
 *  - transcription fails (the error is logged, never thrown).
 *
 * Otherwise a new message object is returned whose `media.enrichment` array
 * has the transcription appended; the input message is not mutated.
 *
 * @param {object} message - Message to enrich; reads `messageKind`, `guid`
 *   and `media` (`mediaKind`, `path`, `filename`, `enrichment`).
 * @param {object} config - Reads `enableAudioTranscription`; passed through
 *   to transcribeAudio.
 * @returns {Promise<object>} The enriched copy, or the original message.
 */
export async function analyzeAudio(message, config) {
    // Skip if not enabled
    if (!config.enableAudioTranscription) {
        logger.debug('Audio transcription disabled in config');
        return message;
    }
    // Skip if not a media message
    if (message.messageKind !== 'media' || !message.media) {
        return message;
    }
    // Skip if media is not audio
    if (message.media.mediaKind !== 'audio') {
        logger.debug('Skipping non-audio media', {
            mediaKind: message.media.mediaKind,
        });
        return message;
    }
    // Skip if path is missing
    if (!message.media.path) {
        logger.warn('Skipping audio with missing path', {
            filename: message.media.filename,
        });
        return message;
    }
    // Check if audio file exists
    try {
        await access(message.media.path);
    }
    catch {
        logger.warn('Audio file not found at path', { path: message.media.path });
        return message;
    }
    try {
        // Idempotency: look for an existing transcription BEFORE calling the
        // API, so an already-enriched message costs no request.
        const existingTranscription = message.media.enrichment?.find((e) => e.kind === 'transcription' && e.provider === 'gemini');
        if (existingTranscription) {
            logger.debug('Transcription already exists, skipping re-analysis', {
                model: existingTranscription.model,
                guid: message.guid,
            });
            return message;
        }
        // AC01-AC05: Transcribe audio (handles chunking, API calls, parsing)
        const enrichment = await transcribeAudio(message.media.path, config);
        // Update message with enrichment (copy, do not mutate the input)
        const updatedMedia = {
            ...message.media,
            enrichment: [...(message.media.enrichment || []), enrichment],
        };
        logger.info('Audio enriched', {
            filename: message.media.filename,
            guid: message.guid,
        });
        return {
            ...message,
            media: updatedMedia,
        };
    }
    catch (error) {
        logger.error('Error analyzing audio', {
            filename: message.media?.filename,
            guid: message.guid,
            error: error instanceof Error ? error.message : String(error),
        });
        // Don't crash pipeline - return original message
        return message;
    }
}
|
|
328
|
+
/**
 * Batch wrapper around analyzeAudio.
 *
 * Messages are processed one at a time, in order. A message whose analysis
 * throws is logged and passed through unchanged, so the output always has
 * one entry per input message. A summary with success/skip/error counts is
 * logged at the end.
 *
 * @param {Array<object>} messages - Messages to process.
 * @param {object} config - Passed through to analyzeAudio.
 * @returns {Promise<Array<object>>} Analyzed messages, in input order.
 */
export async function analyzeAudios(messages, config) {
    const tally = { successCount: 0, skipCount: 0, errorCount: 0 };
    const output = [];
    for (const message of messages) {
        try {
            const enriched = await analyzeAudio(message, config);
            // A message counts as a success only if its enrichment list grew.
            const enrichmentsAfter = enriched.media?.enrichment;
            const gainedEnrichment = Boolean(enrichmentsAfter) &&
                enrichmentsAfter.length > (message.media?.enrichment?.length || 0);
            if (gainedEnrichment) {
                tally.successCount += 1;
            }
            else {
                tally.skipCount += 1;
            }
            output.push(enriched);
        }
        catch (failure) {
            tally.errorCount += 1;
            logger.error('Failed to analyze message', {
                guid: message.guid,
                error: failure instanceof Error ? failure.message : String(failure),
            });
            // Fall back to the untouched input message.
            output.push(message);
        }
    }
    logger.info('Batch audio transcription complete', {
        ...tally,
        total: messages.length,
    });
    return output;
}
|
|
370
|
+
//# sourceMappingURL=audio-transcription.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-transcription.js","sourceRoot":"","sources":["../../src/enrich/audio-transcription.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAA;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAI1D,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAA;AAkB5C,MAAM,MAAM,GAAG,YAAY,CAAC,4BAA4B,CAAC,CAAA;AAEzD;;;GAGG;AACH,MAAM,mBAAmB,GAAG;;;;;;;;;;;;;;;;;;;;;+CAqBmB,CAAA;AAE/C;;;GAGG;AACH,SAAS,qBAAqB,CAAC,aAAqB;IACnD,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,CAAA,CAAC,+BAA+B;IAC1D,OAAO,IAAI,CAAC,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,CAAA;AAC1C,CAAC;AAED;;;GAGG;AACH,SAAS,cAAc,CACtB,eAAuB,EACvB,gBAAgB,GAAG,GAAG;IAEtB,MAAM,MAAM,GAA+D,EAAE,CAAA;IAE7E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,GAAG,eAAe,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC;YACX,KAAK,EAAE,CAAC;YACR,QAAQ,EAAE,CAAC,GAAG,gBAAgB;YAC9B,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,gBAAgB,EAAE,eAAe,CAAC;SAC7D,CAAC,CAAA;IACH,CAAC;IAED,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACzC,SAAiB,EACjB,UAAkB,EAClB,MAAyC;IAEzC,MAAM,MAAM,GAAG,MAAM,CAAC,YAAY,CAAA;IAClC,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,IAAI,gBAAgB,CAAA;IAExD,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAA;IACtE,CAAC;IAED,IAAI,CAAC;QACJ,6DAA6D;QAC7D,MAAM,KAAK,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAA;QAC5C,MAAM,KAAK,GAAG,KAAK,CAAC,kBAAkB,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAA;QAE5D,wCAAwC;QACxC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAA;QACrD,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,SAAS,CAAC,CAAA;QAC7C,MAAM,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAA;QAElD,0CAA0C;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,KAAK,CAAA;QAC7D,MAAM,WAAW,GAA2B;YAC3C,GAAG,EAAE,WAAW;YAChB,GAAG,EAAE,YAAY;YACjB,GAAG,EAAE,WAAW;YAChB,GAAG,EAAE,WAAW;YAChB,GAAG,EAAE,WAAW;YAChB,IAAI,EAAE,YAAY;SAClB,CAAA;QACD,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,WAAW,CAAA;QAEhD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,eAAe,CAAC;YAC5C;gBACC,UAAU,EAAE;oBACX,QAAQ;oBACR,IAA
I,EAAE,WAAW;iBACjB;aACD;YACD,mBAAmB;SACnB,CAAC,CAAA;QAEF,MAAM,YAAY,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;QAC7C,MAAM,CAAC,KAAK,CACX,mCAAmC,UAAU,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CACtF,CAAA;QAED,2CAA2C;QAC3C,MAAM,cAAc,GAAG,YAAY,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAA;QAC/D,MAAM,QAAQ,GAAa,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAS,cAAc,CAAC,CAAC,CAAA,CAAC,2BAA2B;QAElG,kCAAkC;QAClC,MAAM,qBAAqB,GAAG,YAAY,CAAC,KAAK,CAC/C,uCAAuC,CACvC,CAAA;QACD,MAAM,gBAAgB,GACrB,qBAAqB,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,+BAA+B,CAAA;QAEtE,qCAAqC;QACrC,MAAM,kBAAkB,GAAG,YAAY,CAAC,KAAK,CAC5C,mDAAmD,CACnD,CAAA;QACD,MAAM,aAAa,GAAG,kBAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,YAAY,CAAA;QAErE,6BAA6B;QAC7B,MAAM,eAAe,GAAG,YAAY,CAAC,KAAK,CACzC,uDAAuD,CACvD,CAAA;QACD,MAAM,cAAc,GAAG,eAAe,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;QAEzD,8BAA8B;QAC9B,MAAM,UAAU,GAAG,cAAc;aAC/B,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aACrC,GAAG,CAAC,CAAC,IAAY,EAAE,EAAE;YACrB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAA;YACrE,OAAO;gBACN,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,OAAO;gBAC3B,OAAO,EAAE,WAAW,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE;gBACvC,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI;aAC3B,CAAA;QACF,CAAC,CAAC,CAAA;QAEH,OAAO;YACN,aAAa;YACb,QAAQ;YACR,UAAU;YACV,gBAAgB;SAChB,CAAA;IACF,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,MAAM,CAAC,KAAK,CAAC,wBAAwB,SAAS,WAAW,UAAU,GAAG,EAAE;YACvE,KAAK;SACL,CAAC,CAAA;QACF,MAAM,KAAK,CAAA;IACZ,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACpC,SAAiB,EACjB,eAAuB,EACvB,MAAyC;IAEzC,MAAM,gBAAgB,GAAG,CAAC,MAAM,CAAC,qBAAqB,IAAI,EAAE,CAAC,GAAG,EAAE,CAAA,CAAC,qBAAqB;IAExF,IAAI,eAAe,IAAI,gBAAgB,EAAE,CAAC;QACzC,+BAA+B;QAC/B,OAAO,oBAAoB,CAAC,SAAS,EAAE,CAAC,EAAE,MAAM,CAAC,CAAA;IAClD,CAAC;IAED,0BAA0B;IAC1B,MAAM,MAAM,GAAG,cAAc,CAAC,eAAe,EAAE,gBAAgB,CAAC,CAAA;IAChE,MAAM,CAAC,IAAI,CAAC,cAAc,MAAM,CAAC,MAAM,qBAAqB,SAAS,EAAE,EAAE;QACxE,QAAQ,EAAE,eAAe;QACzB,aAAa,EAAE,gBAAgB;KAC/B,CAAC,CAAA;IAEF,MAAM,YAAY,GAAwB,EAAE,CAAA;IAE5
C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC5B,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,SAAS,EAAE,KAAK,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA;YACzE,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAEzB,6CAA6C;YAC7C,IAAI,KAAK,CAAC,KAAK,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;gBAC9D,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAC7B,UAAU,CAAC,OAAO,EAAE,MAAM,CAAC,cAAc,CAAC,CAC1C,CAAA;YACF,CAAC;QACF,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,MAAM,CAAC,IAAI,CACV,8BAA8B,KAAK,CAAC,KAAK,0BAA0B,EACnE;gBACC,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACvD,CACD,CAAA;YACD,kDAAkD;QACnD,CAAC;IACF,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,uCAAuC,SAAS,EAAE,CAAC,CAAA;IACpE,CAAC;IAED,uCAAuC;IACvC,MAAM,mBAAmB,GAAG,YAAY;SACtC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC;SAC3B,IAAI,CAAC,MAAM,CAAC,CAAA;IACd,MAAM,WAAW,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;IACzE,MAAM,gBAAgB,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAA;IAElE,oFAAoF;IACpF,MAAM,gBAAgB,GACrB,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,gBAAgB;QACvD,+BAA+B,CAAA;IAEhC,OAAO;QACN,aAAa,EAAE,mBAAmB;QAClC,QAAQ,EAAE,WAAW;QACrB,UAAU,EAAE,gBAAgB;QAC5B,gBAAgB;KAChB,CAAA;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACpC,SAAiB,EACjB,MAAyC;IAEzC,IAAI,CAAC;QACJ,yCAAyC;QACzC,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,CAAA;QACvC,MAAM,eAAe,GAAG,qBAAqB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAE7D,MAAM,CAAC,IAAI,CAAC,uBAAuB,SAAS,EAAE,EAAE;YAC/C,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,GAAG,IAAI,CAAC;YAC7C,iBAAiB,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,EAAE,CAAC;SACnD,CAAC,CAAA;QAEF,kDAAkD;QAClD,MAAM,iBAAiB,GAAG,MAAM,eAAe,CAC9C,SAAS,EACT,eAAe,EACf,MAAM,CACN,CAAA;QAED,qDAAqD;QACrD,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,IAAI,gBAAgB,CAAA;QACxD,MAAM,OAAO,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,SAAS,CAAA;QACnE,MAAM,UAAU,GAAoB;YACnC,IAAI,EAAE,eAAe;YAC
rB,QAAQ,EAAE,QAAQ;YAClB,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,aAAa;YACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,aAAa,EAAE,iBAAiB,CAAC,aAAa;YAC9C,QAAQ,EAAE,iBAAiB,CAAC,QAAQ;YACpC,UAAU,EAAE,iBAAiB,CAAC,UAAU;YACxC,gBAAgB,EAAE,iBAAiB,CAAC,gBAAgB;SACpD,CAAA;QAED,MAAM,CAAC,IAAI,CAAC,oCAAoC,SAAS,EAAE,EAAE;YAC5D,IAAI,EAAE,UAAU,CAAC,IAAI;YACrB,YAAY,EAAE,UAAU,CAAC,QAAQ,EAAE,MAAM;YACzC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,EAAE,CAAC;SAC1C,CAAC,CAAA;QAEF,OAAO,UAAU,CAAA;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,MAAM,CAAC,KAAK,CAAC,2BAA2B,SAAS,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAA;QAC/D,MAAM,KAAK,CAAA;IACZ,CAAC;AACF,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CACjC,OAAgB,EAChB,MAAyC;IAEzC,sBAAsB;IACtB,IAAI,CAAC,MAAM,CAAC,wBAAwB,EAAE,CAAC;QACtC,MAAM,CAAC,KAAK,CAAC,wCAAwC,CAAC,CAAA;QACtD,OAAO,OAAO,CAAA;IACf,CAAC;IAED,8BAA8B;IAC9B,IAAI,OAAO,CAAC,WAAW,KAAK,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QACvD,OAAO,OAAO,CAAA;IACf,CAAC;IAED,6BAA6B;IAC7B,IAAI,OAAO,CAAC,KAAK,CAAC,SAAS,KAAK,OAAO,EAAE,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,0BAA0B,EAAE;YACxC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,SAAS;SAClC,CAAC,CAAA;QACF,OAAO,OAAO,CAAA;IACf,CAAC;IAED,0BAA0B;IAC1B,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,kCAAkC,EAAE;YAC/C,QAAQ,EAAE,OAAO,CAAC,KAAK,CAAC,QAAQ;SAChC,CAAC,CAAA;QACF,OAAO,OAAO,CAAA;IACf,CAAC;IAED,6BAA6B;IAC7B,IAAI,CAAC;QACJ,MAAM,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IACjC,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,CAAC,IAAI,CAAC,8BAA8B,EAAE,EAAE,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAA;QACzE,OAAO,OAAO,CAAA;IACf,CAAC;IAED,IAAI,CAAC;QACJ,qEAAqE;QACrE,MAAM,UAAU,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;QAEpE,yDAAyD;QACzD,MAAM,qBAAqB,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,CAC3D,CAAC,CAAC,EAAE,EAAE,CACL,CAAC,CAAC,IAAI,KAAK,eAAe;YAC1B,CAAC,CAAC,QAAQ,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAC1D,CAAA;QAED,IAAI,qBAAqB,EAAE,CAAC;YAC3B,MAAM,CAAC,KAAK,CAAC,oDAAoD,EAAE;gBAClE,KAAK,EAAE,qBAAqB,CAAC,KAAK;gBAClC,IAAI,EAAE,OAA
O,CAAC,IAAI;aAClB,CAAC,CAAA;YACF,OAAO,OAAO,CAAA;QACf,CAAC;QAED,iCAAiC;QACjC,MAAM,YAAY,GAAc;YAC/B,GAAG,OAAO,CAAC,KAAK;YAChB,UAAU,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,UAAU,CAAC;SAC7D,CAAA;QAED,MAAM,CAAC,IAAI,CAAC,gBAAgB,EAAE;YAC7B,QAAQ,EAAE,OAAO,CAAC,KAAK,CAAC,QAAQ;YAChC,IAAI,EAAE,OAAO,CAAC,IAAI;SAClB,CAAC,CAAA;QAEF,OAAO;YACN,GAAG,OAAO;YACV,KAAK,EAAE,YAAY;SACnB,CAAA;IACF,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE;YACrC,QAAQ,EAAE,OAAO,CAAC,KAAK,EAAE,QAAQ;YACjC,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC7D,CAAC,CAAA;QACF,iDAAiD;QACjD,OAAO,OAAO,CAAA;IACf,CAAC;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAClC,QAAmB,EACnB,MAAyC;IAEzC,MAAM,OAAO,GAAc,EAAE,CAAA;IAC7B,IAAI,YAAY,GAAG,CAAC,CAAA;IACpB,IAAI,SAAS,GAAG,CAAC,CAAA;IACjB,IAAI,UAAU,GAAG,CAAC,CAAA;IAElB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAChC,IAAI,CAAC;YACJ,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;YACpD,gCAAgC;YAChC,IACC,QAAQ,CAAC,KAAK,EAAE,UAAU;gBAC1B,QAAQ,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM;oBAC/B,CAAC,OAAO,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC,EACxC,CAAC;gBACF,YAAY,EAAE,CAAA;YACf,CAAC;iBAAM,CAAC;gBACP,SAAS,EAAE,CAAA;YACZ,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QACvB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,UAAU,EAAE,CAAA;YACZ,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE;gBACzC,IAAI,EAAE,OAAO,CAAC,IAAI;gBAClB,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACvD,CAAC,CAAA;YACF,0CAA0C;YAC1C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACtB,CAAC;IACF,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,oCAAoC,EAAE;QACjD,YAAY;QACZ,SAAS;QACT,UAAU;QACV,KAAK,EAAE,QAAQ,CAAC,MAAM;KACtB,CAAC,CAAA;IACF,OAAO,OAAO,CAAA;AACf,CAAC"}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Checkpoint and Resume Module (ENRICH--T06)
|
|
3
|
+
*
|
|
4
|
+
* Implements resumable enrichment with:
|
|
5
|
+
* - AC01: Checkpoint writes after N items (configurable, default 100)
|
|
6
|
+
* - AC02: Full checkpoint schema with stats and failed items
|
|
7
|
+
* - AC03: Atomic writes using temp file + rename pattern
|
|
8
|
+
* - AC04: Resume within ≤1 item of last checkpoint
|
|
9
|
+
* - AC05: Config consistency verification with hash comparison
|
|
10
|
+
*
|
|
11
|
+
* Architecture:
|
|
12
|
+
* - createCheckpoint: Create new checkpoint with schema
|
|
13
|
+
* - shouldWriteCheckpoint: Determine if checkpoint should be written
|
|
14
|
+
* - getResumeIndex: Calculate resume position from checkpoint
|
|
15
|
+
* - verifyConfigHash: Validate config hasn't changed
|
|
16
|
+
* - getCheckpointPath: Generate deterministic checkpoint file path
|
|
17
|
+
* - loadCheckpoint: Load checkpoint from disk
|
|
18
|
+
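* - saveCheckpoint: Write checkpoint atomically
* - computeConfigHash: Compute SHA-256 hash of config for consistency checking
* - initializeCheckpointState: Initialize checkpoint state for an enrichment run
* - prepareCheckpoint: Create new checkpoint for saving after processing a batch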
|
|
19
|
+
*/
|
|
20
|
+
/**
 * One failed item as recorded in a checkpoint. This is the element type of
 * `failedItems` on EnrichCheckpoint and CheckpointInput, and of
 * `failedItemsInCheckpoint` on CheckpointState.
 *
 * NOTE(review): the field meanings below are inferred from names only;
 * confirm against src/enrich/checkpoint.ts.
 */
export type FailedItem = {
|
|
21
|
+
index: number; // presumably the item's position in the input being enriched - TODO confirm
|
|
22
|
+
guid: string; // presumably the failed item's unique identifier - TODO confirm
|
|
23
|
+
kind: string; // presumably the item/enrichment kind (cf. CheckpointStats.enrichmentsByKind) - TODO confirm
|
|
24
|
+
error: string; // failure description as a string; its exact format is not visible in this declaration
|
|
25
|
+
};
|
|
26
|
+
/**
 * Counters stored under `stats` in EnrichCheckpoint and CheckpointInput, and
 * passed to prepareCheckpoint as `batchStats`.
 */
export type CheckpointStats = {
|
|
27
|
+
processedCount: number; // NOTE(review): relationship to a checkpoint's totalProcessed is not visible here
|
|
28
|
+
failedCount: number; // NOTE(review): relationship to a checkpoint's totalFailed is not visible here
|
|
29
|
+
enrichmentsByKind: Record<string, number>; // one number per string key; presumably an enrichment count per kind - TODO confirm
|
|
30
|
+
};
|
|
31
|
+
/**
 * Full checkpoint record (AC02). It is the type produced by createCheckpoint,
 * prepareCheckpoint and loadCheckpoint, and accepted by saveCheckpoint and
 * getResumeIndex.
 */
export type EnrichCheckpoint = {
|
|
32
|
+
version: string; // presumably a checkpoint schema/format version - TODO confirm
|
|
33
|
+
configHash: string; // config hash compared on resume (see verifyConfigHash); computeConfigHash documents it as SHA-256
|
|
34
|
+
lastProcessedIndex: number; // index of the last processed item; resume starts at lastProcessedIndex + 1 (see getResumeIndex)
|
|
35
|
+
totalProcessed: number; // total items processed so far
|
|
36
|
+
totalFailed: number; // total failed items so far
|
|
37
|
+
stats: CheckpointStats; // counters saved with the checkpoint
|
|
38
|
+
failedItems: FailedItem[]; // failed items recorded with this checkpoint
|
|
39
|
+
createdAt: string; // presumably a creation timestamp; the string format is not visible here - TODO confirm
|
|
40
|
+
};
|
|
41
|
+
/**
 * Argument type of createCheckpoint. It carries every EnrichCheckpoint field
 * except `version` and `createdAt`.
 */
export type CheckpointInput = {
|
|
42
|
+
lastProcessedIndex: number; // index of the last processed item
|
|
43
|
+
totalProcessed: number; // total items processed so far
|
|
44
|
+
totalFailed: number; // total failed items so far
|
|
45
|
+
stats: CheckpointStats; // counters to store in the checkpoint
|
|
46
|
+
failedItems: FailedItem[]; // failed items to record in the checkpoint
|
|
47
|
+
configHash: string; // hash of the current config (see computeConfigHash)
|
|
48
|
+
};
|
|
49
|
+
/**
 * AC01: Determine if a checkpoint should be written after N items.
 *
 * NOTE(review): the exact trigger rule (how itemIndex is compared with the
 * interval) is not visible in this declaration; see src/enrich/checkpoint.ts.
 *
 * @param itemIndex - Current item index (0-based)
 * @param checkpointInterval - Number of items between checkpoint writes;
 *   optional, documented default 100
 * @returns true if a checkpoint should be written
 */
|
|
56
|
+
export declare function shouldWriteCheckpoint(itemIndex: number, checkpointInterval?: number): boolean;
|
|
57
|
+
/**
 * AC02: Create a checkpoint with the full schema.
 *
 * The result type, EnrichCheckpoint, has two fields that CheckpointInput does
 * not: `version` and `createdAt`. How they are populated is not visible in
 * this declaration.
 *
 * @param input - Checkpoint input data
 * @returns EnrichCheckpoint with all required fields
 */
|
|
63
|
+
export declare function createCheckpoint(input: CheckpointInput): EnrichCheckpoint;
|
|
64
|
+
/**
 * AC03: Generate a deterministic checkpoint file path.
 *
 * NOTE(review): the file name format is not visible in this declaration.
 *
 * @param checkpointDir - Directory for checkpoints
 * @param configHash - Config hash, used to make the path unique per config
 * @returns Path to the checkpoint file
 */
|
|
71
|
+
export declare function getCheckpointPath(checkpointDir: string, configHash: string): string;
|
|
72
|
+
/**
 * AC03: Save a checkpoint atomically using the temp file + rename pattern.
 *
 * @param checkpoint - Checkpoint to save
 * @param checkpointPath - Path to save the checkpoint to
 * @returns Promise with no value. NOTE(review): presumably it settles once the
 *   rename has finished; rejection behavior is not visible in this declaration.
 */
|
|
78
|
+
export declare function saveCheckpoint(checkpoint: EnrichCheckpoint, checkpointPath: string): Promise<void>;
|
|
79
|
+
/**
 * AC03: Load a checkpoint from disk.
 *
 * NOTE(review): what happens for an unreadable or malformed file is not
 * visible in this declaration.
 *
 * @param checkpointPath - Path to the checkpoint file
 * @returns Promise of the loaded checkpoint, or of null if it was not found
 */
|
|
85
|
+
export declare function loadCheckpoint(checkpointPath: string): Promise<EnrichCheckpoint | null>;
|
|
86
|
+
/**
 * AC04: Calculate the resume index from a checkpoint.
 *
 * Resume at lastProcessedIndex + 1 to ensure we don't re-process
 * the last item that was in the previous checkpoint.
 *
 * @param checkpoint - Checkpoint to resume from
 * @returns Index of the next item to process (within ≤1 item of the last
 *   checkpoint)
 */
|
|
95
|
+
export declare function getResumeIndex(checkpoint: EnrichCheckpoint): number;
|
|
96
|
+
/**
 * AC05: Compute a config hash for consistency checking.
 *
 * NOTE(review): how the object is serialized before hashing, and the string
 * encoding of the digest, are not visible in this declaration.
 *
 * @param config - Configuration object
 * @returns SHA-256 hash of the config, as a string
 */
|
|
102
|
+
export declare function computeConfigHash(config: Record<string, unknown>): string;
|
|
103
|
+
/**
 * AC05: Verify the config hasn't changed by comparing hashes.
 *
 * @param checkpointHash - Hash stored in the checkpoint
 * @param currentHash - Hash of the current config
 * @returns true if the hashes match (config unchanged)
 */
|
|
110
|
+
export declare function verifyConfigHash(checkpointHash: string, currentHash: string): boolean;
|
|
111
|
+
/**
 * Success result type of initializeCheckpointState (which may return an Error
 * instead).
 *
 * NOTE(review): the field meanings below are inferred from names only;
 * confirm against src/enrich/checkpoint.ts.
 */
export type CheckpointState = {
|
|
112
|
+
isResuming: boolean; // presumably true when a usable checkpoint was loaded - TODO confirm
|
|
113
|
+
lastCheckpointIndex: number; // presumably the loaded checkpoint's lastProcessedIndex; the value without a checkpoint is not visible here
|
|
114
|
+
configHash: string; // presumably the current config hash - TODO confirm
|
|
115
|
+
failedItemsInCheckpoint: FailedItem[]; // presumably the failedItems carried over from the loaded checkpoint - TODO confirm
|
|
116
|
+
};
|
|
117
|
+
/**
 * Initialize checkpoint state for an enrichment run.
 *
 * The declared return type is a union with Error, so a failure can come back
 * as a value; callers need to narrow the result (e.g. `instanceof Error`)
 * before using it as a CheckpointState.
 *
 * NOTE(review): presumably the Error case is a config hash mismatch between
 * the checkpoint and currentConfigHash (AC05) - confirm against
 * src/enrich/checkpoint.ts.
 *
 * @param checkpoint - Loaded checkpoint, or null
 * @param currentConfigHash - Hash of the current config
 * @returns Checkpoint state, or an Error value
 */
|
|
124
|
+
export declare function initializeCheckpointState(checkpoint: EnrichCheckpoint | null, currentConfigHash: string): CheckpointState | Error;
|
|
125
|
+
/**
 * Create a new checkpoint for saving after processing a batch.
 *
 * Takes the same data as CheckpointInput, but as positional arguments
 * (`batchStats` corresponds to the `stats` field).
 *
 * @param lastProcessedIndex - Index of the last processed item
 * @param totalProcessed - Total items processed so far
 * @param totalFailed - Total failed items so far
 * @param batchStats - Stats for this batch
 * @param failedItems - Failed items in this batch
 * @param configHash - Hash of the current config
 * @returns Checkpoint ready to save
 */
|
|
136
|
+
export declare function prepareCheckpoint(lastProcessedIndex: number, totalProcessed: number, totalFailed: number, batchStats: CheckpointStats, failedItems: FailedItem[], configHash: string): EnrichCheckpoint;
|
|
137
|
+
//# sourceMappingURL=checkpoint.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checkpoint.d.ts","sourceRoot":"","sources":["../../src/enrich/checkpoint.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAUH,MAAM,MAAM,UAAU,GAAG;IACxB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;CACb,CAAA;AAED,MAAM,MAAM,eAAe,GAAG;IAC7B,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,EAAE,MAAM,CAAA;IACnB,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CACzC,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC9B,OAAO,EAAE,MAAM,CAAA;IACf,UAAU,EAAE,MAAM,CAAA;IAClB,kBAAkB,EAAE,MAAM,CAAA;IAC1B,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,eAAe,CAAA;IACtB,WAAW,EAAE,UAAU,EAAE,CAAA;IACzB,SAAS,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,MAAM,MAAM,eAAe,GAAG;IAC7B,kBAAkB,EAAE,MAAM,CAAA;IAC1B,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,eAAe,CAAA;IACtB,WAAW,EAAE,UAAU,EAAE,CAAA;IACzB,UAAU,EAAE,MAAM,CAAA;CAClB,CAAA;AAMD;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CACpC,SAAS,EAAE,MAAM,EACjB,kBAAkB,SAAM,GACtB,OAAO,CAGT;AAMD;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,eAAe,GAAG,gBAAgB,CAWzE;AAMD;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAChC,aAAa,EAAE,MAAM,EACrB,UAAU,EAAE,MAAM,GAChB,MAAM,CAER;AAED;;;;;GAKG;AACH,wBAAsB,cAAc,CACnC,UAAU,EAAE,gBAAgB,EAC5B,cAAc,EAAE,MAAM,GACpB,OAAO,CAAC,IAAI,CAAC,CAqBf;AAED;;;;;GAKG;AACH,wBAAsB,cAAc,CACnC,cAAc,EAAE,MAAM,GACpB,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CASlC;AAMD;;;;;;;;GAQG;AACH,wBAAgB,cAAc,CAAC,UAAU,EAAE,gBAAgB,GAAG,MAAM,CAGnE;AAMD;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAGzE;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC/B,cAAc,EAAE,MAAM,EACtB,WAAW,EAAE,MAAM,GACjB,OAAO,CAET;AAMD,MAAM,MAAM,eAAe,GAAG;IAC7B,UAAU,EAAE,OAAO,CAAA;IACnB,mBAAmB,EAAE,MAAM,CAAA;IAC3B,UAAU,EAAE,MAAM,CAAA;IAClB,uBAAuB,EAAE,UAAU,EAAE,CAAA;CACrC,CAAA;AAED;;;;;;GAMG;AACH,wBAAgB,yBAAyB,CACxC,UAAU,EAAE,gBAAgB,GAAG,IAAI,EACnC,iBAAiB,EAAE,MAAM,GACvB,eAAe,GAAG,KAAK,CAyBzB;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,iBAAiB,CAChC,kBAAkB,EAAE,MAAM,EAC1B,cAAc,EAAE,MAAM,EACtB,WAAW,EAAE,MAAM,EACnB,UAAU,EAAE,eAAe,EAC3B,WAAW,EAAE,UA
AU,EAAE,EACzB,UAAU,EAAE,MAAM,GAChB,gBAAgB,CASlB"}
|