listener-ai 2.5.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,9 +36,104 @@ Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.GeminiService = void 0;
37
37
  const fs = __importStar(require("fs"));
38
38
  const path = __importStar(require("path"));
39
+ const child_process_1 = require("child_process");
40
+ const util_1 = require("util");
39
41
  const genai_1 = require("@google/genai");
40
42
  const audioFormats_1 = require("./audioFormats");
43
+ const outputService_1 = require("./outputService");
41
44
  const ffmpegManager_1 = require("./services/ffmpegManager");
45
+ const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
46
/**
 * Build the extra summary-prompt section that asks Gemini to enrich each
 * user-flagged moment with a subtitle and categorized bullets, returned as a
 * `highlights` array on the JSON response.
 *
 * @param {Array<{offsetMs: number, text: string}>} notes - Flagged moments to
 *   describe in the prompt. Each note contributes one line carrying its
 *   offset, a formatted timestamp and the JSON-encoded user text.
 * @returns {string} The prompt section, or '' when there is nothing to
 *   enrich (empty, missing, or non-array input) so the caller can leave the
 *   base prompt untouched and not pay for empty instructions.
 */
function buildHighlightsPromptBlock(notes) {
    // A missing or non-array argument is treated like "no notes" instead of
    // throwing on `.length`.
    if (!Array.isArray(notes) || notes.length === 0)
        return '';
    // JSON.stringify keeps quotes/newlines in the user's text from breaking
    // the one-note-per-line layout of the prompt.
    const lines = notes.map((n) => `- offsetMs=${n.offsetMs}, timestamp=${(0, outputService_1.formatOffsetTimestamp)(n.offsetMs)}, userText=${JSON.stringify(n.text)}`);
    return `In addition, the user flagged the following moments during the meeting. For each note, produce a structured analysis tied to that moment in the transcript:

${lines.join('\n')}

For every flagged moment above, write one entry in a JSON array named "highlights". Each entry must include:
- "offsetMs": the exact integer from the input
- "userText": the user's typed text, copied verbatim
- "subtitle": a short topic label in Korean (3-7 words) summarising what was being discussed at that timestamp
- "bullets": 2-5 short Korean bullet strings categorising the discussion at that point. Prefix each bullet with one of these categories when applicable, omitting categories that don't fit: "결정 사항:", "주요 인사이트:", "실행 항목:", "식별된 리스크:". If none of the categories fit, just write the bullet without a prefix.

Use the transcript as the ground truth -- if the user's typed text doesn't clearly match anything in the transcript, fall back to the meeting content nearest the given timestamp. Return the highlights array as an additional key alongside the other fields in the JSON.`;
}
67
/**
 * Attach Gemini's per-moment enrichment to the user's live notes.
 *
 * Every live note is returned, in input order, whether or not Gemini
 * produced a usable entry for it, so the user's own data always
 * round-trips. Enrichment is matched to a note by exact `offsetMs`.
 *
 * @param {Array<{offsetMs: number, text: string}>|undefined|null} liveNotes -
 *   Notes captured during the recording.
 * @param {unknown} raw - The `highlights` value parsed from the model
 *   response; anything that is not an array is ignored.
 * @returns {Array<{offsetMs: number, userText: string, subtitle: (string|undefined), bullets: (string[]|undefined)}>|undefined}
 *   One entry per live note, or undefined when there are no live notes.
 */
function mergeHighlights(liveNotes, raw) {
    if (!liveNotes || liveNotes.length === 0)
        return undefined;
    // Accept only real finite numbers and non-blank numeric strings. A bare
    // Number(...) would turn null, '', false and [] into 0 (and true into 1),
    // letting a malformed entry attach itself to a genuine note at that offset.
    const parseOffset = (value) => {
        if (typeof value === 'number')
            return Number.isFinite(value) ? value : undefined;
        if (typeof value === 'string' && value.trim().length > 0) {
            const parsed = Number(value);
            return Number.isFinite(parsed) ? parsed : undefined;
        }
        return undefined;
    };
    // Index the returned highlights by offsetMs. Anything malformed is treated
    // as "no enrichment for that note".
    const byOffset = new Map();
    if (Array.isArray(raw)) {
        for (const item of raw) {
            if (!item || typeof item !== 'object')
                continue;
            const offsetMs = parseOffset(item.offsetMs);
            if (offsetMs === undefined)
                continue;
            const subtitleRaw = item.subtitle;
            const bulletsRaw = item.bullets;
            const subtitle = typeof subtitleRaw === 'string' && subtitleRaw.trim().length > 0
                ? subtitleRaw.trim()
                : undefined;
            // Non-string and blank bullets are dropped.
            const bullets = Array.isArray(bulletsRaw)
                ? bulletsRaw.map((b) => (typeof b === 'string' ? b.trim() : '')).filter((b) => b.length > 0)
                : [];
            // An entry with nothing usable adds no information; skipping it
            // also keeps it from overwriting an earlier populated entry that
            // shares the same offset.
            if (subtitle === undefined && bullets.length === 0)
                continue;
            byOffset.set(offsetMs, {
                subtitle,
                bullets: bullets.length > 0 ? bullets : undefined,
            });
        }
    }
    return liveNotes.map((note) => {
        const enrichment = byOffset.get(note.offsetMs);
        return {
            offsetMs: note.offsetMs,
            userText: note.text,
            subtitle: enrichment?.subtitle,
            bullets: enrichment?.bullets,
        };
    });
}
106
/**
 * Wrap a bare transcript in the full transcription-result shape, leaving
 * every summary-related field empty.
 *
 * @param {string} transcript - Transcript text to carry through unchanged.
 * @returns {{transcript: string, summary: string, keyPoints: Array, actionItems: Array, emoji: string}}
 *   A fresh result object; the arrays are newly created on every call.
 */
function transcriptOnlyResult(transcript) {
    // Built inside the function so callers never share the empty arrays.
    const emptySummaryFields = {
        summary: '',
        keyPoints: [],
        actionItems: [],
        emoji: '',
    };
    return { transcript, ...emptySummaryFields };
}
115
// Default instruction text sent with audio when the caller supplies no custom
// transcription prompt. Kept as one entry per prompt line (empty strings are
// the blank separator lines) and joined with newlines.
const DEFAULT_TRANSCRIPT_PROMPT = [
    'Please transcribe this audio recording with proper speaker identification.',
    '',
    'Format requirements:',
    '1. IDENTIFY different speakers and label them as 참가자1, 참가자2, etc.',
    "2. Each speaker's turn MUST start on a NEW LINE",
    '3. Format: 참가자X: [what they said]',
    '4. Add a blank line between different speakers',
    '',
    'Example format:',
    '참가자1: 안녕하세요, 오늘 회의를 시작하겠습니다.',
    '',
    '참가자2: 네, 준비됐습니다.',
    '',
    '참가자1: 첫 번째 안건은...',
    '',
    'IMPORTANT:',
    '- You MUST identify and differentiate between speakers',
    '- Each speaker turn MUST start on a new line',
    '- Add blank line between different speakers',
    '- DO NOT include timestamps',
    '- Keep the transcription in the original spoken language',
    '- Return ONLY the transcription text, no JSON formatting',
].join('\n');
42
137
  class GeminiService {
43
138
  // Get FFmpeg path for this service
44
139
  async getFFmpegPath() {
@@ -63,7 +158,7 @@ class GeminiService {
63
158
  const wordList = this.knownWords.map((w) => `- ${w}`).join('\n');
64
159
  return `The following proper nouns, names, and terms may appear in the audio. Transcribe them exactly as spelled:\n${wordList}\n\n`;
65
160
  }
66
- async transcribeAudio(audioFilePath, progressCallback, summaryPrompt) {
161
+ async transcribeAudio(audioFilePath, progressCallback, summaryPrompt, liveNotes, options = {}) {
67
162
  // Integration-test escape hatch: avoid the real Gemini call so tests can
68
163
  // exercise the surrounding pipeline (CLI parsing, IPC, ffmpeg, save) for
69
164
  // free and offline. Gated on NODE_ENV=test so a stray LISTENER_TEST_MODE
@@ -71,6 +166,9 @@ class GeminiService {
71
166
  if (process.env.LISTENER_TEST_MODE && process.env.NODE_ENV === 'test') {
72
167
  if (progressCallback)
73
168
  progressCallback(100, 'Stubbed transcription');
169
+ if (options.transcriptOnly) {
170
+ return transcriptOnlyResult('Stubbed transcript.');
171
+ }
74
172
  return {
75
173
  transcript: 'Stubbed transcript.',
76
174
  summary: 'Stubbed summary.',
@@ -84,20 +182,20 @@ class GeminiService {
84
182
  // Check file size
85
183
  const stats = fs.statSync(audioFilePath);
86
184
  const fileSizeInMB = stats.size / (1024 * 1024);
87
- console.log(`Audio file size: ${fileSizeInMB.toFixed(2)} MB`);
185
+ console.error(`Audio file size: ${fileSizeInMB.toFixed(2)} MB`);
88
186
  if (progressCallback) {
89
187
  progressCallback(15, `Processing ${fileSizeInMB.toFixed(1)} MB audio file...`);
90
188
  }
91
189
  // Get audio duration using ffmpeg
92
190
  const duration = await this.getAudioDuration(audioFilePath);
93
- console.log(`Audio duration: ${duration} seconds`);
191
+ console.error(`Audio duration: ${duration} seconds`);
94
192
  // If duration is 0, log a warning but continue processing
95
193
  if (duration === 0) {
96
194
  console.warn('WARNING: Could not determine audio duration. Will process as single file without segmentation.');
97
195
  }
98
196
  // Always use the two-step approach for consistency
99
- console.log('Using two-step transcription approach...');
100
- return await this.transcribeWithTwoSteps(audioFilePath, duration, progressCallback, summaryPrompt);
197
+ console.error('Using two-step transcription approach...');
198
+ return await this.transcribeWithTwoSteps(audioFilePath, duration, progressCallback, summaryPrompt, liveNotes, options);
101
199
  }
102
200
  catch (error) {
103
201
  console.error('Error transcribing audio:', error);
@@ -116,21 +214,24 @@ class GeminiService {
116
214
  throw new Error(`Failed to transcribe audio: ${error instanceof Error ? error.message : String(error)}`);
117
215
  }
118
216
  }
119
- // Get audio duration using ffprobe
217
+ // Get audio duration using ffmpeg
120
218
  async getAudioDuration(audioFilePath) {
121
- const { exec } = require('child_process');
122
- const { promisify } = require('util');
123
- const execAsync = promisify(exec);
124
219
  try {
125
220
  const ffmpegPath = await this.getFFmpegPath();
126
221
  // Use ffmpeg with -f null to get file info including duration
127
222
  // This will output file info to stderr which we can parse
128
- const ffmpegCommand = `"${ffmpegPath}" -i "${audioFilePath}" -f null -`;
129
- console.log('Running ffmpeg command for duration:', ffmpegCommand);
130
- const { stderr } = await execAsync(ffmpegCommand).catch((e) => {
223
+ console.error('Running ffmpeg for duration:', ffmpegPath, audioFilePath);
224
+ const { stderr } = await execFileAsync(ffmpegPath, [
225
+ '-i',
226
+ audioFilePath,
227
+ '-f',
228
+ 'null',
229
+ '-',
230
+ ]).catch((error) => {
231
+ const execError = error;
131
232
  // FFmpeg exits with non-zero code when output is null, but still provides info in stderr
132
233
  // This is expected behavior, so we return the error object which contains stdout/stderr
133
- return { stdout: e.stdout || '', stderr: e.stderr || '' };
234
+ return { stdout: execError.stdout || '', stderr: execError.stderr || '' };
134
235
  });
135
236
  // Extract duration from stderr (where ffmpeg outputs file info)
136
237
  const durationMatch = stderr?.match(/Duration: (\d{2}):(\d{2}):(\d{2}\.\d{2})/);
@@ -139,7 +240,7 @@ class GeminiService {
139
240
  const minutes = Number.parseInt(durationMatch[2]);
140
241
  const seconds = Number.parseFloat(durationMatch[3]);
141
242
  const totalSeconds = hours * 3600 + minutes * 60 + seconds;
142
- console.log(`FFmpeg extracted duration: ${totalSeconds} seconds`);
243
+ console.error(`FFmpeg extracted duration: ${totalSeconds} seconds`);
143
244
  return totalSeconds;
144
245
  }
145
246
  // Alternative regex pattern for different duration formats
@@ -149,7 +250,7 @@ class GeminiService {
149
250
  const minutes = Number.parseInt(altDurationMatch[2]);
150
251
  const seconds = Number.parseInt(altDurationMatch[3]);
151
252
  const totalSeconds = hours * 3600 + minutes * 60 + seconds;
152
- console.log(`FFmpeg extracted duration (alt format): ${totalSeconds} seconds`);
253
+ console.error(`FFmpeg extracted duration (alt format): ${totalSeconds} seconds`);
153
254
  return totalSeconds;
154
255
  }
155
256
  // Default to 0 if we can't determine duration
@@ -164,9 +265,6 @@ class GeminiService {
164
265
  }
165
266
  // Split audio file into segments
166
267
  async splitAudioIntoSegments(audioFilePath, segmentDuration = 300) {
167
- const { exec } = require('child_process');
168
- const { promisify } = require('util');
169
- const execAsync = promisify(exec);
170
268
  const outputDir = path.dirname(audioFilePath);
171
269
  const baseName = path.basename(audioFilePath, path.extname(audioFilePath));
172
270
  const ext = path.extname(audioFilePath);
@@ -175,14 +273,24 @@ class GeminiService {
175
273
  const ffmpegPath = await this.getFFmpegPath();
176
274
  try {
177
275
  // Split audio into segments
178
- await execAsync(`"${ffmpegPath}" -i "${audioFilePath}" -f segment -segment_time ${segmentDuration} -c copy "${segmentPath}"`);
276
+ await execFileAsync(ffmpegPath, [
277
+ '-i',
278
+ audioFilePath,
279
+ '-f',
280
+ 'segment',
281
+ '-segment_time',
282
+ String(segmentDuration),
283
+ '-c',
284
+ 'copy',
285
+ segmentPath,
286
+ ]);
179
287
  // Find all created segment files
180
288
  const segmentFiles = fs
181
289
  .readdirSync(outputDir)
182
290
  .filter((file) => file.startsWith(`${baseName}_segment_`) && file.endsWith(ext))
183
291
  .map((file) => path.join(outputDir, file))
184
292
  .sort();
185
- console.log(`Split audio into ${segmentFiles.length} segments`);
293
+ console.error(`Split audio into ${segmentFiles.length} segments`);
186
294
  return segmentFiles;
187
295
  }
188
296
  catch (error) {
@@ -228,25 +336,31 @@ class GeminiService {
228
336
  }
229
337
  }
230
338
  // Two-step transcription approach for all audio files
231
- async transcribeWithTwoSteps(audioFilePath, duration, progressCallback, customSummaryPrompt) {
339
+ async transcribeWithTwoSteps(audioFilePath, duration, progressCallback, customSummaryPrompt, liveNotes, options = {}) {
232
340
  try {
233
341
  let fullTranscript = '';
234
342
  // Step 1: Get transcript
235
343
  if (duration > 300) {
236
344
  // Use segmented approach for long audio
237
- console.log('Audio is longer than 5 minutes, using segmented transcription...');
238
- fullTranscript = await this.getSegmentedTranscript(audioFilePath, duration, progressCallback);
345
+ console.error('Audio is longer than 5 minutes, using segmented transcription...');
346
+ fullTranscript = await this.getSegmentedTranscript(audioFilePath, duration, progressCallback, options.transcriptionPrompt);
239
347
  }
240
348
  else {
241
349
  // Get transcript for short audio
242
- console.log('Transcribing short audio...');
243
- fullTranscript = await this.getShortAudioTranscript(audioFilePath, progressCallback);
350
+ console.error('Transcribing short audio...');
351
+ fullTranscript = await this.getShortAudioTranscript(audioFilePath, progressCallback, options.transcriptionPrompt);
352
+ }
353
+ if (options.transcriptOnly) {
354
+ if (progressCallback) {
355
+ progressCallback(100, 'Transcript ready');
356
+ }
357
+ return transcriptOnlyResult(fullTranscript);
244
358
  }
245
359
  // Step 2: Generate summary, key points, action items from transcript
246
360
  if (progressCallback) {
247
361
  progressCallback(85, 'Generating summary and key points...');
248
362
  }
249
- const summaryPrompt = customSummaryPrompt ||
363
+ const basePrompt = customSummaryPrompt ||
250
364
  `Based on this meeting transcript, provide:
251
365
 
252
366
  1. A concise meeting title in Korean (10-20 characters that captures the main topic)
@@ -263,6 +377,9 @@ Return as JSON:
263
377
  "actionItems": ["action 1", "action 2"],
264
378
  "emoji": "📝"
265
379
  }`;
380
+ const enrichableNotes = (liveNotes ?? []).filter((n) => (n.text ?? '').trim().length > 0);
381
+ const highlightsBlock = buildHighlightsPromptBlock(enrichableNotes);
382
+ const summaryPrompt = highlightsBlock ? `${basePrompt}\n\n${highlightsBlock}` : basePrompt;
266
383
  const summaryResult = await this.ai.models.generateContent({
267
384
  model: this.proModel,
268
385
  contents: [{ role: 'user', parts: [{ text: summaryPrompt }, { text: fullTranscript }] }],
@@ -286,11 +403,14 @@ Return as JSON:
286
403
  'keyPoints',
287
404
  'actionItems',
288
405
  'emoji',
406
+ 'highlights',
289
407
  ]);
290
408
  const customFields = {};
409
+ let rawHighlights;
291
410
  try {
292
411
  const parsed = JSON.parse(summaryText);
293
412
  summaryData = parsed;
413
+ rawHighlights = parsed.highlights;
294
414
  // Extract custom fields (any keys not in the known set)
295
415
  for (const [key, value] of Object.entries(parsed)) {
296
416
  if (!KNOWN_KEYS.has(key)) {
@@ -306,6 +426,7 @@ Return as JSON:
306
426
  summaryData.summary = summaryMatch[1].replace(/\\n/g, '\n');
307
427
  }
308
428
  }
429
+ const highlights = mergeHighlights(liveNotes, rawHighlights);
309
430
  if (progressCallback) {
310
431
  progressCallback(95, 'Finalizing results...');
311
432
  }
@@ -317,6 +438,7 @@ Return as JSON:
317
438
  emoji: summaryData.emoji,
318
439
  suggestedTitle: summaryData.suggestedTitle,
319
440
  customFields: Object.keys(customFields).length > 0 ? customFields : undefined,
441
+ highlights,
320
442
  };
321
443
  }
322
444
  catch (error) {
@@ -325,7 +447,7 @@ Return as JSON:
325
447
  }
326
448
  }
327
449
  // Get transcript for short audio files
328
- async getShortAudioTranscript(audioFilePath, progressCallback) {
450
+ async getShortAudioTranscript(audioFilePath, progressCallback, customPrompt) {
329
451
  try {
330
452
  const stats = fs.statSync(audioFilePath);
331
453
  const fileSizeInMB = stats.size / (1024 * 1024);
@@ -335,7 +457,7 @@ Return as JSON:
335
457
  // Use Files API for files over 20MB
336
458
  let fileUri = null;
337
459
  if (fileSizeInMB > 20) {
338
- console.log('File is over 20MB, using Files API for upload...');
460
+ console.error('File is over 20MB, using Files API for upload...');
339
461
  if (progressCallback) {
340
462
  progressCallback(25, 'Uploading large file to Gemini...');
341
463
  }
@@ -349,7 +471,7 @@ Return as JSON:
349
471
  let file = await this.ai.files.get({ name: uploadResult.name || '' });
350
472
  let retries = 0;
351
473
  while (file.state === 'PROCESSING' && retries < 30) {
352
- console.log(`Waiting for file to be processed... (attempt ${retries + 1}/30)`);
474
+ console.error(`Waiting for file to be processed... (attempt ${retries + 1}/30)`);
353
475
  await new Promise((resolve) => setTimeout(resolve, 2000));
354
476
  file = await this.ai.files.get({ name: uploadResult.name || '' });
355
477
  retries++;
@@ -361,28 +483,7 @@ Return as JSON:
361
483
  if (progressCallback) {
362
484
  progressCallback(50, 'Transcribing audio...');
363
485
  }
364
- const transcriptPrompt = `${this.buildGlossaryBlock()}Please transcribe this audio recording with proper speaker identification.
365
-
366
- Format requirements:
367
- 1. IDENTIFY different speakers and label them as 참가자1, 참가자2, etc.
368
- 2. Each speaker's turn MUST start on a NEW LINE
369
- 3. Format: 참가자X: [what they said]
370
- 4. Add a blank line between different speakers
371
-
372
- Example format:
373
- 참가자1: 안녕하세요, 오늘 회의를 시작하겠습니다.
374
-
375
- 참가자2: 네, 준비됐습니다.
376
-
377
- 참가자1: 첫 번째 안건은...
378
-
379
- IMPORTANT:
380
- - You MUST identify and differentiate between speakers
381
- - Each speaker turn MUST start on a new line
382
- - Add blank line between different speakers
383
- - DO NOT include timestamps
384
- - Keep the transcription in the original spoken language
385
- - Return ONLY the transcription text, no JSON formatting`;
486
+ const transcriptPrompt = `${this.buildGlossaryBlock()}${customPrompt ?? DEFAULT_TRANSCRIPT_PROMPT}`;
386
487
  let result;
387
488
  if (fileUri) {
388
489
  const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(audioFilePath));
@@ -453,38 +554,19 @@ IMPORTANT:
453
554
  return `[Segment ${segmentIndex + 1}: ${this.formatTime(segmentStartTime)} ~ ${this.formatTime(segmentEndTime)}]\n\n`;
454
555
  }
455
556
  // Create prompt for segment transcription
456
- createSegmentPrompt(segmentIndex, totalSegments) {
457
- return `${this.buildGlossaryBlock()}Please transcribe audio segment ${segmentIndex + 1} of ${totalSegments} with proper speaker identification.
458
-
459
- Format requirements:
460
- 1. IDENTIFY different speakers and label them as 참가자1, 참가자2, etc.
461
- 2. Each speaker's turn MUST start on a NEW LINE
462
- 3. Format: 참가자X: [what they said]
463
- 4. Add a blank line between different speakers
464
-
465
- Example format:
466
- 참가자1: 안녕하세요, 오늘 회의를 시작하겠습니다.
467
-
468
- 참가자2: 네, 준비됐습니다.
469
-
470
- 참가자1: 첫 번째 안건은...
471
-
472
- IMPORTANT:
473
- - You MUST identify and differentiate between speakers
474
- - Each speaker turn MUST start on a new line
475
- - Add blank line between different speakers
476
- - DO NOT include timestamps
477
- - Keep the transcription in the original spoken language
478
- - Return ONLY the transcription text, no JSON formatting`;
557
+ createSegmentPrompt(segmentIndex, totalSegments, customPrompt) {
558
+ const positional = `[Audio segment ${segmentIndex + 1} of ${totalSegments}]\n\n`;
559
+ const body = customPrompt ?? DEFAULT_TRANSCRIPT_PROMPT;
560
+ return `${this.buildGlossaryBlock()}${positional}${body}`;
479
561
  }
480
562
  // Transcribe a single segment with retry logic
481
- async transcribeSingleSegment(segmentFile, segmentIndex, totalSegments, segmentStartTime, segmentEndTime) {
563
+ async transcribeSingleSegment(segmentFile, segmentIndex, totalSegments, segmentStartTime, segmentEndTime, customPrompt) {
482
564
  const maxRetries = 3;
483
565
  let lastError = null;
484
- const segmentPrompt = this.createSegmentPrompt(segmentIndex, totalSegments);
566
+ const segmentPrompt = this.createSegmentPrompt(segmentIndex, totalSegments, customPrompt);
485
567
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
486
568
  try {
487
- console.log(`Starting transcription for segment ${segmentIndex + 1}/${totalSegments} (attempt ${attempt}/${maxRetries})...`);
569
+ console.error(`Starting transcription for segment ${segmentIndex + 1}/${totalSegments} (attempt ${attempt}/${maxRetries})...`);
488
570
  const audioData = fs.readFileSync(segmentFile);
489
571
  const base64Audio = audioData.toString('base64');
490
572
  const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(segmentFile));
@@ -510,7 +592,7 @@ IMPORTANT:
510
592
  },
511
593
  });
512
594
  const transcript = result.text || '';
513
- console.log(`Completed transcription for segment ${segmentIndex + 1}/${totalSegments}`);
595
+ console.error(`Completed transcription for segment ${segmentIndex + 1}/${totalSegments}`);
514
596
  // Add segment time range header
515
597
  const segmentHeader = this.createSegmentHeader(segmentIndex, segmentStartTime, segmentEndTime);
516
598
  return {
@@ -524,7 +606,7 @@ IMPORTANT:
524
606
  if (attempt < maxRetries) {
525
607
  // Wait before retry with exponential backoff
526
608
  const retryDelay = Math.min(1000 * 2 ** (attempt - 1), 10000); // Max 10 seconds
527
- console.log(`Retrying segment ${segmentIndex + 1} in ${retryDelay}ms...`);
609
+ console.error(`Retrying segment ${segmentIndex + 1} in ${retryDelay}ms...`);
528
610
  await new Promise((resolve) => setTimeout(resolve, retryDelay));
529
611
  }
530
612
  }
@@ -537,7 +619,7 @@ IMPORTANT:
537
619
  };
538
620
  }
539
621
  // Get segmented transcript (renamed from transcribeAudioSegmented)
540
- async getSegmentedTranscript(audioFilePath, duration, progressCallback) {
622
+ async getSegmentedTranscript(audioFilePath, duration, progressCallback, customPrompt) {
541
623
  try {
542
624
  // Split audio into 5-minute segments
543
625
  const segmentFiles = await this.splitAudioIntoSegments(audioFilePath, 300);
@@ -548,7 +630,7 @@ IMPORTANT:
548
630
  const transcriptionPromises = segmentFiles.map(async (segmentFile, i) => {
549
631
  const segmentStartTime = i * 300; // 5 minutes in seconds
550
632
  const segmentEndTime = Math.min(segmentStartTime + 300, duration);
551
- return this.transcribeSingleSegment(segmentFile, i, segmentFiles.length, segmentStartTime, segmentEndTime);
633
+ return this.transcribeSingleSegment(segmentFile, i, segmentFiles.length, segmentStartTime, segmentEndTime, customPrompt);
552
634
  });
553
635
  // Track progress of concurrent transcriptions
554
636
  let completedCount = 0;
package/dist/main.js CHANGED
@@ -424,7 +424,7 @@ electron_1.app.whenReady().then(() => {
424
424
  },
425
425
  },
426
426
  {
427
- label: 'Check for Updates...',
427
+ label: autoUpdaterService_1.autoUpdaterService.getManualUpdateLabel(),
428
428
  click: () => {
429
429
  autoUpdaterService_1.autoUpdaterService.checkForUpdatesManually();
430
430
  },
@@ -439,7 +439,7 @@ electron_1.app.whenReady().then(() => {
439
439
  { role: 'about' },
440
440
  { type: 'separator' },
441
441
  {
442
- label: 'Check for Updates...',
442
+ label: autoUpdaterService_1.autoUpdaterService.getManualUpdateLabel(),
443
443
  click: () => {
444
444
  autoUpdaterService_1.autoUpdaterService.checkForUpdatesManually();
445
445
  },
@@ -1001,7 +1001,7 @@ electron_1.ipcMain.on('recording-chunk', (_event, data) => {
1001
1001
  console.error('Invalid chunk payload:', error);
1002
1002
  }
1003
1003
  });
1004
- electron_1.ipcMain.handle('stop-recording', async () => {
1004
+ electron_1.ipcMain.handle('stop-recording', async (_, opts) => {
1005
1005
  try {
1006
1006
  finalizeRecordingSession();
1007
1007
  const result = await audioRecorder.stopRecording();
@@ -1014,6 +1014,18 @@ electron_1.ipcMain.handle('stop-recording', async () => {
1014
1014
  // Silently skip if ffmpeg isn't available — file still plays, transcription
1015
1015
  // pipeline is unaffected.
1016
1016
  await remuxRecordingHeader(result.filePath);
1017
+ // Persist live notes alongside the audio so they survive even if the
1018
+ // user transcribes later (auto-mode off). transcribe-audio falls back
1019
+ // to this when its own arg is missing.
1020
+ const liveNotes = sanitizeLiveNotes(opts?.liveNotes);
1021
+ if (liveNotes && liveNotes.length > 0) {
1022
+ try {
1023
+ await metadataService_1.metadataService.saveMetadata(result.filePath, { liveNotes });
1024
+ }
1025
+ catch (err) {
1026
+ console.error('Failed to persist live notes to metadata:', err);
1027
+ }
1028
+ }
1017
1029
  }
1018
1030
  }
1019
1031
  return result;
@@ -1108,6 +1120,31 @@ function isContainedTranscriptionPath(folderPath) {
1108
1120
  const resolved = path.resolve(folderPath);
1109
1121
  return resolved === root || resolved.startsWith(root + path.sep);
1110
1122
  }
1123
// Validate + normalize the renderer's live-notes payload before it touches disk.
// Renderer state is untrusted (compromised content scripts, future agent flows)
// so this enforces shape + caps text length to keep summary.md/Notion sane.
const LIVE_NOTE_MAX_TEXT = 2000;
const LIVE_NOTE_MAX_COUNT = 500;
/**
 * Reduce an untrusted live-notes payload to a clean list of
 * `{ offsetMs, text }` records.
 *
 * Entries that are not objects, or whose `offsetMs` is not a finite number
 * (or a non-blank numeric string), are dropped. Offsets are floored and
 * clamped to be non-negative; non-string text becomes ''; text is cut to
 * LIVE_NOTE_MAX_TEXT characters; at most LIVE_NOTE_MAX_COUNT notes are kept.
 *
 * @param {unknown} raw - Payload as received from the renderer or read back
 *   from metadata.
 * @returns {Array<{offsetMs: number, text: string}>|undefined} The accepted
 *   notes in input order, or undefined when the input is not an array or no
 *   entry survives validation.
 */
function sanitizeLiveNotes(raw) {
    if (!Array.isArray(raw))
        return undefined;
    // A bare Number(...) would accept null, '', [] and booleans as offset 0/1;
    // for an untrusted payload those are malformed and must be rejected, the
    // same way a missing offsetMs already is.
    const parseOffset = (value) => {
        if (typeof value === 'number')
            return Number.isFinite(value) ? value : undefined;
        if (typeof value === 'string' && value.trim().length > 0) {
            const parsed = Number(value);
            return Number.isFinite(parsed) ? parsed : undefined;
        }
        return undefined;
    };
    const out = [];
    for (const item of raw) {
        if (!item || typeof item !== 'object')
            continue;
        const offsetMs = parseOffset(item.offsetMs);
        if (offsetMs === undefined)
            continue;
        const text = item.text;
        out.push({
            offsetMs: Math.max(0, Math.floor(offsetMs)),
            text: typeof text === 'string' ? text.slice(0, LIVE_NOTE_MAX_TEXT) : '',
        });
        // Stop as soon as the cap is reached so an oversized payload is not
        // walked to the end.
        if (out.length >= LIVE_NOTE_MAX_COUNT)
            break;
    }
    return out.length > 0 ? out : undefined;
}
1111
1148
  // Tell the renderer the config has changed out-of-band so it can re-read and
1112
1149
  // re-render its UI state (toggle checkboxes etc.). Used by the agent flow.
1113
1150
  function broadcastConfigChanged() {
@@ -1182,9 +1219,24 @@ electron_1.ipcMain.handle('get-meeting-status', async () => {
1182
1219
  };
1183
1220
  });
1184
1221
  // Transcription handler
1185
- electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
1222
+ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath, liveNotesRaw) => {
1186
1223
  try {
1187
1224
  console.log('Transcription requested for:', filePath);
1225
+ let liveNotes = sanitizeLiveNotes(liveNotesRaw);
1226
+ if (!liveNotes || liveNotes.length === 0) {
1227
+ // Fall back to whatever stop-recording persisted -- covers the
1228
+ // record-now-transcribe-later flow when auto-mode is off.
1229
+ try {
1230
+ const existing = await metadataService_1.metadataService.getMetadata(filePath);
1231
+ const fromMetadata = sanitizeLiveNotes(existing?.liveNotes);
1232
+ if (fromMetadata && fromMetadata.length > 0) {
1233
+ liveNotes = fromMetadata;
1234
+ }
1235
+ }
1236
+ catch (err) {
1237
+ console.warn('Failed to read live notes from metadata:', err);
1238
+ }
1239
+ }
1188
1240
  // Send progress update
1189
1241
  if (mainWindow) {
1190
1242
  mainWindow.webContents.send('transcription-progress', {
@@ -1216,9 +1268,14 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
1216
1268
  }
1217
1269
  };
1218
1270
  const summaryPrompt = configService.getSummaryPrompt();
1219
- const result = await geminiService.transcribeAudio(filePath, progressCallback, summaryPrompt);
1271
+ const result = await geminiService.transcribeAudio(filePath, progressCallback, summaryPrompt, liveNotes);
1220
1272
  console.log('Transcription completed successfully');
1221
1273
  console.log('Saving metadata for:', filePath);
1274
+ // Attach renderer-captured notes so downstream consumers (Notion upload,
1275
+ // re-render in the modal) can read them off the result object.
1276
+ if (liveNotes && liveNotes.length > 0) {
1277
+ result.liveNotes = liveNotes;
1278
+ }
1222
1279
  // Save transcription files (summary.md + transcript.md)
1223
1280
  const title = result.suggestedTitle || path.basename(filePath, path.extname(filePath));
1224
1281
  let transcriptionPath;
@@ -1228,6 +1285,7 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
1228
1285
  result,
1229
1286
  audioFilePath: filePath,
1230
1287
  dataPath: electron_1.app.getPath('userData'),
1288
+ liveNotes,
1231
1289
  });
1232
1290
  console.log('Transcription saved to:', transcriptionPath);
1233
1291
  }
@@ -1242,6 +1300,7 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
1242
1300
  suggestedTitle: result.suggestedTitle,
1243
1301
  transcriptionPath,
1244
1302
  customFields: result.customFields,
1303
+ liveNotes,
1245
1304
  transcribedAt: new Date().toISOString(),
1246
1305
  });
1247
1306
  }
@@ -1255,6 +1314,7 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
1255
1314
  keyPoints: result.keyPoints,
1256
1315
  actionItems: result.actionItems,
1257
1316
  customFields: result.customFields,
1317
+ liveNotes,
1258
1318
  transcribedAt: new Date().toISOString(),
1259
1319
  });
1260
1320
  }
@@ -1413,6 +1473,8 @@ electron_1.ipcMain.handle('get-metadata', async (_, filePath) => {
1413
1473
  actionItems: transcription.actionItems,
1414
1474
  customFields: transcription.customFields ?? metadata.customFields,
1415
1475
  emoji: transcription.emoji,
1476
+ liveNotes: transcription.liveNotes ?? metadata.liveNotes,
1477
+ highlights: transcription.highlights,
1416
1478
  notionPageUrl: transcription.notionPageUrl,
1417
1479
  slackSentAt: transcription.slackSentAt,
1418
1480
  slackError: transcription.slackError,
@@ -1512,33 +1574,40 @@ electron_1.ipcMain.handle('get-recordings', async () => {
1512
1574
  }
1513
1575
  // Read all files in the recordings directory
1514
1576
  const files = fs.readdirSync(recordingsDir);
1515
- // Filter for audio files and get their stats
1516
- const recordings = files
1517
- .filter((file) => {
1518
- // Filter out segment files
1577
+ // Filter for audio files and get their stats. Skip per-file races so one
1578
+ // deleted recording does not make the whole list fail.
1579
+ const recordings = [];
1580
+ for (const file of files) {
1519
1581
  if (file.includes('_segment_'))
1520
- return false;
1521
- return (0, audioFormats_1.isSupportedAudioExtension)(path.extname(file));
1522
- })
1523
- .map((file) => {
1582
+ continue;
1583
+ if (!(0, audioFormats_1.isSupportedAudioExtension)(path.extname(file)))
1584
+ continue;
1524
1585
  const filePath = path.join(recordingsDir, file);
1525
- const stats = fs.statSync(filePath);
1586
+ let stats;
1587
+ try {
1588
+ stats = fs.statSync(filePath);
1589
+ }
1590
+ catch (err) {
1591
+ if (err.code === 'ENOENT')
1592
+ continue;
1593
+ throw err;
1594
+ }
1526
1595
  // Extract title from filename (format: title_timestamp.ext)
1527
1596
  const nameWithoutExt = path.basename(file, path.extname(file));
1528
1597
  const parts = nameWithoutExt.split('_');
1529
1598
  const timestamp = parts.pop(); // Remove timestamp
1530
1599
  const title = parts.join('_') || 'Untitled';
1531
- return {
1600
+ recordings.push({
1532
1601
  filename: file,
1533
1602
  path: filePath,
1534
- title: title,
1535
- timestamp: timestamp,
1603
+ title,
1604
+ timestamp,
1536
1605
  size: stats.size,
1537
1606
  createdAt: stats.birthtime,
1538
1607
  modifiedAt: stats.mtime,
1539
- };
1540
- })
1541
- .sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime()); // Sort by newest first
1608
+ });
1609
+ }
1610
+ recordings.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime()); // Sort by newest first
1542
1611
  return { success: true, recordings };
1543
1612
  }
1544
1613
  catch (error) {