task-summary-extractor 9.5.0 → 9.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,9 @@ const {
26
26
  sliceVttForSegment,
27
27
  buildProgressiveContext,
28
28
  buildSegmentFocus,
29
+ buildBatchSegmentFocus,
29
30
  estimateTokens,
31
+ estimateDocTokens,
30
32
  } = require('../utils/context-manager');
31
33
  const { formatHMS } = require('../utils/format');
32
34
  const { withRetry } = require('../utils/retry');
@@ -564,6 +566,230 @@ async function processWithGemini(ai, filePath, displayName, contextDocs = [], pr
564
566
  };
565
567
  }
566
568
 
569
+ // ======================== MULTI-SEGMENT BATCH ANALYSIS ========================
570
+
571
/**
 * Process multiple consecutive video segments in a single Gemini call.
 * This takes advantage of unused context-window headroom (especially after
 * deep summary) to reduce the number of API calls and give the model a
 * more holistic view of the meeting.
 *
 * Each segment is referenced by its Storage URL when one is available, allowed
 * (`noStorageUrl` is false) and the local file is at most 20 MiB; otherwise the
 * file is uploaded through the Gemini File API and polled until it leaves the
 * PROCESSING state.
 *
 * If anything throws after at least one File API upload was created, those
 * uploads are deleted (best-effort) before the error is re-thrown, because the
 * caller only learns the uploaded names through the returned record.
 *
 * @param {object} ai – Gemini AI instance
 * @param {Array<{ segPath: string, segName: string, durSec: number, storageUrl?: string }>} batchSegments
 *   Non-empty list of segments, in playback order.
 * @param {string} displayName – label for logging (e.g. "call1_video_batch0-2")
 * @param {Array} contextDocs – prepared context docs (a non-array value is treated as empty)
 * @param {Array} previousAnalyses – analyses from earlier batches (a non-array value is treated as empty)
 * @param {string} userName
 * @param {string} scriptDir – where prompt.json lives
 * @param {object} batchOpts
 * @param {number[]} batchOpts.segmentIndices – 0-based global indices of the segments (one per segment)
 * @param {number} batchOpts.totalSegments – total segment count for the whole file
 * @param {Array<{startTimeSec: number, endTimeSec: number}>} batchOpts.segmentTimes
 *   The first entry's start and the last entry's end bound the VTT/SRT slice.
 * @param {number} [batchOpts.thinkingBudget=24576] – forwarded as `config.thinkingConfig.thinkingBudget`
 * @param {boolean} [batchOpts.noStorageUrl=false]
 * @returns {Promise<object>} Run record (same shape as processWithGemini) plus
 *   `_geminiFileNames`, the File API names the caller is expected to clean up.
 * @throws {Error} When the batch is empty, `segmentIndices` does not match the
 *   batch length, a file stays in PROCESSING past GEMINI_POLL_TIMEOUT_MS, Gemini
 *   reports FAILED for a file, or the upload / analysis request fails.
 */
async function processSegmentBatch(ai, batchSegments, displayName, contextDocs, previousAnalyses, userName, scriptDir, batchOpts = {}) {
  // Fail before any (slow, billable) upload when the batch itself is unusable.
  if (!Array.isArray(batchSegments) || batchSegments.length === 0) {
    throw new Error('processSegmentBatch: batchSegments must be a non-empty array');
  }

  const {
    segmentIndices = batchSegments.map((_, i) => i),
    totalSegments = batchSegments.length,
    segmentTimes = [],
    thinkingBudget = 24576,
    noStorageUrl = false,
  } = batchOpts;

  // A mismatch would otherwise surface as "VIDEO SEGMENT NaN of N" labels in the prompt.
  if (segmentIndices.length !== batchSegments.length) {
    throw new Error(
      `processSegmentBatch: got ${segmentIndices.length} segment indices for ${batchSegments.length} segment(s)`
    );
  }

  const docs = Array.isArray(contextDocs) ? contextDocs : [];
  const priorAnalyses = Array.isArray(previousAnalyses) ? previousAnalyses : [];

  const { systemInstruction, promptText } = loadPrompt(scriptDir);

  const EXTERNAL_URL_MAX_BYTES = 20 * 1024 * 1024;

  const fileRefs = []; // { uri, mimeType, name, usedExternalUrl }
  const uploadedNames = []; // every File API upload, recorded as soon as it exists

  try {
    // ── Upload / reference all video files ───────────────────────────────────
    for (const seg of batchSegments) {
      const fileSizeBytes = fs.existsSync(seg.segPath) ? fs.statSync(seg.segPath).size : 0;

      if (!noStorageUrl && seg.storageUrl && fileSizeBytes <= EXTERNAL_URL_MAX_BYTES) {
        fileRefs.push({ uri: seg.storageUrl, mimeType: 'video/mp4', name: null, usedExternalUrl: true });
        console.log(` ${seg.segName}: using Storage URL`);
        continue;
      }

      // Upload via Gemini File API
      console.log(` ${seg.segName}: uploading to Gemini File API...`);
      const uploaded = await uploadBatchSegment(ai, seg, displayName, uploadedNames);
      fileRefs.push({ uri: uploaded.uri, mimeType: uploaded.mimeType || 'video/mp4', name: uploaded.name, usedExternalUrl: false });
      console.log(` ${seg.segName}: upload complete`);
    }

    // ── Build content parts ──────────────────────────────────────────────────
    const contentParts = [];

    // Video files — a label plus one fileData part per segment, in order
    fileRefs.forEach((ref, i) => {
      contentParts.push({ text: `=== VIDEO SEGMENT ${segmentIndices[i] + 1} of ${totalSegments} ===` });
      contentParts.push({ fileData: { mimeType: ref.mimeType, fileUri: ref.uri } });
    });

    // Built once; used both for the budget estimate and as a prompt part below.
    const prevText = buildProgressiveContext(priorAnalyses, userName);

    // Doc budget: context window minus the video estimate (300 tokens per second,
    // 280 s assumed when durSec is missing), the previous-context estimate and a
    // fixed 120K reserve (presumably prompt/output/thinking headroom — confirm),
    // never below 50K.
    const videoTokenEstimate = batchSegments.reduce((sum, s) => sum + Math.ceil((s.durSec || 280) * 300), 0);
    const prevContextEstimate = estimateTokens(prevText || '');
    const docBudget = Math.max(50000, config.GEMINI_CONTEXT_WINDOW - videoTokenEstimate - 120000 - prevContextEstimate);
    console.log(` Doc budget: ${(docBudget / 1000).toFixed(0)}K tokens for ${docs.length} doc(s)`);

    const { selected: selectedDocs, excluded } = selectDocsByBudget(docs, docBudget, { segmentIndex: segmentIndices[0] });
    if (excluded.length > 0) {
      console.log(` Context: ${selectedDocs.length} docs included, ${excluded.length} excluded`);
    }

    // Attach selected docs with VTT time-slicing across the batch range
    const hasTimes = segmentTimes.length > 0;
    const batchStartSec = hasTimes ? segmentTimes[0].startTimeSec : null;
    const batchEndSec = hasTimes ? segmentTimes[segmentTimes.length - 1].endTimeSec : null;

    for (const doc of selectedDocs) {
      if (doc.type === 'inlineText') {
        let content = doc.content;
        const lowerName = doc.fileName.toLowerCase();
        const isVtt = lowerName.endsWith('.vtt') || lowerName.endsWith('.srt');
        if (isVtt && batchStartSec != null && batchEndSec != null) {
          content = sliceVttForSegment(content, batchStartSec, batchEndSec);
          console.log(` VTT sliced to ${formatHMS(batchStartSec)}–${formatHMS(batchEndSec)} range`);
        }
        contentParts.push({ text: `=== Document: ${doc.fileName} ===\n${content}` });
      } else if (doc.type === 'fileData') {
        contentParts.push({ fileData: { mimeType: doc.mimeType, fileUri: doc.fileUri } });
      }
    }

    // Bridge text
    const bridgeText = buildDocBridgeText(selectedDocs);
    if (bridgeText) contentParts.push({ text: bridgeText });

    // Progressive context from previous batches
    if (prevText) contentParts.push({ text: prevText });

    // Multi-segment focus instructions
    const focusText = buildBatchSegmentFocus(segmentIndices, totalSegments, priorAnalyses, userName);
    contentParts.push({ text: focusText });

    // User identity
    if (userName) {
      contentParts.push({
        text: `CURRENT USER: "${userName}". Tag tasks assigned to or owned by "${userName}". Populate the "your_tasks" section.`
      });
    }

    contentParts.push({ text: promptText });

    // ── Send request ─────────────────────────────────────────────────────────
    console.log(` Analyzing batch [segments ${segmentIndices[0] + 1}–${segmentIndices[segmentIndices.length - 1] + 1}] with ${config.GEMINI_MODEL}...`);

    const generationConfig = {
      systemInstruction,
      maxOutputTokens: 65536,
      temperature: 0,
    };
    // Honour the documented thinkingBudget option (previously read but never sent).
    if (typeof thinkingBudget === 'number') {
      generationConfig.thinkingConfig = { thinkingBudget };
    }

    const requestPayload = {
      model: config.GEMINI_MODEL,
      contents: [{ role: 'user', parts: contentParts }],
      config: generationConfig,
    };

    const t0 = Date.now();
    const response = await withRetry(
      () => ai.models.generateContent(requestPayload),
      { label: `Gemini batch analysis (${displayName})`, maxRetries: 2, baseDelay: 5000 }
    );
    const durationMs = Date.now() - t0;

    const rawText = response.text;

    // Token usage
    const usage = response.usageMetadata || {};
    const tokenUsage = {
      inputTokens: usage.promptTokenCount || 0,
      outputTokens: usage.candidatesTokenCount || 0,
      totalTokens: usage.totalTokenCount || 0,
      thoughtTokens: usage.thoughtsTokenCount || 0,
    };
    const contextRemaining = config.GEMINI_CONTEXT_WINDOW - tokenUsage.inputTokens;
    const contextUsedPct = ((tokenUsage.inputTokens / config.GEMINI_CONTEXT_WINDOW) * 100).toFixed(1);
    tokenUsage.contextWindow = config.GEMINI_CONTEXT_WINDOW;
    tokenUsage.contextRemaining = contextRemaining;
    tokenUsage.contextUsedPct = parseFloat(contextUsedPct);

    console.log(` Tokens — input: ${tokenUsage.inputTokens.toLocaleString()} | output: ${tokenUsage.outputTokens.toLocaleString()} | thinking: ${tokenUsage.thoughtTokens.toLocaleString()}`);
    console.log(` Context — used: ${contextUsedPct}% | remaining: ${contextRemaining.toLocaleString()} tokens`);

    // Parse — a response without text is reported as a parse failure instead of
    // being handed to extractJson.
    const parsed = rawText ? extractJson(rawText) : null;

    // Input summary (text parts are truncated to a 300-char preview)
    const inputSummary = contentParts.map(part => {
      if (part.fileData) return { type: 'fileData', mimeType: part.fileData.mimeType, fileUri: part.fileData.fileUri };
      if (part.text) return { type: 'text', chars: part.text.length, preview: part.text.substring(0, 300) };
      return part;
    });

    // ── Names of Gemini File API uploads for the caller to clean up ──────────
    const geminiFileNames = fileRefs.filter(r => r.name && !r.usedExternalUrl).map(r => r.name);

    return {
      run: {
        model: config.GEMINI_MODEL,
        displayName,
        userName,
        timestamp: new Date().toISOString(),
        durationMs,
        tokenUsage,
        systemInstruction,
        batchMode: true,
        segmentIndices,
      },
      input: {
        videoFiles: fileRefs.map((ref, i) => ({
          mimeType: ref.mimeType,
          fileUri: ref.uri,
          segmentName: batchSegments[i].segName,
          usedExternalUrl: ref.usedExternalUrl,
        })),
        contextDocuments: docs.map(d => ({ fileName: d.fileName, type: d.type })),
        previousSegmentCount: priorAnalyses.length,
        parts: inputSummary,
        promptText,
      },
      output: {
        raw: rawText,
        parsed,
        parseSuccess: parsed !== null,
      },
      _geminiFileNames: geminiFileNames,
    };
  } catch (err) {
    // The caller never receives the upload names on failure, so release them here.
    await deleteBatchUploads(ai, uploadedNames);
    throw err;
  }
}

/**
 * Upload one segment through the Gemini File API and wait until it is no
 * longer PROCESSING. The upload's name is appended to `uploadedNames` as soon
 * as the upload call returns, so it can be cleaned up even if polling fails.
 *
 * @param {object} ai – Gemini AI instance
 * @param {{ segPath: string, segName: string }} seg
 * @param {string} displayName – batch label, used as a prefix for the file's display name
 * @param {string[]} uploadedNames – mutated: receives the new file name
 * @returns {Promise<object>} The file record returned by the last upload/get call
 * @throws {Error} On poll timeout or when Gemini reports the FAILED state
 */
async function uploadBatchSegment(ai, seg, displayName, uploadedNames) {
  let uploaded = await withRetry(
    () => ai.files.upload({
      file: seg.segPath,
      config: { mimeType: 'video/mp4', displayName: `${displayName}_${seg.segName}` },
    }),
    { label: `Gemini upload (${seg.segName})`, maxRetries: 3 }
  );
  const fileName = uploaded.name;
  if (fileName) uploadedNames.push(fileName);

  let waited = 0;
  const pollStart = Date.now();
  while (uploaded.state === 'PROCESSING') {
    if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
      throw new Error(`File "${seg.segName}" still processing after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s`);
    }
    process.stdout.write(` Processing ${seg.segName}${'.'.repeat((waited % 3) + 1)} \r`);
    await new Promise(r => setTimeout(r, 5000));
    waited++;
    uploaded = await withRetry(
      () => ai.files.get({ name: fileName }),
      { label: 'Gemini file status', maxRetries: 2, baseDelay: 1000 }
    );
  }
  if (uploaded.state === 'FAILED') {
    throw new Error(`Gemini processing failed for ${seg.segName}`);
  }
  return uploaded;
}

/**
 * Best-effort deletion of Gemini File API uploads after a failed batch.
 * Never throws: a failed delete is logged and the remaining names are still tried.
 *
 * @param {object} ai – Gemini AI instance
 * @param {string[]} names – File API names to delete
 * @returns {Promise<void>}
 */
async function deleteBatchUploads(ai, names) {
  for (const name of names) {
    try {
      await ai.files.delete({ name });
    } catch (cleanupErr) {
      const reason = cleanupErr && cleanupErr.message ? cleanupErr.message : String(cleanupErr);
      console.warn(` Could not delete Gemini file ${name}: ${reason}`);
    }
  }
}
792
+
567
793
  // ======================== FINAL COMPILATION ========================
568
794
 
569
795
  /**
@@ -945,7 +1171,12 @@ console.log(` ${c.success(`Summary: ${summary.length.toLocaleString()} chars
945
1171
  */
946
1172
  async function cleanupGeminiFiles(ai, geminiFileName, contextDocs = []) {
947
1173
  const toDelete = [];
948
- if (geminiFileName) toDelete.push(geminiFileName);
1174
+ // Accept a single name string or an array of names
1175
+ if (Array.isArray(geminiFileName)) {
1176
+ toDelete.push(...geminiFileName.filter(Boolean));
1177
+ } else if (geminiFileName) {
1178
+ toDelete.push(geminiFileName);
1179
+ }
949
1180
  for (const doc of contextDocs) {
950
1181
  if (doc.type === 'fileData' && doc.geminiFileName) {
951
1182
  toDelete.push(doc.geminiFileName);
@@ -970,6 +1201,7 @@ module.exports = {
970
1201
  prepareDocsForGemini,
971
1202
  loadPrompt,
972
1203
  processWithGemini,
1204
+ processSegmentBatch,
973
1205
  compileFinalResult,
974
1206
  buildDocBridgeText,
975
1207
  analyzeVideoForContext,
@@ -237,7 +237,7 @@ function compressAndSegment(inputFile, outputDir, { segTime = SEG_TIME, speed =
237
237
  const fbResult = spawnSync(getFFmpeg(), fbArgs, { stdio: 'inherit' });
238
238
  if (fbResult.status === 0 && verifySegment(fallbackPath)) {
239
239
  // Remove all corrupt segments and replace with the fallback
240
- for (const seg of corrupt) { try { fs.unlinkSync(seg); } catch {} }
240
+ for (const seg of corrupt) { try { fs.unlinkSync(seg); } catch { /* best-effort cleanup */ } }
241
241
  // If this was the only segment, just rename it
242
242
  if (segments.length === 1) {
243
243
  const dest = path.join(outputDir, 'segment_00.mp4');
@@ -261,8 +261,8 @@ function compressAndSegment(inputFile, outputDir, { segTime = SEG_TIME, speed =
261
261
  for (const f of reSegs) {
262
262
  fs.renameSync(path.join(reSegDir, f), path.join(outputDir, f));
263
263
  }
264
- try { fs.rmSync(reSegDir, { recursive: true }); } catch {}
265
- try { fs.unlinkSync(fallbackPath); } catch {}
264
+ try { fs.rmSync(reSegDir, { recursive: true }); } catch { /* best-effort cleanup */ }
265
+ try { fs.unlinkSync(fallbackPath); } catch { /* best-effort cleanup */ }
266
266
  // Re-collect
267
267
  segments = fs.readdirSync(outputDir)
268
268
  .filter(f => f.startsWith('segment_') && f.endsWith('.mp4'))
@@ -272,13 +272,13 @@ function compressAndSegment(inputFile, outputDir, { segTime = SEG_TIME, speed =
272
272
  }
273
273
  } else {
274
274
  console.error(` ${c.error('Fallback re-encode also failed')}`);
275
- try { fs.unlinkSync(fallbackPath); } catch {}
275
+ try { fs.unlinkSync(fallbackPath); } catch { /* best-effort cleanup */ }
276
276
  }
277
277
  } else if (corrupt.length > 0 && !needsSegmentation) {
278
278
  // Single-output mode also failed — try once more without segment muxer flags
279
279
  console.log(` Retrying single-output compression...`);
280
280
  const retryPath = path.join(outputDir, 'segment_00.mp4');
281
- try { fs.unlinkSync(retryPath); } catch {}
281
+ try { fs.unlinkSync(retryPath); } catch { /* best-effort cleanup */ }
282
282
  const retryArgs = [
283
283
  '-y',
284
284
  '-i', inputFile,
@@ -373,7 +373,7 @@ function compressAndSegmentAudio(inputFile, outputDir, { segTime = SEG_TIME, spe
373
373
  const fbArgs = ['-y', '-i', inputFile, ...encodingArgs, fallbackPath];
374
374
  const fbResult = spawnSync(getFFmpeg(), fbArgs, { stdio: 'inherit' });
375
375
  if (fbResult.status === 0 && verifySegment(fallbackPath)) {
376
- for (const seg of corrupt) { try { fs.unlinkSync(seg); } catch {} }
376
+ for (const seg of corrupt) { try { fs.unlinkSync(seg); } catch { /* best-effort cleanup */ } }
377
377
  if (segments.length === 1) {
378
378
  const dest = path.join(outputDir, 'segment_00.m4a');
379
379
  fs.renameSync(fallbackPath, dest);
@@ -394,8 +394,8 @@ function compressAndSegmentAudio(inputFile, outputDir, { segTime = SEG_TIME, spe
394
394
  for (const f of reSegs) {
395
395
  fs.renameSync(path.join(reSegDir, f), path.join(outputDir, f));
396
396
  }
397
- try { fs.rmSync(reSegDir, { recursive: true }); } catch {}
398
- try { fs.unlinkSync(fallbackPath); } catch {}
397
+ try { fs.rmSync(reSegDir, { recursive: true }); } catch { /* best-effort cleanup */ }
398
+ try { fs.unlinkSync(fallbackPath); } catch { /* best-effort cleanup */ }
399
399
  segments = fs.readdirSync(outputDir)
400
400
  .filter(f => f.startsWith('segment_') && (f.endsWith('.m4a') || f.endsWith('.mp4')))
401
401
  .sort()
@@ -404,7 +404,7 @@ function compressAndSegmentAudio(inputFile, outputDir, { segTime = SEG_TIME, spe
404
404
  }
405
405
  } else {
406
406
  console.error(` ${c.error('Fallback audio re-encode failed')}`);
407
- try { fs.unlinkSync(fallbackPath); } catch {}
407
+ try { fs.unlinkSync(fallbackPath); } catch { /* best-effort cleanup */ }
408
408
  }
409
409
  }
410
410