task-summary-extractor 9.5.0 → 9.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +51 -0
- package/QUICK_START.md +11 -0
- package/README.md +12 -7
- package/package.json +1 -1
- package/src/phases/init.js +3 -0
- package/src/phases/process-media.js +213 -2
- package/src/phases/summary.js +5 -5
- package/src/pipeline.js +2 -1
- package/src/renderers/docx.js +1 -1
- package/src/renderers/html.js +1 -2
- package/src/services/gemini.js +233 -1
- package/src/services/video.js +9 -9
- package/src/utils/cli.js +104 -299
- package/src/utils/context-manager.js +152 -0
- package/src/utils/diff-engine.js +7 -7
- package/src/utils/interactive.js +402 -0
- package/src/utils/progress-bar.js +11 -10
package/src/services/gemini.js
CHANGED
|
@@ -26,7 +26,9 @@ const {
|
|
|
26
26
|
sliceVttForSegment,
|
|
27
27
|
buildProgressiveContext,
|
|
28
28
|
buildSegmentFocus,
|
|
29
|
+
buildBatchSegmentFocus,
|
|
29
30
|
estimateTokens,
|
|
31
|
+
estimateDocTokens,
|
|
30
32
|
} = require('../utils/context-manager');
|
|
31
33
|
const { formatHMS } = require('../utils/format');
|
|
32
34
|
const { withRetry } = require('../utils/retry');
|
|
@@ -564,6 +566,230 @@ async function processWithGemini(ai, filePath, displayName, contextDocs = [], pr
|
|
|
564
566
|
};
|
|
565
567
|
}
|
|
566
568
|
|
|
569
|
+
// ======================== MULTI-SEGMENT BATCH ANALYSIS ========================
|
|
570
|
+
|
|
571
|
+
/**
 * Process multiple consecutive video segments in a single Gemini call.
 * This takes advantage of unused context-window headroom (especially after
 * deep summary) to reduce the number of API calls and give the model a
 * more holistic view of the meeting.
 *
 * Each segment is either referenced by its storage URL or uploaded through the
 * Gemini File API. If anything fails after one or more uploads succeeded, the
 * uploaded files are deleted (best-effort) before the error is rethrown, since
 * the caller never receives their names in that case.
 *
 * @param {object} ai – Gemini AI instance
 * @param {Array<{ segPath: string, segName: string, durSec: number, storageUrl?: string }>} batchSegments
 *   Non-empty list of segments, in playback order.
 * @param {string} displayName – label for logging (e.g. "call1_video_batch0-2")
 * @param {Array} [contextDocs=[]] – prepared context docs
 * @param {Array} [previousAnalyses=[]] – analyses from earlier batches
 * @param {string} userName
 * @param {string} scriptDir – where prompt.json lives
 * @param {object} [batchOpts]
 * @param {number[]} [batchOpts.segmentIndices] – 0-based global indices of the segments
 *   (defaults to 0..batchSegments.length-1)
 * @param {number} [batchOpts.totalSegments] – total segment count for the whole file
 *   (defaults to batchSegments.length)
 * @param {Array<{startTimeSec: number, endTimeSec: number}>} [batchOpts.segmentTimes=[]]
 * @param {number} [batchOpts.thinkingBudget=24576] – forwarded as `thinkingConfig.thinkingBudget`
 * @param {boolean} [batchOpts.noStorageUrl=false] – force upload even when a storage URL exists
 * @returns {Promise<object>} Run record (same shape as processWithGemini), plus
 *   `_geminiFileNames` listing the File API uploads the caller must clean up.
 * @throws {Error} When the batch is empty, a file is still processing after the
 *   poll timeout, Gemini reports a FAILED upload, or any API call rejects.
 */
async function processSegmentBatch(ai, batchSegments, displayName, contextDocs = [], previousAnalyses = [], userName, scriptDir, batchOpts = {}) {
  // An empty batch would produce NaN segment labels and a request without video.
  if (!Array.isArray(batchSegments) || batchSegments.length === 0) {
    throw new Error('processSegmentBatch requires at least one segment');
  }

  const {
    segmentIndices = batchSegments.map((_, i) => i),
    totalSegments = batchSegments.length,
    segmentTimes = [],
    thinkingBudget = 24576,
    noStorageUrl = false,
  } = batchOpts;

  const { systemInstruction, promptText } = loadPrompt(scriptDir);

  // Segments larger than this are uploaded instead of referenced by storage URL.
  const EXTERNAL_URL_MAX_BYTES = 20 * 1024 * 1024;

  const fileRefs = []; // { uri, mimeType, name, usedExternalUrl }
  // Recorded as soon as an upload succeeds so a later failure can still clean up.
  const uploadedNames = [];

  try {
    // ── Upload / reference all video files ───────────────────────────────────
    for (const seg of batchSegments) {
      const fileSizeBytes = fs.existsSync(seg.segPath) ? fs.statSync(seg.segPath).size : 0;

      if (!noStorageUrl && seg.storageUrl && fileSizeBytes <= EXTERNAL_URL_MAX_BYTES) {
        fileRefs.push({ uri: seg.storageUrl, mimeType: 'video/mp4', name: null, usedExternalUrl: true });
        console.log(` ${seg.segName}: using Storage URL`);
        continue;
      }

      // Upload via Gemini File API
      console.log(` ${seg.segName}: uploading to Gemini File API...`);
      let uploaded = await withRetry(
        () => ai.files.upload({
          file: seg.segPath,
          config: { mimeType: 'video/mp4', displayName: `${displayName}_${seg.segName}` },
        }),
        { label: `Gemini upload (${seg.segName})`, maxRetries: 3 }
      );
      if (uploaded && uploaded.name) uploadedNames.push(uploaded.name);

      // Poll every 5 s until the file leaves PROCESSING or the timeout elapses.
      let waited = 0;
      const pollStart = Date.now();
      while (uploaded.state === 'PROCESSING') {
        if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
          throw new Error(`File "${seg.segName}" still processing after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s`);
        }
        process.stdout.write(` Processing ${seg.segName}${'.'.repeat((waited % 3) + 1)} \r`);
        await new Promise(r => setTimeout(r, 5000));
        waited++;
        const pendingName = uploaded.name;
        uploaded = await withRetry(
          () => ai.files.get({ name: pendingName }),
          { label: 'Gemini file status', maxRetries: 2, baseDelay: 1000 }
        );
      }
      if (uploaded.state === 'FAILED') {
        throw new Error(`Gemini processing failed for ${seg.segName}`);
      }
      fileRefs.push({ uri: uploaded.uri, mimeType: uploaded.mimeType || 'video/mp4', name: uploaded.name, usedExternalUrl: false });
      console.log(` ${seg.segName}: upload complete`);
    }

    // ── Build content parts ──────────────────────────────────────────────────
    const contentParts = [];

    // Video files — one labelled fileData part per segment, in order
    fileRefs.forEach((ref, i) => {
      const segIdx = segmentIndices[i];
      contentParts.push({ text: `=== VIDEO SEGMENT ${segIdx + 1} of ${totalSegments} ===` });
      contentParts.push({ fileData: { mimeType: ref.mimeType, fileUri: ref.uri } });
    });

    // Progressive context is built once and reused for both the budget
    // estimate and the prompt part.
    const prevText = buildProgressiveContext(previousAnalyses, userName);

    // Context docs — video is estimated at 300 tokens per second (280 s assumed
    // when a duration is missing); 120000 tokens are held back.
    // NOTE(review): the 120000 reserve is presumably headroom for prompt/output — confirm.
    const videoTokenEstimate = batchSegments.reduce((sum, s) => sum + Math.ceil((s.durSec || 280) * 300), 0);
    const prevContextEstimate = estimateTokens(prevText || '');
    const docBudget = Math.max(50000, config.GEMINI_CONTEXT_WINDOW - videoTokenEstimate - 120000 - prevContextEstimate);
    console.log(` Doc budget: ${(docBudget / 1000).toFixed(0)}K tokens for ${contextDocs.length} doc(s)`);

    const { selected: selectedDocs, excluded } = selectDocsByBudget(contextDocs, docBudget, { segmentIndex: segmentIndices[0] });
    if (excluded.length > 0) {
      console.log(` Context: ${selectedDocs.length} docs included, ${excluded.length} excluded`);
    }

    // Attach selected docs with VTT time-slicing across the batch range
    const hasTimes = segmentTimes.length > 0;
    const batchStartSec = hasTimes ? segmentTimes[0].startTimeSec : null;
    const batchEndSec = hasTimes ? segmentTimes[segmentTimes.length - 1].endTimeSec : null;

    for (const doc of selectedDocs) {
      if (doc.type === 'inlineText') {
        let content = doc.content;
        // Tolerate docs without a file name instead of throwing on toLowerCase().
        const lowerName = String(doc.fileName || '').toLowerCase();
        const isVtt = lowerName.endsWith('.vtt') || lowerName.endsWith('.srt');
        if (isVtt && batchStartSec != null && batchEndSec != null) {
          content = sliceVttForSegment(content, batchStartSec, batchEndSec);
          console.log(` VTT sliced to ${formatHMS(batchStartSec)}–${formatHMS(batchEndSec)} range`);
        }
        contentParts.push({ text: `=== Document: ${doc.fileName} ===\n${content}` });
      } else if (doc.type === 'fileData') {
        contentParts.push({ fileData: { mimeType: doc.mimeType, fileUri: doc.fileUri } });
      }
    }

    // Bridge text
    const bridgeText = buildDocBridgeText(selectedDocs);
    if (bridgeText) contentParts.push({ text: bridgeText });

    // Progressive context from previous batches
    if (prevText) contentParts.push({ text: prevText });

    // Multi-segment focus instructions
    const focusText = buildBatchSegmentFocus(segmentIndices, totalSegments, previousAnalyses, userName);
    contentParts.push({ text: focusText });

    // User identity
    if (userName) {
      contentParts.push({
        text: `CURRENT USER: "${userName}". Tag tasks assigned to or owned by "${userName}". Populate the "your_tasks" section.`
      });
    }

    contentParts.push({ text: promptText });

    // ── Send request ─────────────────────────────────────────────────────────
    console.log(` Analyzing batch [segments ${segmentIndices[0] + 1}–${segmentIndices[segmentIndices.length - 1] + 1}] with ${config.GEMINI_MODEL}...`);

    const generationConfig = {
      systemInstruction,
      maxOutputTokens: 65536,
      temperature: 0,
    };
    // Forward the documented thinking budget; it was previously accepted but
    // never sent. Skipped when the caller passes a non-numeric value.
    if (Number.isFinite(thinkingBudget)) {
      generationConfig.thinkingConfig = { thinkingBudget };
    }

    const requestPayload = {
      model: config.GEMINI_MODEL,
      contents: [{ role: 'user', parts: contentParts }],
      config: generationConfig,
    };

    const t0 = Date.now();
    const response = await withRetry(
      () => ai.models.generateContent(requestPayload),
      { label: `Gemini batch analysis (${displayName})`, maxRetries: 2, baseDelay: 5000 }
    );
    const durationMs = Date.now() - t0;

    const rawText = response.text;

    // Token usage
    const usage = response.usageMetadata || {};
    const tokenUsage = {
      inputTokens: usage.promptTokenCount || 0,
      outputTokens: usage.candidatesTokenCount || 0,
      totalTokens: usage.totalTokenCount || 0,
      thoughtTokens: usage.thoughtsTokenCount || 0,
    };
    const contextRemaining = config.GEMINI_CONTEXT_WINDOW - tokenUsage.inputTokens;
    const contextUsedPct = ((tokenUsage.inputTokens / config.GEMINI_CONTEXT_WINDOW) * 100).toFixed(1);
    tokenUsage.contextWindow = config.GEMINI_CONTEXT_WINDOW;
    tokenUsage.contextRemaining = contextRemaining;
    tokenUsage.contextUsedPct = parseFloat(contextUsedPct);

    console.log(` Tokens — input: ${tokenUsage.inputTokens.toLocaleString()} | output: ${tokenUsage.outputTokens.toLocaleString()} | thinking: ${tokenUsage.thoughtTokens.toLocaleString()}`);
    console.log(` Context — used: ${contextUsedPct}% | remaining: ${contextRemaining.toLocaleString()} tokens`);

    // Parse
    const parsed = extractJson(rawText);

    // Input summary — text parts are reduced to a length and a 300-char preview
    const inputSummary = contentParts.map(part => {
      if (part.fileData) return { type: 'fileData', mimeType: part.fileData.mimeType, fileUri: part.fileData.fileUri };
      if (part.text) return { type: 'text', chars: part.text.length, preview: part.text.substring(0, 300) };
      return part;
    });

    // ── Gemini File API uploads the caller is responsible for deleting ───────
    const geminiFileNames = fileRefs.filter(r => r.name && !r.usedExternalUrl).map(r => r.name);

    return {
      run: {
        model: config.GEMINI_MODEL,
        displayName,
        userName,
        timestamp: new Date().toISOString(),
        durationMs,
        tokenUsage,
        systemInstruction,
        batchMode: true,
        segmentIndices,
      },
      input: {
        videoFiles: fileRefs.map((ref, i) => ({
          mimeType: ref.mimeType,
          fileUri: ref.uri,
          segmentName: batchSegments[i].segName,
          usedExternalUrl: ref.usedExternalUrl,
        })),
        contextDocuments: contextDocs.map(d => ({ fileName: d.fileName, type: d.type })),
        previousSegmentCount: previousAnalyses.length,
        parts: inputSummary,
        promptText,
      },
      output: {
        raw: rawText,
        parsed,
        parseSuccess: parsed !== null,
      },
      _geminiFileNames: geminiFileNames,
    };
  } catch (err) {
    // On failure the caller never sees the uploaded file names, so release them
    // here. Cleanup problems must not mask the original error.
    if (uploadedNames.length > 0) {
      try {
        await cleanupGeminiFiles(ai, uploadedNames);
      } catch (cleanupErr) {
        console.warn(` Failed to clean up ${uploadedNames.length} uploaded file(s): ${cleanupErr.message}`);
      }
    }
    throw err;
  }
}
|
|
792
|
+
|
|
567
793
|
// ======================== FINAL COMPILATION ========================
|
|
568
794
|
|
|
569
795
|
/**
|
|
@@ -945,7 +1171,12 @@ console.log(` ${c.success(`Summary: ${summary.length.toLocaleString()} chars
|
|
|
945
1171
|
*/
|
|
946
1172
|
async function cleanupGeminiFiles(ai, geminiFileName, contextDocs = []) {
|
|
947
1173
|
const toDelete = [];
|
|
948
|
-
|
|
1174
|
+
// Accept a single name string or an array of names
|
|
1175
|
+
if (Array.isArray(geminiFileName)) {
|
|
1176
|
+
toDelete.push(...geminiFileName.filter(Boolean));
|
|
1177
|
+
} else if (geminiFileName) {
|
|
1178
|
+
toDelete.push(geminiFileName);
|
|
1179
|
+
}
|
|
949
1180
|
for (const doc of contextDocs) {
|
|
950
1181
|
if (doc.type === 'fileData' && doc.geminiFileName) {
|
|
951
1182
|
toDelete.push(doc.geminiFileName);
|
|
@@ -970,6 +1201,7 @@ module.exports = {
|
|
|
970
1201
|
prepareDocsForGemini,
|
|
971
1202
|
loadPrompt,
|
|
972
1203
|
processWithGemini,
|
|
1204
|
+
processSegmentBatch,
|
|
973
1205
|
compileFinalResult,
|
|
974
1206
|
buildDocBridgeText,
|
|
975
1207
|
analyzeVideoForContext,
|
package/src/services/video.js
CHANGED
|
@@ -237,7 +237,7 @@ function compressAndSegment(inputFile, outputDir, { segTime = SEG_TIME, speed =
|
|
|
237
237
|
const fbResult = spawnSync(getFFmpeg(), fbArgs, { stdio: 'inherit' });
|
|
238
238
|
if (fbResult.status === 0 && verifySegment(fallbackPath)) {
|
|
239
239
|
// Remove all corrupt segments and replace with the fallback
|
|
240
|
-
for (const seg of corrupt) { try { fs.unlinkSync(seg); } catch {} }
|
|
240
|
+
for (const seg of corrupt) { try { fs.unlinkSync(seg); } catch { /* best-effort cleanup */ } }
|
|
241
241
|
// If this was the only segment, just rename it
|
|
242
242
|
if (segments.length === 1) {
|
|
243
243
|
const dest = path.join(outputDir, 'segment_00.mp4');
|
|
@@ -261,8 +261,8 @@ function compressAndSegment(inputFile, outputDir, { segTime = SEG_TIME, speed =
|
|
|
261
261
|
for (const f of reSegs) {
|
|
262
262
|
fs.renameSync(path.join(reSegDir, f), path.join(outputDir, f));
|
|
263
263
|
}
|
|
264
|
-
try { fs.rmSync(reSegDir, { recursive: true }); } catch {}
|
|
265
|
-
try { fs.unlinkSync(fallbackPath); } catch {}
|
|
264
|
+
try { fs.rmSync(reSegDir, { recursive: true }); } catch { /* best-effort cleanup */ }
|
|
265
|
+
try { fs.unlinkSync(fallbackPath); } catch { /* best-effort cleanup */ }
|
|
266
266
|
// Re-collect
|
|
267
267
|
segments = fs.readdirSync(outputDir)
|
|
268
268
|
.filter(f => f.startsWith('segment_') && f.endsWith('.mp4'))
|
|
@@ -272,13 +272,13 @@ function compressAndSegment(inputFile, outputDir, { segTime = SEG_TIME, speed =
|
|
|
272
272
|
}
|
|
273
273
|
} else {
|
|
274
274
|
console.error(` ${c.error('Fallback re-encode also failed')}`);
|
|
275
|
-
try { fs.unlinkSync(fallbackPath); } catch {}
|
|
275
|
+
try { fs.unlinkSync(fallbackPath); } catch { /* best-effort cleanup */ }
|
|
276
276
|
}
|
|
277
277
|
} else if (corrupt.length > 0 && !needsSegmentation) {
|
|
278
278
|
// Single-output mode also failed — try once more without segment muxer flags
|
|
279
279
|
console.log(` Retrying single-output compression...`);
|
|
280
280
|
const retryPath = path.join(outputDir, 'segment_00.mp4');
|
|
281
|
-
try { fs.unlinkSync(retryPath); } catch {}
|
|
281
|
+
try { fs.unlinkSync(retryPath); } catch { /* best-effort cleanup */ }
|
|
282
282
|
const retryArgs = [
|
|
283
283
|
'-y',
|
|
284
284
|
'-i', inputFile,
|
|
@@ -373,7 +373,7 @@ function compressAndSegmentAudio(inputFile, outputDir, { segTime = SEG_TIME, spe
|
|
|
373
373
|
const fbArgs = ['-y', '-i', inputFile, ...encodingArgs, fallbackPath];
|
|
374
374
|
const fbResult = spawnSync(getFFmpeg(), fbArgs, { stdio: 'inherit' });
|
|
375
375
|
if (fbResult.status === 0 && verifySegment(fallbackPath)) {
|
|
376
|
-
for (const seg of corrupt) { try { fs.unlinkSync(seg); } catch {} }
|
|
376
|
+
for (const seg of corrupt) { try { fs.unlinkSync(seg); } catch { /* best-effort cleanup */ } }
|
|
377
377
|
if (segments.length === 1) {
|
|
378
378
|
const dest = path.join(outputDir, 'segment_00.m4a');
|
|
379
379
|
fs.renameSync(fallbackPath, dest);
|
|
@@ -394,8 +394,8 @@ function compressAndSegmentAudio(inputFile, outputDir, { segTime = SEG_TIME, spe
|
|
|
394
394
|
for (const f of reSegs) {
|
|
395
395
|
fs.renameSync(path.join(reSegDir, f), path.join(outputDir, f));
|
|
396
396
|
}
|
|
397
|
-
try { fs.rmSync(reSegDir, { recursive: true }); } catch {}
|
|
398
|
-
try { fs.unlinkSync(fallbackPath); } catch {}
|
|
397
|
+
try { fs.rmSync(reSegDir, { recursive: true }); } catch { /* best-effort cleanup */ }
|
|
398
|
+
try { fs.unlinkSync(fallbackPath); } catch { /* best-effort cleanup */ }
|
|
399
399
|
segments = fs.readdirSync(outputDir)
|
|
400
400
|
.filter(f => f.startsWith('segment_') && (f.endsWith('.m4a') || f.endsWith('.mp4')))
|
|
401
401
|
.sort()
|
|
@@ -404,7 +404,7 @@ function compressAndSegmentAudio(inputFile, outputDir, { segTime = SEG_TIME, spe
|
|
|
404
404
|
}
|
|
405
405
|
} else {
|
|
406
406
|
console.error(` ${c.error('Fallback audio re-encode failed')}`);
|
|
407
|
-
try { fs.unlinkSync(fallbackPath); } catch {}
|
|
407
|
+
try { fs.unlinkSync(fallbackPath); } catch { /* best-effort cleanup */ }
|
|
408
408
|
}
|
|
409
409
|
}
|
|
410
410
|
|