task-summary-extractor 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,779 @@
1
+ /**
2
+ * Gemini AI service — init, document preparation, segment analysis,
3
+ * and final compilation of all segment outputs into one unified result.
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const fs = require('fs');
9
+ const path = require('path');
10
+ const config = require('../config');
11
+ const {
12
+ GEMINI_API_KEY,
13
+ GEMINI_FILE_API_EXTS,
14
+ INLINE_TEXT_EXTS,
15
+ GEMINI_UNSUPPORTED,
16
+ MIME_MAP,
17
+ GEMINI_POLL_TIMEOUT_MS,
18
+ } = config;
19
+ // Access config.GEMINI_MODEL and config.GEMINI_CONTEXT_WINDOW at call time
20
+ // (not destructured) so runtime model changes via setActiveModel() are visible.
21
+ const { extractJson } = require('../utils/json-parser');
22
+ const {
23
+ selectDocsByBudget,
24
+ sliceVttForSegment,
25
+ buildProgressiveContext,
26
+ buildSegmentFocus,
27
+ estimateTokens,
28
+ } = require('../utils/context-manager');
29
+ const { formatHMS } = require('../utils/format');
30
+ const { withRetry } = require('../utils/retry');
31
+
32
+ // ======================== INIT ========================
33
+
34
/**
 * Create a GoogleGenAI client authenticated with the configured API key.
 * The SDK is required lazily so this module can be loaded even in code paths
 * that never talk to Gemini.
 * @returns {Promise<object>} An initialized GoogleGenAI instance.
 */
async function initGemini() {
  // Lazy require: '@google/genai' is only loaded on first use.
  const { GoogleGenAI } = require('@google/genai');
  return new GoogleGenAI({ apiKey: GEMINI_API_KEY });
}
39
+
40
+ // ======================== DOCUMENT PREPARATION ========================
41
+
42
/**
 * Prepare documents for Gemini context — inline text files, upload PDFs via File API, skip unsupported.
 * Accepts array of { absPath, relPath } from findDocsRecursive.
 *
 * Routing is decided per file by extension:
 *   - INLINE_TEXT_EXTS      → read from disk, attach as { type: 'inlineText', content }
 *   - GEMINI_FILE_API_EXTS  → upload via the Gemini File API, poll until processed,
 *                             attach as { type: 'fileData', fileUri, ... }
 *   - GEMINI_UNSUPPORTED / anything else → warn and skip
 *
 * Best-effort: a failure on any single document is logged and never aborts the rest.
 *
 * Fix vs. previous revision: a polling timeout used to fall through to the generic
 * "processing failed" branch, emitting a second, misleading warning. The timeout
 * path now warns exactly once with the accurate message.
 *
 * @param {object} ai - GoogleGenAI instance from initGemini().
 * @param {Array<{absPath: string, relPath: string}>} docFileList - Discovered documents.
 * @returns {Promise<Array<object>>} Prepared doc descriptors (inlineText and/or fileData).
 */
async function prepareDocsForGemini(ai, docFileList) {
  if (docFileList.length === 0) return [];

  console.log(` Found ${docFileList.length} document(s) to include as context:`);
  docFileList.forEach(f => console.log(` - ${f.relPath}`));
  console.log('');

  const prepared = [];
  for (const { absPath: docPath, relPath } of docFileList) {
    const ext = path.extname(docPath).toLowerCase();
    const name = relPath; // relPath doubles as the display name everywhere below

    try {
      if (INLINE_TEXT_EXTS.includes(ext)) {
        console.log(` Reading ${name} (inline text)...`);
        const content = await fs.promises.readFile(docPath, 'utf8');
        prepared.push({ type: 'inlineText', fileName: name, content });
        // NOTE: size shown is character count / 1024 — an approximation of KB.
        console.log(` ✓ ${name} ready (${(content.length / 1024).toFixed(1)} KB)`);
      } else if (GEMINI_FILE_API_EXTS.includes(ext)) {
        const mime = MIME_MAP[ext] || 'application/octet-stream';
        console.log(` Uploading ${name} to Gemini File API...`);
        let file = await withRetry(
          () => ai.files.upload({
            file: docPath,
            config: { mimeType: mime, displayName: name },
          }),
          { label: `Gemini doc upload (${name})`, maxRetries: 3 }
        );

        // Poll with timeout — File API processing is asynchronous server-side.
        const pollStart = Date.now();
        while (file.state === 'PROCESSING') {
          if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
            console.warn(` ⚠ ${name} — polling timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s, skipping`);
            file = null; // marks the timeout so we skip below without a second warning
            break;
          }
          await new Promise(r => setTimeout(r, 3000));
          file = await withRetry(
            () => ai.files.get({ name: file.name }),
            { label: `Gemini doc status (${name})`, maxRetries: 2, baseDelay: 1000 }
          );
        }

        // Timed out: already warned above with the accurate message.
        if (!file) continue;
        if (file.state === 'FAILED') {
          console.warn(` ⚠ ${name} — Gemini processing failed, skipping`);
          continue;
        }

        prepared.push({
          type: 'fileData',
          fileName: name,
          mimeType: file.mimeType,
          fileUri: file.uri,
          geminiFileName: file.name, // retained for later File API reuse/cleanup
        });
        console.log(` ✓ ${name} ready (File API)`);
      } else if (GEMINI_UNSUPPORTED.includes(ext)) {
        console.warn(` ⚠ ${name} — format not supported by Gemini, will upload to Firebase only`);
      } else {
        console.warn(` ⚠ ${name} — unknown doc type, skipping`);
      }
    } catch (err) {
      // Best-effort: one bad document never aborts preparation of the rest.
      console.warn(` ⚠ ${name} — failed: ${err.message}`);
    }
  }

  const inlineCount = prepared.filter(d => d.type === 'inlineText').length;
  const fileCount = prepared.filter(d => d.type === 'fileData').length;
  console.log(` ${prepared.length} document(s) prepared (${inlineCount} inline, ${fileCount} File API)`);
  console.log('');
  return prepared;
}
119
+
120
+ // ======================== PROMPT LOADING ========================
121
+
122
/**
 * Load the extraction prompt from prompt.json (expected next to the entry script)
 * and assemble the system message plus the structured extraction prompt text.
 *
 * @param {string} scriptDir - Directory that contains prompt.json.
 * @returns {{systemInstruction: string, promptText: string}}
 * @throws {Error} If prompt.json is missing or is not valid JSON.
 */
function loadPrompt(scriptDir) {
  const promptPath = path.join(scriptDir, 'prompt.json');
  if (!fs.existsSync(promptPath)) {
    throw new Error(`prompt.json not found at "${promptPath}". Ensure it exists alongside the entry script.`);
  }

  let spec;
  try {
    spec = JSON.parse(fs.readFileSync(promptPath, 'utf8'));
  } catch (err) {
    throw new Error(`Failed to parse prompt.json at "${promptPath}": ${err.message}`);
  }

  // Bullet-list the instructions; empty string when none are defined.
  let bulletList = '';
  if (spec.instructions) {
    bulletList = spec.instructions.map(i => `- ${i}`).join('\n');
  }

  // Pretty-printed example of the exact JSON shape the model must emit.
  const structureExample = JSON.stringify(spec.output_structure, null, 2);

  const lines = [];
  lines.push(spec.system);
  lines.push('');
  lines.push(`Task: ${spec.task}`);
  lines.push('');
  lines.push('Instructions:');
  lines.push(bulletList);
  lines.push('');
  lines.push('You MUST respond with ONLY valid JSON (no markdown fences, no extra text).');
  lines.push('Use this exact output structure:');
  lines.push(structureExample);

  return { systemInstruction: spec.system, promptText: lines.join('\n') };
}
156
+
157
+ // ======================== CONTEXT BUILDING HELPERS ========================
158
+
159
/**
 * Build the bridge text that explains the document tiers to Gemini.
 * Documents are bucketed by path marker (.tasks/, .robot/, .docs/, other);
 * execution-plan style task docs additionally get a pre-extracted state summary.
 * @param {Array<{fileName: string, type: string, content?: string}>} contextDocs
 * @returns {string|null} Bridge text, or null when there are no documents.
 */
function buildDocBridgeText(contextDocs) {
  if (contextDocs.length === 0) return null;

  const hasTag = (doc, tag) => doc.fileName.includes(tag);
  const taskDocs = contextDocs.filter(d => hasTag(d, '.tasks/'));
  const robotDocs = contextDocs.filter(d => hasTag(d, '.robot/'));
  const archDocs = contextDocs.filter(d => hasTag(d, '.docs/'));
  const otherDocs = contextDocs.filter(
    d => !hasTag(d, '.tasks/') && !hasTag(d, '.robot/') && !hasTag(d, '.docs/')
  );

  // Accumulate fragments and join once at the end.
  const parts = [`The above includes ${contextDocs.length} supporting document(s) organized in 3 tiers:\n`];

  // Tier 1 — Task execution documents
  if (taskDocs.length > 0) {
    parts.push(`\n=== TIER 1: TASK EXECUTION DOCUMENTS (${taskDocs.length}) — SOURCE OF TRUTH FOR TICKET STATE ===`);
    parts.push(`\nThese contain execution plans, implementation checklists with ✅/⬜/⏸️/🔲 status markers, code maps with exact file paths, sub-ticket breakdowns, business requirements, and PR templates.`);
    parts.push(`\nFiles: ${taskDocs.map(d => d.fileName).join(', ')}`);

    // Pre-extract ticket state from inline execution plans / checklists.
    const planDocs = taskDocs.filter(d => d.type === 'inlineText' && (
      d.fileName.includes('execution-plan') ||
      d.fileName.includes('checklist') ||
      d.fileName.includes('REMAINING-WORK')
    ));
    if (planDocs.length > 0) {
      parts.push(`\n\nPRE-EXTRACTED TICKET STATE (from execution plans & checklists):`);
      for (const doc of planDocs) {
        const statusMatch = doc.content.match(/\*\*Status\*\*:\s*(.+)/);
        const crMatch = doc.content.match(/\*\*CR\*\*:\s*#?(\d+)/);
        const ticketId = crMatch ? `CR${crMatch[1]}` : doc.fileName;
        const status = statusMatch ? statusMatch[1].trim() : 'unknown';

        // Checklist marker counts ([x] done, [ ] todo, ⏸️ deferred, 🔲 blocked).
        const doneCount = (doc.content.match(/- \[x\]/gi) || []).length;
        const todoCount = (doc.content.match(/- \[ \]/g) || []).length;
        const deferredCount = (doc.content.match(/⏸️/g) || []).length;
        const blockedCount = (doc.content.match(/🔲/g) || []).length;

        const openQs = (doc.content.match(/\|\s*Q\d+\s*\|[^|]*\|[^|]*\|\s*(⬜|✅|⏸️)[^|]*\|/g) || []);
        const dbItems = (doc.content.match(/- \[[ x]\] \*\*DB-\d+\*\*.*/g) || []);

        parts.push(`\n ${ticketId} (${doc.fileName}):`);
        parts.push(`\n Plan status: ${status}`);
        parts.push(`\n Checklist: ${doneCount} done, ${todoCount} todo, ${deferredCount} deferred, ${blockedCount} blocked`);
        if (openQs.length > 0) parts.push(`\n Open questions: ${openQs.length} tracked`);
        if (dbItems.length > 0) parts.push(`\n DB prerequisites: ${dbItems.length} items`);
      }
    }

    parts.push(`\n\nCRITICAL: Cross-reference these task documents with the video discussion. When the call mentions a file, class, procedure, module, CR number, or ticket — match it to the corresponding task document. Use exact file paths and component names from the code-map.md and execution-plan.md in your output. The task documents contain the ground truth for what was planned — the call reveals what was actually discussed, confirmed, or changed. Flag any discrepancies between documented state and discussed state.`);
  }

  // Tier 2 — Robot/AI knowledge base
  if (robotDocs.length > 0) {
    parts.push(`\n\n=== TIER 2: CODEBASE KNOWLEDGE BASE (${robotDocs.length}) — FILE MAPS & PATTERNS ===`);
    parts.push(`\nThese contain complete file maps for every app/service, backend API maps, database schemas, auth configs, coding patterns, and naming conventions.`);
    parts.push(`\nUse these to RESOLVE exact file paths when the call mentions a class, component, service, or controller by name.`);
    parts.push(`\nFiles: ${robotDocs.map(d => d.fileName).join(', ')}`);
  }

  // Tier 3 — Project documentation
  if (archDocs.length > 0) {
    parts.push(`\n\n=== TIER 3: PROJECT DOCUMENTATION (${archDocs.length}) — ARCHITECTURE & REFERENCE ===`);
    parts.push(`\nThese provide background on the solution architecture, tech stack, patterns, best practices, payment systems, evaluation system, i18n, and more.`);
    parts.push(`\nUse for context when the call discusses system concepts, design decisions, or technical constraints.`);
    parts.push(`\nFiles: ${archDocs.map(d => d.fileName).join(', ')}`);
  }

  // Everything else — call artifacts
  if (otherDocs.length > 0) {
    parts.push(`\n\n=== CALL DOCUMENTS (${otherDocs.length}) — SUBTITLES, TRANSCRIPTS, NOTES ===`);
    parts.push(`\nFiles: ${otherDocs.map(d => d.fileName).join(', ')}`);
  }

  return parts.join('');
}
237
+
238
+ // ======================== SEGMENT ANALYSIS ========================
239
+
240
/**
 * Process a single video segment with Gemini.
 * Returns a complete model run record (run, input, output).
 *
 * @param {object} ai - GoogleGenAI instance (see initGemini).
 * @param {string} filePath - Local path to the segment video (only read when uploading).
 * @param {string} displayName - Name used in logs and as the File API display name.
 * @param {Array<object>} [contextDocs] - Prepared docs (inlineText/fileData) from prepareDocsForGemini.
 * @param {Array<object>} [previousAnalyses] - Parsed outputs of earlier segments (progressive context).
 * @param {string} [userName] - Current user's name; enables "your_tasks" attribution.
 * @param {string} [scriptDir] - Directory containing prompt.json.
 * @param {object} [segmentOpts] - Per-segment options; see the destructuring comment below.
 * @returns {Promise<{run: object, input: object, output: object}>} Full run record:
 *   run metadata (timings, token usage), a serialisable input summary, and the
 *   raw + parsed model output.
 */
async function processWithGemini(ai, filePath, displayName, contextDocs = [], previousAnalyses = [], userName = '', scriptDir = __dirname, segmentOpts = {}) {
  // segmentOpts: { segmentIndex, totalSegments, segmentStartSec, segmentEndSec, thinkingBudget, boundaryContext, retryHints, existingFileUri, existingFileMime, existingGeminiFileName, storageDownloadUrl }
  const { segmentIndex = 0, totalSegments = 1, segmentStartSec, segmentEndSec, thinkingBudget = 24576,
    boundaryContext = null, retryHints = [],
    existingFileUri = null, existingFileMime = 'video/mp4', existingGeminiFileName = null,
    storageDownloadUrl = null } = segmentOpts;

  // 1. Load structured prompt
  const { systemInstruction, promptText } = loadPrompt(scriptDir);

  // 2. Resolve video file reference (3 strategies, in priority order):
  //    a) Reuse existing Gemini File API URI (retry / focused pass)
  //    b) Use Firebase Storage download URL as External URL (skip Gemini upload)
  //    c) Upload to Gemini File API as fallback
  let file;
  let usedExternalUrl = false;

  if (existingFileUri) {
    // Strategy A: Reuse Gemini File API URI from a previous pass.
    // Synthesized as already-ACTIVE so the polling block below is skipped.
    file = { uri: existingFileUri, mimeType: existingFileMime, name: existingGeminiFileName, state: 'ACTIVE' };
    console.log(` Reusing Gemini File API URI (skip upload)`);
  } else if (storageDownloadUrl) {
    // Strategy B: Use Firebase Storage download URL as Gemini External URL
    // Supported for models >= 2.5; limit 100MB per payload.
    // Gemini fetches the file on-demand — no separate upload + polling needed.
    file = { uri: storageDownloadUrl, mimeType: 'video/mp4', name: null, state: 'ACTIVE' };
    usedExternalUrl = true;
    console.log(` Using Firebase Storage URL as external reference (skip Gemini upload)`);
  } else {
    // Strategy C: Upload to Gemini File API (default fallback)
    console.log(` Uploading to Gemini File API...`);
    file = await withRetry(
      () => ai.files.upload({
        file: filePath,
        config: { mimeType: 'video/mp4', displayName },
      }),
      { label: `Gemini file upload (${displayName})`, maxRetries: 3 }
    );

    // 3. Wait for processing (with polling + retry on get + timeout)
    let waited = 0;
    const pollStart = Date.now();
    while (file.state === 'PROCESSING') {
      if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
        throw new Error(`Gemini file processing timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s for ${displayName}. Try again or increase GEMINI_POLL_TIMEOUT_MS.`);
      }
      // `waited` drives the 1-3 dot progress animation on a single console line.
      process.stdout.write(` Processing${'.'.repeat((waited % 3) + 1)} \r`);
      await new Promise(r => setTimeout(r, 5000));
      waited++;
      file = await withRetry(
        () => ai.files.get({ name: file.name }),
        { label: 'Gemini file status check', maxRetries: 2, baseDelay: 1000 }
      );
    }
    console.log(' Processing complete. ');

    if (file.state === 'FAILED') {
      throw new Error(`Gemini file processing failed for ${displayName}`);
    }
  }

  // 4. Build content parts with SMART CONTEXT MANAGEMENT
  console.log(` Analyzing with ${config.GEMINI_MODEL} [segment ${segmentIndex + 1}/${totalSegments}]...`);

  // The video reference always comes first in the parts list.
  const contentParts = [
    { fileData: { mimeType: file.mimeType, fileUri: file.uri } },
  ];

  // --- Smart document selection by priority ---
  // Reserve tokens for: video (~250K), previous analyses, prompt, thinking
  const prevContextEstimate = estimateTokens(
    buildProgressiveContext(previousAnalyses, userName) || ''
  );
  // Floor of 100K tokens guarantees at least some docs are attached even when
  // the previous-segment context is large.
  const docBudget = Math.max(100000, config.GEMINI_CONTEXT_WINDOW - 350000 - prevContextEstimate);
  console.log(` Context budget: ${(docBudget / 1000).toFixed(0)}K tokens for docs (${contextDocs.length} available)`);

  const { selected: selectedDocs, excluded, stats } = selectDocsByBudget(
    contextDocs, docBudget, { segmentIndex }
  );
  if (excluded.length > 0) {
    console.log(` Context: ${stats.selectedDocs} docs included, ${stats.excludedDocs} lower-priority docs excluded`);
  }

  // Attach selected context documents with VTT time-slicing
  for (const doc of selectedDocs) {
    if (doc.type === 'inlineText') {
      let content = doc.content;
      // Slice VTT/SRT subtitles to this segment's time range if available,
      // so the model only sees captions for the video it is watching.
      const isVtt = doc.fileName.toLowerCase().endsWith('.vtt') || doc.fileName.toLowerCase().endsWith('.srt');
      if (isVtt && segmentStartSec != null && segmentEndSec != null) {
        content = sliceVttForSegment(content, segmentStartSec, segmentEndSec);
        console.log(` VTT sliced to ${formatHMS(segmentStartSec)}–${formatHMS(segmentEndSec)} range`);
      }
      contentParts.push({ text: `=== Document: ${doc.fileName} ===\n${content}` });
    } else if (doc.type === 'fileData') {
      contentParts.push({ fileData: { mimeType: doc.mimeType, fileUri: doc.fileUri } });
    }
  }

  // Document tier bridge text (using selected docs, not all)
  const bridgeText = buildDocBridgeText(selectedDocs);
  if (bridgeText) contentParts.push({ text: bridgeText });

  // --- Progressive previous-segment context (compressed for older segments) ---
  const prevText = buildProgressiveContext(previousAnalyses, userName);
  if (prevText) contentParts.push({ text: prevText });

  // --- Segment focus instructions ---
  const focusText = buildSegmentFocus(segmentIndex, totalSegments, previousAnalyses, userName);
  contentParts.push({ text: focusText });

  // --- Smart boundary overlap context ---
  if (boundaryContext) {
    contentParts.push({ text: boundaryContext });
    console.log(` Boundary context injected (mid-conversation detected)`);
  }

  // --- Retry hints (if this is a quality-gate retry) ---
  if (retryHints.length > 0) {
    const retryText = 'RETRY INSTRUCTIONS — Your previous attempt had quality issues. Address ALL of the following:\n' +
      retryHints.map((h, i) => `${i + 1}. ${h}`).join('\n');
    contentParts.push({ text: retryText });
    console.log(` Retry hints injected (${retryHints.length} correction(s))`);
  }

  // User identity injection
  if (userName) {
    contentParts.push({
      text: `CURRENT USER: "${userName}". This is the person running this analysis. When extracting tasks, action items, change requests, and scope changes — clearly identify which ones are assigned to or owned by "${userName}" vs. others. In the output, populate the "your_tasks" section with a focused summary of everything ${userName} needs to do, decisions they are waiting on, and items they own. If the call mentions ${userName} (even by partial name, first name, or nickname), attribute those tasks to them.`
    });
  }

  // The structured extraction prompt always goes last.
  contentParts.push({ text: promptText });

  // 5. Send request (configurable thinking budget for complex multi-ticket analysis)
  const requestPayload = {
    model: config.GEMINI_MODEL,
    contents: [{ role: 'user', parts: contentParts }],
    config: {
      systemInstruction,
      maxOutputTokens: 65536,
      temperature: 0,
      thinkingConfig: { thinkingBudget },
    },
  };

  const t0 = Date.now();
  const response = await withRetry(
    () => ai.models.generateContent(requestPayload),
    { label: `Gemini segment analysis (${displayName})`, maxRetries: 2, baseDelay: 5000 }
  );
  const durationMs = Date.now() - t0;

  const rawText = response.text;

  // 6. Extract token usage
  // Fields default to 0 when usageMetadata is missing from the response.
  const usage = response.usageMetadata || {};
  const tokenUsage = {
    inputTokens: usage.promptTokenCount || 0,
    outputTokens: usage.candidatesTokenCount || 0,
    totalTokens: usage.totalTokenCount || 0,
    thoughtTokens: usage.thoughtsTokenCount || 0,
  };
  const contextRemaining = config.GEMINI_CONTEXT_WINDOW - tokenUsage.inputTokens;
  const contextUsedPct = ((tokenUsage.inputTokens / config.GEMINI_CONTEXT_WINDOW) * 100).toFixed(1);
  tokenUsage.contextWindow = config.GEMINI_CONTEXT_WINDOW;
  tokenUsage.contextRemaining = contextRemaining;
  tokenUsage.contextUsedPct = parseFloat(contextUsedPct);

  console.log(` Tokens — input: ${tokenUsage.inputTokens.toLocaleString()} | output: ${tokenUsage.outputTokens.toLocaleString()} | thinking: ${tokenUsage.thoughtTokens.toLocaleString()} | total: ${tokenUsage.totalTokens.toLocaleString()}`);
  console.log(` Context — used: ${contextUsedPct}% | remaining: ${contextRemaining.toLocaleString()} / ${config.GEMINI_CONTEXT_WINDOW.toLocaleString()} tokens`);

  // 7. Parse JSON response
  const parsed = extractJson(rawText);

  // Build serialisable input summary
  // (text parts are reduced to length + 300-char preview to keep the record small)
  const inputSummary = contentParts.map(part => {
    if (part.fileData) return { type: 'fileData', mimeType: part.fileData.mimeType, fileUri: part.fileData.fileUri };
    if (part.text) return { type: 'text', chars: part.text.length, preview: part.text.substring(0, 300) };
    return part;
  });

  return {
    run: {
      model: config.GEMINI_MODEL,
      displayName,
      userName,
      timestamp: new Date().toISOString(),
      durationMs,
      tokenUsage,
      systemInstruction,
    },
    input: {
      videoFile: { mimeType: file.mimeType, fileUri: file.uri, displayName, geminiFileName: file.name, usedExternalUrl },
      contextDocuments: contextDocs.map(d => ({ fileName: d.fileName, type: d.type })),
      previousSegmentCount: previousAnalyses.length,
      parts: inputSummary,
      promptText,
    },
    output: {
      raw: rawText,
      parsed,
      // parseSuccess is false when extractJson could not recover valid JSON.
      parseSuccess: parsed !== null,
    },
  };
}
450
+
451
+ // ======================== FINAL COMPILATION ========================
452
+
453
/**
 * Compile all segment analyses into a single unified result using Gemini.
 *
 * Instead of naive merging / flatMap across segments, this sends all segment
 * outputs to Gemini to produce one deduplicated, reconciled, coherent final
 * analysis — the "compiled" result.
 *
 * @param {object} ai - Gemini AI instance
 * @param {Array} allSegmentAnalyses - Array of parsed analysis objects from each segment
 * @param {string} userName - Current user's name
 * @param {string} callName - Name of the call
 * @param {string} scriptDir - Directory where prompt.json lives
 * @param {object} [opts] - Options { thinkingBudget }
 * @returns {{ compiled: object, run: object }} - The compiled analysis + run metadata.
 *   `compiled` is null when the model output could not be parsed as JSON
 *   (run.parseSuccess is false in that case); the caller handles the fallback.
 */
async function compileFinalResult(ai, allSegmentAnalyses, userName, callName, scriptDir, opts = {}) {
  const { thinkingBudget: compilationThinking = 10240 } = opts;
  // Only the system instruction is reused here; the compilation prompt is built below.
  const { systemInstruction } = loadPrompt(scriptDir);

  console.log('');
  console.log('══════════════════════════════════════════════');
  console.log(' FINAL COMPILATION — AI merging all segments');
  console.log('══════════════════════════════════════════════');
  console.log('');

  // Build a detailed dump of all segment analyses
  const segmentDumps = allSegmentAnalyses.map((analysis, idx) => {
    // Strip internal metadata and bloated fields before sending to AI
    const clean = { ...analysis };
    delete clean._geminiMeta;
    delete clean.seg;
    // Remove full transcript/comments arrays — they bloat the compilation input
    // and cause the output to exceed token limits with malformed JSON
    if (clean.tickets) {
      clean.tickets = clean.tickets.map(t => {
        const tc = { ...t };
        // Keep max 5 key comments per ticket for context, drop the rest
        if (tc.comments && tc.comments.length > 5) {
          tc.comments = tc.comments.slice(0, 5);
          tc.comments.push({ note: `...${t.comments.length - 5} more comments omitted for brevity` });
        }
        return tc;
      });
    }
    // Remove any top-level conversation_transcript if the segment produced one
    delete clean.conversation_transcript;
    return `=== SEGMENT ${idx + 1} OF ${allSegmentAnalyses.length} ===\n${JSON.stringify(clean, null, 2)}`;
  }).join('\n\n');

  const compilationPrompt = `You are compiling the FINAL unified analysis from a multi-segment video call.

CONTEXT:
- Call name: "${callName}"
- Current user: "${userName}"
- Total segments analyzed: ${allSegmentAnalyses.length}

Below are the individual segment analyses. Each segment was analyzed independently but with cross-segment context. Your job is to produce ONE final, compiled, deduplicated result.

REQUIRED OUTPUT STRUCTURE:
Your JSON output MUST include ALL of these top-level fields (use empty arrays [] only when genuinely no items exist):
"tickets": [...], // All unique tickets discussed (deduplicated by ticket_id)
"change_requests": [...], // All unique CRs (deduplicated by id)
"action_items": [...], // All unique action items (deduplicated, re-numbered AI-1, AI-2, ...)
"blockers": [...], // All unique blockers (deduplicated, re-numbered BLK-1, ...)
"scope_changes": [...], // All unique scope changes (deduplicated, re-numbered SC-1, ...)
"file_references": [...], // All unique file references (deduplicated by resolved_path)
"your_tasks": { ... }, // Unified task summary for "${userName}"
"summary": "..." // ONE coherent executive summary for the entire call (3-5 sentences)

OUTPUT FORMAT RULES:
- Respond with ONLY valid JSON. No markdown fences, no extra text before or after.
- Double-check your JSON syntax: no trailing commas, no doubled braces }}, no doubled commas ,,.
- Keep descriptions complete but compact — do not pad or elaborate beyond what the segments contain.
- DO NOT include "conversation_transcript" field.
- Keep only the 3-5 most decisive comments per ticket. Do not bulk-copy all comments from segments.

COMPILATION RULES:
1. STRICT DEDUP: Every ticket, CR, action item, blocker, scope change, and file reference MUST appear EXACTLY ONCE. Match by ID first, then by description similarity. NEVER repeat the same item.
2. NAME NORMALIZATION: Merge variant names for the same person:
- Case differences ("Youssef Adel" / "youssef adel") → use proper case
- Role suffixes ("Mohamed Elhadi" / "Mohamed Elhadi (Service Desk)") → use the base name only, drop role qualifiers
- Nicknames or partial names referring to the same person → normalize to full proper name
Ensure your_tasks.user_name uses the properly-cased version of "${userName}".
3. RECONCILE CONFLICTS: If two segments give different status for the same item, use the LATEST/most-specific state.
4. MERGE SUMMARIES: Write ONE coherent executive summary for the entire call (3-5 sentences max). Not per-segment.
5. UNIFIED your_tasks: Produce ONE your_tasks section for "${userName}" — deduplicated, final states only.
6. SEQUENTIAL IDs: Re-number action items (AI-1, AI-2, ...), scope changes (SC-1, SC-2, ...), blockers (BLK-1, ...) sequentially. Keep real CR/ticket numbers (e.g. CR31296872) unchanged.
7. FILE REFERENCES: Merge and deduplicate — keep the most specific resolved_path. Each file appears ONCE.
8. PRESERVE ALL DATA: Include every unique ticket, action item, blocker, etc. from the segments. Do NOT omit items for brevity. The goal is completeness with deduplication, not summarization.
9. PRESERVE source_segment: Every item in the input has a "source_segment" field (1-based integer) indicating which video segment it originated from. You MUST preserve this field on EVERY output item (action_items, change_requests, blockers, scope_changes, file_references, and inside tickets: comments, code_changes, video_segments). For your_tasks sub-arrays (tasks_todo, tasks_waiting_on_others, decisions_needed), also preserve source_segment. If an item appears in multiple segments, keep the source_segment of the FIRST (earliest) occurrence.

You MUST respond with ONLY valid JSON (no markdown fences, no extra text).
Use the same output structure as the individual segment analyses.

SEGMENT ANALYSES:
${segmentDumps}`;

  const contentParts = [{ text: compilationPrompt }];

  const requestPayload = {
    model: config.GEMINI_MODEL,
    contents: [{ role: 'user', parts: contentParts }],
    config: {
      systemInstruction: `${systemInstruction}\n\nYou are now in COMPILATION MODE — your job is to merge multiple segment analyses into one final unified output. Deduplicate, reconcile conflicts, and produce the definitive analysis. Output valid JSON only — no markdown fences.`,
      maxOutputTokens: 65536,
      temperature: 0,
      // Thinking tokens share the maxOutputTokens pool in Gemini 2.5+ models.
      // Default 10240 leaves ~55K for output — enough for full structured merge.
      // Too low (4096) → model hits ceiling and produces minimal output.
      // Too high (16384) → eats into output budget causing truncation.
      thinkingConfig: { thinkingBudget: compilationThinking },
    },
  };

  const t0 = Date.now();
  console.log(` Compiling with ${config.GEMINI_MODEL}...`);
  const response = await withRetry(
    () => ai.models.generateContent(requestPayload),
    { label: 'Gemini final compilation', maxRetries: 2, baseDelay: 5000 }
  );
  const durationMs = Date.now() - t0;
  const rawText = response.text;

  // Token usage — fields default to 0 when usageMetadata is absent.
  const usage = response.usageMetadata || {};
  const tokenUsage = {
    inputTokens: usage.promptTokenCount || 0,
    outputTokens: usage.candidatesTokenCount || 0,
    totalTokens: usage.totalTokenCount || 0,
    thoughtTokens: usage.thoughtsTokenCount || 0,
  };
  const contextUsedPct = ((tokenUsage.inputTokens / config.GEMINI_CONTEXT_WINDOW) * 100).toFixed(1);
  tokenUsage.contextWindow = config.GEMINI_CONTEXT_WINDOW;
  tokenUsage.contextRemaining = config.GEMINI_CONTEXT_WINDOW - tokenUsage.inputTokens;
  tokenUsage.contextUsedPct = parseFloat(contextUsedPct);

  console.log(` Tokens — input: ${tokenUsage.inputTokens.toLocaleString()} | output: ${tokenUsage.outputTokens.toLocaleString()} | thinking: ${tokenUsage.thoughtTokens.toLocaleString()} | total: ${tokenUsage.totalTokens.toLocaleString()}`);
  console.log(` Context — used: ${contextUsedPct}% | remaining: ${tokenUsage.contextRemaining.toLocaleString()} / ${config.GEMINI_CONTEXT_WINDOW.toLocaleString()} tokens`);
  console.log(` Compilation duration: ${(durationMs / 1000).toFixed(1)}s`);

  // Parse compiled result — null signals the caller to fall back to raw merge.
  const compiled = extractJson(rawText);

  if (!compiled) {
    console.warn(' ⚠ Failed to parse compiled result — falling back to raw segment merge');
  } else {
    console.log(' ✓ Final compilation complete');
  }

  return {
    compiled,
    raw: rawText,
    run: {
      model: config.GEMINI_MODEL,
      type: 'compilation',
      timestamp: new Date().toISOString(),
      durationMs,
      tokenUsage,
      segmentCount: allSegmentAnalyses.length,
      parseSuccess: compiled !== null,
    },
  };
}
616
+
617
+ // ======================== DYNAMIC MODE — VIDEO CONTEXT EXTRACTION ========================
618
+
619
/**
 * Analyze a video segment for dynamic mode — produces a comprehensive text summary
 * instead of structured JSON. Used as context for dynamic document generation.
 *
 * The uploaded file is always removed from the Gemini File API before this
 * function returns — including when polling times out, processing reports
 * FAILED, or generation exhausts its retries — so uploads never leak.
 *
 * @param {object} ai - GoogleGenAI instance
 * @param {string} filePath - Path to video segment on disk
 * @param {string} displayName - Display name (e.g. "segment_00.mp4")
 * @param {object} [opts] - { thinkingBudget, segmentIndex, totalSegments }
 * @returns {Promise<{summary: string, durationMs: number, tokenUsage: object}>}
 * @throws {Error} If Gemini file processing times out or reports FAILED, or
 *   if upload/analysis still fails after retries.
 */
async function analyzeVideoForContext(ai, filePath, displayName, opts = {}) {
  const { thinkingBudget = 8192, segmentIndex = 0, totalSegments = 1 } = opts;

  // 1. Upload video to Gemini File API
  console.log(`  Uploading ${displayName} to Gemini File API...`);
  let file = await withRetry(
    () => ai.files.upload({
      file: filePath,
      config: { mimeType: 'video/mp4', displayName },
    }),
    { label: `Gemini video upload (${displayName})`, maxRetries: 3 }
  );

  // From here on the remote file exists — wrap everything in try/finally so
  // the cleanup below runs on every exit path, not just the success path.
  try {
    // 2. Poll until processing complete (bounded by GEMINI_POLL_TIMEOUT_MS)
    let waited = 0;
    const pollStart = Date.now();
    while (file.state === 'PROCESSING') {
      if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
        throw new Error(`Gemini file processing timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s for ${displayName}`);
      }
      // Single-line 1–3 dot progress indicator (carriage return, no newline)
      process.stdout.write(`  Processing${'.'.repeat((waited % 3) + 1)}   \r`);
      await new Promise(r => setTimeout(r, 5000));
      waited++;
      file = await withRetry(
        () => ai.files.get({ name: file.name }),
        { label: 'Gemini file status check', maxRetries: 2, baseDelay: 1000 }
      );
    }
    console.log('  Processing complete.   ');

    if (file.state === 'FAILED') {
      throw new Error(`Gemini file processing failed for ${displayName}`);
    }

    // 3. Build prompt for comprehensive summary
    const segmentLabel = totalSegments > 1
      ? `This is segment ${segmentIndex + 1} of ${totalSegments} from a longer video.`
      : 'This is the complete video.';

    const prompt = `You are an expert analyst. Watch this video carefully and produce a COMPREHENSIVE summary.

${segmentLabel}

Your summary must capture ALL of the following (where applicable):
1. **Transcript / Dialog**: Who said what — capture all speakers and their statements as accurately as possible. Use speaker names if visible/mentioned, otherwise "Speaker 1", "Speaker 2", etc.
2. **Topics Discussed**: Every topic, subject, or theme covered — with detail, not just labels.
3. **Decisions Made**: Any decisions, agreements, or conclusions reached.
4. **Action Items**: Any tasks assigned, commitments made, or next steps discussed.
5. **Technical Details**: Code, architecture, configurations, APIs, tools, or technologies mentioned.
6. **Problems / Blockers**: Issues raised, bugs discussed, challenges identified.
7. **Key Information**: Numbers, dates, names, URLs, file paths, or any specific data mentioned.
8. **Visual Content**: Screen shares, presentations, diagrams, code on screen — describe what is shown.
9. **Context & Background**: Any background information or context provided in the discussion.

FORMAT:
- Write in clear, detailed prose with section headers.
- Include direct quotes for important statements.
- Be thorough — capture everything, even seemingly minor details.
- This summary will be used as context for generating documents, so completeness is critical.
- Do NOT use JSON. Write natural language with Markdown formatting.`;

    const contentParts = [
      { fileData: { mimeType: file.mimeType, fileUri: file.uri } },
      { text: prompt },
    ];

    // 4. Send to Gemini (model read from config at call time so runtime
    // model switches via setActiveModel() are honored)
    console.log(`  Analyzing with ${config.GEMINI_MODEL} [segment ${segmentIndex + 1}/${totalSegments}]...`);
    const requestPayload = {
      model: config.GEMINI_MODEL,
      contents: [{ role: 'user', parts: contentParts }],
      config: {
        systemInstruction: 'You are a meticulous video analyst. Produce comprehensive, detailed summaries that capture everything in the video. Write in clear Markdown prose.',
        maxOutputTokens: 32768,
        temperature: 0.1,
        thinkingConfig: { thinkingBudget },
      },
    };

    const t0 = Date.now();
    const response = await withRetry(
      () => ai.models.generateContent(requestPayload),
      { label: `Dynamic video analysis (${displayName})`, maxRetries: 2, baseDelay: 5000 }
    );
    const durationMs = Date.now() - t0;

    const summary = (response.text || '').trim();

    const usage = response.usageMetadata || {};
    const tokenUsage = {
      inputTokens: usage.promptTokenCount || 0,
      outputTokens: usage.candidatesTokenCount || 0,
      totalTokens: usage.totalTokenCount || 0,
      thoughtTokens: usage.thoughtsTokenCount || 0,
    };

    console.log(`  Tokens — input: ${tokenUsage.inputTokens.toLocaleString()} | output: ${tokenUsage.outputTokens.toLocaleString()} | thinking: ${tokenUsage.thoughtTokens.toLocaleString()}`);
    console.log(`  ✓ Summary: ${summary.length.toLocaleString()} chars in ${(durationMs / 1000).toFixed(1)}s`);

    return { summary, durationMs, tokenUsage };
  } finally {
    // Cleanup: delete uploaded file from Gemini File API. Best-effort — a
    // failed delete is logged but never masks the primary result/error.
    try {
      await ai.files.delete({ name: file.name });
    } catch (cleanupErr) {
      console.warn(`  ⚠ Gemini file cleanup failed: ${cleanupErr.message}`);
    }
  }
}
737
+
738
+ // ======================== CLEANUP ========================
739
+
740
/**
 * Delete uploaded files from Gemini File API.
 * Call after all analysis (including focused passes) is complete.
 *
 * Deletions are independent network calls, so they run in parallel.
 * Individual failures are ignored — files may already be expired server-side.
 *
 * @param {object} ai - GoogleGenAI instance
 * @param {string|null} geminiFileName - The Gemini file resource name (from file.name)
 * @param {Array} [contextDocs] - Prepared context docs (may contain File API uploads)
 * @returns {Promise<void>}
 */
async function cleanupGeminiFiles(ai, geminiFileName, contextDocs = []) {
  // Collect the primary upload plus any File API context-doc uploads.
  const toDelete = [];
  if (geminiFileName) toDelete.push(geminiFileName);
  for (const doc of contextDocs) {
    if (doc.type === 'fileData' && doc.geminiFileName) {
      toDelete.push(doc.geminiFileName);
    }
  }
  if (toDelete.length === 0) return;

  // Fire all deletes concurrently; allSettled never rejects, so one failed
  // delete cannot abort the others. Count only the successful ones.
  const results = await Promise.allSettled(
    toDelete.map(name => ai.files.delete({ name }))
  );
  const cleaned = results.filter(r => r.status === 'fulfilled').length;
  if (cleaned > 0) {
    console.log(`  🧹 Cleaned up ${cleaned} Gemini File API upload(s)`);
  }
}
769
+
770
// Public API of this module. loadPrompt, processWithGemini,
// compileFinalResult and buildDocBridgeText are presumably defined earlier
// in this file (outside this excerpt) — verify before refactoring exports.
module.exports = {
  initGemini,
  prepareDocsForGemini,
  loadPrompt,
  processWithGemini,
  compileFinalResult,
  buildDocBridgeText,
  analyzeVideoForContext,
  cleanupGeminiFiles,
};