task-summary-extractor 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +605 -0
- package/EXPLORATION.md +451 -0
- package/QUICK_START.md +272 -0
- package/README.md +544 -0
- package/bin/taskex.js +64 -0
- package/package.json +63 -0
- package/process_and_upload.js +107 -0
- package/prompt.json +265 -0
- package/setup.js +505 -0
- package/src/config.js +327 -0
- package/src/logger.js +355 -0
- package/src/pipeline.js +2006 -0
- package/src/renderers/markdown.js +968 -0
- package/src/services/firebase.js +106 -0
- package/src/services/gemini.js +779 -0
- package/src/services/git.js +329 -0
- package/src/services/video.js +305 -0
- package/src/utils/adaptive-budget.js +266 -0
- package/src/utils/change-detector.js +466 -0
- package/src/utils/cli.js +415 -0
- package/src/utils/context-manager.js +499 -0
- package/src/utils/cost-tracker.js +156 -0
- package/src/utils/deep-dive.js +549 -0
- package/src/utils/diff-engine.js +315 -0
- package/src/utils/dynamic-mode.js +567 -0
- package/src/utils/focused-reanalysis.js +317 -0
- package/src/utils/format.js +32 -0
- package/src/utils/fs.js +39 -0
- package/src/utils/global-config.js +315 -0
- package/src/utils/health-dashboard.js +216 -0
- package/src/utils/inject-cli-flags.js +58 -0
- package/src/utils/json-parser.js +245 -0
- package/src/utils/learning-loop.js +301 -0
- package/src/utils/progress-updater.js +451 -0
- package/src/utils/progress.js +166 -0
- package/src/utils/prompt.js +32 -0
- package/src/utils/quality-gate.js +429 -0
- package/src/utils/retry.js +129 -0
|
@@ -0,0 +1,779 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini AI service — init, document preparation, segment analysis,
|
|
3
|
+
* and final compilation of all segment outputs into one unified result.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const fs = require('fs');
|
|
9
|
+
const path = require('path');
|
|
10
|
+
const config = require('../config');
|
|
11
|
+
const {
|
|
12
|
+
GEMINI_API_KEY,
|
|
13
|
+
GEMINI_FILE_API_EXTS,
|
|
14
|
+
INLINE_TEXT_EXTS,
|
|
15
|
+
GEMINI_UNSUPPORTED,
|
|
16
|
+
MIME_MAP,
|
|
17
|
+
GEMINI_POLL_TIMEOUT_MS,
|
|
18
|
+
} = config;
|
|
19
|
+
// Access config.GEMINI_MODEL and config.GEMINI_CONTEXT_WINDOW at call time
|
|
20
|
+
// (not destructured) so runtime model changes via setActiveModel() are visible.
|
|
21
|
+
const { extractJson } = require('../utils/json-parser');
|
|
22
|
+
const {
|
|
23
|
+
selectDocsByBudget,
|
|
24
|
+
sliceVttForSegment,
|
|
25
|
+
buildProgressiveContext,
|
|
26
|
+
buildSegmentFocus,
|
|
27
|
+
estimateTokens,
|
|
28
|
+
} = require('../utils/context-manager');
|
|
29
|
+
const { formatHMS } = require('../utils/format');
|
|
30
|
+
const { withRetry } = require('../utils/retry');
|
|
31
|
+
|
|
32
|
+
// ======================== INIT ========================
|
|
33
|
+
|
|
34
|
+
/**
 * Initialise and return a Gemini API client.
 * Uses GEMINI_API_KEY from config; the SDK is required lazily so code
 * paths that never talk to Gemini do not pay the module-load cost.
 * @returns {Promise<object>} configured GoogleGenAI instance
 */
async function initGemini() {
  // Lazy require keeps startup fast when Gemini is unused.
  const { GoogleGenAI } = require('@google/genai');
  return new GoogleGenAI({ apiKey: GEMINI_API_KEY });
}
|
|
39
|
+
|
|
40
|
+
// ======================== DOCUMENT PREPARATION ========================
|
|
41
|
+
|
|
42
|
+
/**
 * Prepare documents for Gemini context — inline text files, upload PDFs via File API, skip unsupported.
 * Accepts array of { absPath, relPath } from findDocsRecursive.
 *
 * @param {object} ai - initialized GoogleGenAI client (from initGemini)
 * @param {Array<{absPath: string, relPath: string}>} docFileList - documents discovered on disk
 * @returns {Promise<Array>} prepared docs; each entry is either
 *   { type: 'inlineText', fileName, content } or
 *   { type: 'fileData', fileName, mimeType, fileUri, geminiFileName }.
 *   Unsupported, timed-out, or failed documents are logged and skipped — never thrown.
 */
async function prepareDocsForGemini(ai, docFileList) {
  if (docFileList.length === 0) return [];

  console.log(`  Found ${docFileList.length} document(s) to include as context:`);
  docFileList.forEach(f => console.log(`    - ${f.relPath}`));
  console.log('');

  const prepared = [];
  for (const { absPath: docPath, relPath } of docFileList) {
    const ext = path.extname(docPath).toLowerCase();
    const name = relPath;

    try {
      if (INLINE_TEXT_EXTS.includes(ext)) {
        // Text formats are read from disk and inlined directly into the prompt.
        console.log(`  Reading ${name} (inline text)...`);
        const content = await fs.promises.readFile(docPath, 'utf8');
        prepared.push({ type: 'inlineText', fileName: name, content });
        console.log(`  ✓ ${name} ready (${(content.length / 1024).toFixed(1)} KB)`);
      } else if (GEMINI_FILE_API_EXTS.includes(ext)) {
        // Binary formats (e.g. PDF) are uploaded through the Gemini File API.
        const mime = MIME_MAP[ext] || 'application/octet-stream';
        console.log(`  Uploading ${name} to Gemini File API...`);
        let file = await withRetry(
          () => ai.files.upload({
            file: docPath,
            config: { mimeType: mime, displayName: name },
          }),
          { label: `Gemini doc upload (${name})`, maxRetries: 3 }
        );

        // Poll with timeout — a doc stuck in PROCESSING is dropped (file = null)
        // so a single slow document cannot stall the whole pipeline.
        const pollStart = Date.now();
        while (file.state === 'PROCESSING') {
          if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
            console.warn(`  ⚠ ${name} — polling timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s, skipping`);
            file = null;
            break;
          }
          await new Promise(r => setTimeout(r, 3000));
          file = await withRetry(
            () => ai.files.get({ name: file.name }),
            { label: `Gemini doc status (${name})`, maxRetries: 2, baseDelay: 1000 }
          );
        }

        // Covers both the timeout sentinel (null) and a server-side FAILED state.
        if (!file || file.state === 'FAILED') {
          console.warn(`  ⚠ ${name} — Gemini processing failed, skipping`);
          continue;
        }

        prepared.push({
          type: 'fileData',
          fileName: name,
          mimeType: file.mimeType,
          fileUri: file.uri,
          // Gemini-side resource name, needed later for ai.files.get/delete.
          geminiFileName: file.name,
        });
        console.log(`  ✓ ${name} ready (File API)`);
      } else if (GEMINI_UNSUPPORTED.includes(ext)) {
        console.warn(`  ⚠ ${name} — format not supported by Gemini, will upload to Firebase only`);
      } else {
        console.warn(`  ⚠ ${name} — unknown doc type, skipping`);
      }
    } catch (err) {
      // Deliberate best-effort: one bad document must not abort preparation of the rest.
      console.warn(`  ⚠ ${name} — failed: ${err.message}`);
    }
  }

  const inlineCount = prepared.filter(d => d.type === 'inlineText').length;
  const fileCount = prepared.filter(d => d.type === 'fileData').length;
  console.log(`  ${prepared.length} document(s) prepared (${inlineCount} inline, ${fileCount} File API)`);
  console.log('');
  return prepared;
}
|
|
119
|
+
|
|
120
|
+
// ======================== PROMPT LOADING ========================
|
|
121
|
+
|
|
122
|
+
/**
 * Load prompt from prompt.json — builds a system message + structured extraction prompt.
 *
 * @param {string} scriptDir - directory expected to contain prompt.json
 * @returns {{systemInstruction: string, promptText: string}} the raw system
 *   message plus the fully assembled extraction prompt text
 * @throws {Error} when prompt.json is missing or contains invalid JSON
 */
function loadPrompt(scriptDir) {
  const promptPath = path.join(scriptDir, 'prompt.json');
  if (!fs.existsSync(promptPath)) {
    throw new Error(`prompt.json not found at "${promptPath}". Ensure it exists alongside the entry script.`);
  }
  let promptConfig;
  try {
    promptConfig = JSON.parse(fs.readFileSync(promptPath, 'utf8'));
  } catch (err) {
    throw new Error(`Failed to parse prompt.json at "${promptPath}": ${err.message}`);
  }

  // Tolerate a missing or non-array "instructions" field instead of crashing
  // on .map() when prompt.json carries a malformed value (e.g. a string).
  const instructions = Array.isArray(promptConfig.instructions)
    ? promptConfig.instructions.map(i => `- ${i}`).join('\n')
    : '';

  // Default to {} so a missing output_structure does not inject the literal
  // string "undefined" into the prompt sent to the model
  // (JSON.stringify(undefined) returns undefined, not a string).
  const outputExample = JSON.stringify(promptConfig.output_structure || {}, null, 2);

  const text = [
    promptConfig.system,
    '',
    `Task: ${promptConfig.task}`,
    '',
    'Instructions:',
    instructions,
    '',
    'You MUST respond with ONLY valid JSON (no markdown fences, no extra text).',
    'Use this exact output structure:',
    outputExample,
  ].join('\n');

  return { systemInstruction: promptConfig.system, promptText: text };
}
|
|
156
|
+
|
|
157
|
+
// ======================== CONTEXT BUILDING HELPERS ========================
|
|
158
|
+
|
|
159
|
+
/**
 * Build the "bridge" text that explains the document tiers to Gemini.
 * Returns null when no context documents exist; otherwise one string that
 * lists tier membership and pre-extracted ticket state scraped from
 * execution-plan / checklist markdown.
 */
function buildDocBridgeText(contextDocs) {
  if (contextDocs.length === 0) return null;

  // Partition the docs into the three named tiers plus everything else.
  const hasMarker = (doc, marker) => doc.fileName.includes(marker);
  const taskTier = contextDocs.filter(d => hasMarker(d, '.tasks/'));
  const robotTier = contextDocs.filter(d => hasMarker(d, '.robot/'));
  const archTier = contextDocs.filter(d => hasMarker(d, '.docs/'));
  const callTier = contextDocs.filter(d =>
    !hasMarker(d, '.tasks/') && !hasMarker(d, '.robot/') && !hasMarker(d, '.docs/')
  );

  // Count occurrences of a fresh regex literal in a string (0 when none).
  const countOf = (text, re) => (text.match(re) || []).length;

  const pieces = [`The above includes ${contextDocs.length} supporting document(s) organized in 3 tiers:\n`];

  // Tier 1 — task execution documents (ground truth for ticket state).
  if (taskTier.length > 0) {
    pieces.push(`\n=== TIER 1: TASK EXECUTION DOCUMENTS (${taskTier.length}) — SOURCE OF TRUTH FOR TICKET STATE ===`);
    pieces.push(`\nThese contain execution plans, implementation checklists with ✅/⬜/⏸️/🔲 status markers, code maps with exact file paths, sub-ticket breakdowns, business requirements, and PR templates.`);
    pieces.push(`\nFiles: ${taskTier.map(d => d.fileName).join(', ')}`);

    // Pre-scrape status/checklist counters from inline execution-plan docs so
    // the model gets ticket state without re-deriving it from raw markdown.
    const planDocs = taskTier.filter(d => d.type === 'inlineText' && (
      d.fileName.includes('execution-plan') ||
      d.fileName.includes('checklist') ||
      d.fileName.includes('REMAINING-WORK')
    ));
    if (planDocs.length > 0) {
      pieces.push(`\n\nPRE-EXTRACTED TICKET STATE (from execution plans & checklists):`);
      for (const plan of planDocs) {
        const statusHit = plan.content.match(/\*\*Status\*\*:\s*(.+)/);
        const crHit = plan.content.match(/\*\*CR\*\*:\s*#?(\d+)/);
        const ticketLabel = crHit ? `CR${crHit[1]}` : plan.fileName;
        const planStatus = statusHit ? statusHit[1].trim() : 'unknown';

        const done = countOf(plan.content, /- \[x\]/gi);
        const todo = countOf(plan.content, /- \[ \]/g);
        const deferred = countOf(plan.content, /⏸️/g);
        const blocked = countOf(plan.content, /🔲/g);

        const questionRows = plan.content.match(/\|\s*Q\d+\s*\|[^|]*\|[^|]*\|\s*(⬜|✅|⏸️)[^|]*\|/g) || [];
        const dbRows = plan.content.match(/- \[[ x]\] \*\*DB-\d+\*\*.*/g) || [];

        pieces.push(`\n  ${ticketLabel} (${plan.fileName}):`);
        pieces.push(`\n    Plan status: ${planStatus}`);
        pieces.push(`\n    Checklist: ${done} done, ${todo} todo, ${deferred} deferred, ${blocked} blocked`);
        if (questionRows.length > 0) pieces.push(`\n    Open questions: ${questionRows.length} tracked`);
        if (dbRows.length > 0) pieces.push(`\n    DB prerequisites: ${dbRows.length} items`);
      }
    }

    pieces.push(`\n\nCRITICAL: Cross-reference these task documents with the video discussion. When the call mentions a file, class, procedure, module, CR number, or ticket — match it to the corresponding task document. Use exact file paths and component names from the code-map.md and execution-plan.md in your output. The task documents contain the ground truth for what was planned — the call reveals what was actually discussed, confirmed, or changed. Flag any discrepancies between documented state and discussed state.`);
  }

  // Tier 2 — codebase knowledge base (file maps, patterns).
  if (robotTier.length > 0) {
    pieces.push(`\n\n=== TIER 2: CODEBASE KNOWLEDGE BASE (${robotTier.length}) — FILE MAPS & PATTERNS ===`);
    pieces.push(`\nThese contain complete file maps for every app/service, backend API maps, database schemas, auth configs, coding patterns, and naming conventions.`);
    pieces.push(`\nUse these to RESOLVE exact file paths when the call mentions a class, component, service, or controller by name.`);
    pieces.push(`\nFiles: ${robotTier.map(d => d.fileName).join(', ')}`);
  }

  // Tier 3 — project documentation (architecture & reference).
  if (archTier.length > 0) {
    pieces.push(`\n\n=== TIER 3: PROJECT DOCUMENTATION (${archTier.length}) — ARCHITECTURE & REFERENCE ===`);
    pieces.push(`\nThese provide background on the solution architecture, tech stack, patterns, best practices, payment systems, evaluation system, i18n, and more.`);
    pieces.push(`\nUse for context when the call discusses system concepts, design decisions, or technical constraints.`);
    pieces.push(`\nFiles: ${archTier.map(d => d.fileName).join(', ')}`);
  }

  // Everything else — transcripts, subtitles, call notes.
  if (callTier.length > 0) {
    pieces.push(`\n\n=== CALL DOCUMENTS (${callTier.length}) — SUBTITLES, TRANSCRIPTS, NOTES ===`);
    pieces.push(`\nFiles: ${callTier.map(d => d.fileName).join(', ')}`);
  }

  return pieces.join('');
}
|
|
237
|
+
|
|
238
|
+
// ======================== SEGMENT ANALYSIS ========================
|
|
239
|
+
|
|
240
|
+
/**
 * Process a single video segment with Gemini.
 * Returns a complete model run record (run, input, output).
 *
 * @param {object} ai - initialized GoogleGenAI client (see initGemini)
 * @param {string} filePath - local path of the segment video (used only when uploading, strategy C)
 * @param {string} displayName - human-readable name used in logs and upload metadata
 * @param {Array}  [contextDocs] - prepared docs from prepareDocsForGemini ({ type, fileName, ... })
 * @param {Array}  [previousAnalyses] - parsed outputs of earlier segments, for progressive context
 * @param {string} [userName] - current user; injected so tasks can be attributed to them
 * @param {string} [scriptDir] - directory containing prompt.json
 * @param {object} [segmentOpts] - see the inline comment below for recognized keys
 * @returns {Promise<{run: object, input: object, output: object}>} run metadata,
 *   a serialisable summary of what was sent, and the raw + parsed model output
 * @throws {Error} when Gemini file processing times out or fails (strategy C only)
 */
async function processWithGemini(ai, filePath, displayName, contextDocs = [], previousAnalyses = [], userName = '', scriptDir = __dirname, segmentOpts = {}) {
  // segmentOpts: { segmentIndex, totalSegments, segmentStartSec, segmentEndSec, thinkingBudget, boundaryContext, retryHints, existingFileUri, existingFileMime, existingGeminiFileName, storageDownloadUrl }
  const { segmentIndex = 0, totalSegments = 1, segmentStartSec, segmentEndSec, thinkingBudget = 24576,
    boundaryContext = null, retryHints = [],
    existingFileUri = null, existingFileMime = 'video/mp4', existingGeminiFileName = null,
    storageDownloadUrl = null } = segmentOpts;

  // 1. Load structured prompt
  const { systemInstruction, promptText } = loadPrompt(scriptDir);

  // 2. Resolve video file reference (3 strategies, in priority order):
  //    a) Reuse existing Gemini File API URI (retry / focused pass)
  //    b) Use Firebase Storage download URL as External URL (skip Gemini upload)
  //    c) Upload to Gemini File API as fallback
  let file;
  let usedExternalUrl = false;

  if (existingFileUri) {
    // Strategy A: Reuse Gemini File API URI from a previous pass.
    // State is assumed ACTIVE since a prior pass already waited for processing.
    file = { uri: existingFileUri, mimeType: existingFileMime, name: existingGeminiFileName, state: 'ACTIVE' };
    console.log(`  Reusing Gemini File API URI (skip upload)`);
  } else if (storageDownloadUrl) {
    // Strategy B: Use Firebase Storage download URL as Gemini External URL
    // Supported for models >= 2.5; limit 100MB per payload.
    // Gemini fetches the file on-demand — no separate upload + polling needed.
    // NOTE(review): assumes the storage URL is fetchable by Gemini — confirm object ACL.
    file = { uri: storageDownloadUrl, mimeType: 'video/mp4', name: null, state: 'ACTIVE' };
    usedExternalUrl = true;
    console.log(`  Using Firebase Storage URL as external reference (skip Gemini upload)`);
  } else {
    // Strategy C: Upload to Gemini File API (default fallback)
    console.log(`  Uploading to Gemini File API...`);
    file = await withRetry(
      () => ai.files.upload({
        file: filePath,
        config: { mimeType: 'video/mp4', displayName },
      }),
      { label: `Gemini file upload (${displayName})`, maxRetries: 3 }
    );

    // 3. Wait for processing (with polling + retry on get + timeout)
    let waited = 0;
    const pollStart = Date.now();
    while (file.state === 'PROCESSING') {
      if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
        throw new Error(`Gemini file processing timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s for ${displayName}. Try again or increase GEMINI_POLL_TIMEOUT_MS.`);
      }
      // Animated "Processing..." indicator; \r rewrites the same console line.
      process.stdout.write(`  Processing${'.'.repeat((waited % 3) + 1)}   \r`);
      await new Promise(r => setTimeout(r, 5000));
      waited++;
      file = await withRetry(
        () => ai.files.get({ name: file.name }),
        { label: 'Gemini file status check', maxRetries: 2, baseDelay: 1000 }
      );
    }
    console.log('  Processing complete.   ');

    if (file.state === 'FAILED') {
      throw new Error(`Gemini file processing failed for ${displayName}`);
    }
  }

  // 4. Build content parts with SMART CONTEXT MANAGEMENT
  console.log(`  Analyzing with ${config.GEMINI_MODEL} [segment ${segmentIndex + 1}/${totalSegments}]...`);

  // The video reference always comes first in the parts list.
  const contentParts = [
    { fileData: { mimeType: file.mimeType, fileUri: file.uri } },
  ];

  // --- Smart document selection by priority ---
  // Reserve tokens for: video (~250K), previous analyses, prompt, thinking
  const prevContextEstimate = estimateTokens(
    buildProgressiveContext(previousAnalyses, userName) || ''
  );
  // Floor of 100K guarantees docs always get some budget even on small windows.
  const docBudget = Math.max(100000, config.GEMINI_CONTEXT_WINDOW - 350000 - prevContextEstimate);
  console.log(`  Context budget: ${(docBudget / 1000).toFixed(0)}K tokens for docs (${contextDocs.length} available)`);

  const { selected: selectedDocs, excluded, stats } = selectDocsByBudget(
    contextDocs, docBudget, { segmentIndex }
  );
  if (excluded.length > 0) {
    console.log(`  Context: ${stats.selectedDocs} docs included, ${stats.excludedDocs} lower-priority docs excluded`);
  }

  // Attach selected context documents with VTT time-slicing
  for (const doc of selectedDocs) {
    if (doc.type === 'inlineText') {
      let content = doc.content;
      // Slice VTT to segment time range if available
      const isVtt = doc.fileName.toLowerCase().endsWith('.vtt') || doc.fileName.toLowerCase().endsWith('.srt');
      if (isVtt && segmentStartSec != null && segmentEndSec != null) {
        content = sliceVttForSegment(content, segmentStartSec, segmentEndSec);
        console.log(`  VTT sliced to ${formatHMS(segmentStartSec)}–${formatHMS(segmentEndSec)} range`);
      }
      contentParts.push({ text: `=== Document: ${doc.fileName} ===\n${content}` });
    } else if (doc.type === 'fileData') {
      contentParts.push({ fileData: { mimeType: doc.mimeType, fileUri: doc.fileUri } });
    }
  }

  // Document tier bridge text (using selected docs, not all)
  const bridgeText = buildDocBridgeText(selectedDocs);
  if (bridgeText) contentParts.push({ text: bridgeText });

  // --- Progressive previous-segment context (compressed for older segments) ---
  const prevText = buildProgressiveContext(previousAnalyses, userName);
  if (prevText) contentParts.push({ text: prevText });

  // --- Segment focus instructions ---
  const focusText = buildSegmentFocus(segmentIndex, totalSegments, previousAnalyses, userName);
  contentParts.push({ text: focusText });

  // --- Smart boundary overlap context ---
  if (boundaryContext) {
    contentParts.push({ text: boundaryContext });
    console.log(`  Boundary context injected (mid-conversation detected)`);
  }

  // --- Retry hints (if this is a quality-gate retry) ---
  if (retryHints.length > 0) {
    const retryText = 'RETRY INSTRUCTIONS — Your previous attempt had quality issues. Address ALL of the following:\n' +
      retryHints.map((h, i) => `${i + 1}. ${h}`).join('\n');
    contentParts.push({ text: retryText });
    console.log(`  Retry hints injected (${retryHints.length} correction(s))`);
  }

  // User identity injection
  if (userName) {
    contentParts.push({
      text: `CURRENT USER: "${userName}". This is the person running this analysis. When extracting tasks, action items, change requests, and scope changes — clearly identify which ones are assigned to or owned by "${userName}" vs. others. In the output, populate the "your_tasks" section with a focused summary of everything ${userName} needs to do, decisions they are waiting on, and items they own. If the call mentions ${userName} (even by partial name, first name, or nickname), attribute those tasks to them.`
    });
  }

  // The main extraction prompt goes last, after all context parts.
  contentParts.push({ text: promptText });

  // 5. Send request (configurable thinking budget for complex multi-ticket analysis)
  const requestPayload = {
    model: config.GEMINI_MODEL,
    contents: [{ role: 'user', parts: contentParts }],
    config: {
      systemInstruction,
      maxOutputTokens: 65536,
      // temperature 0 for deterministic, structured extraction output.
      temperature: 0,
      thinkingConfig: { thinkingBudget },
    },
  };

  const t0 = Date.now();
  const response = await withRetry(
    () => ai.models.generateContent(requestPayload),
    { label: `Gemini segment analysis (${displayName})`, maxRetries: 2, baseDelay: 5000 }
  );
  const durationMs = Date.now() - t0;

  const rawText = response.text;

  // 6. Extract token usage
  const usage = response.usageMetadata || {};
  const tokenUsage = {
    inputTokens: usage.promptTokenCount || 0,
    outputTokens: usage.candidatesTokenCount || 0,
    totalTokens: usage.totalTokenCount || 0,
    thoughtTokens: usage.thoughtsTokenCount || 0,
  };
  const contextRemaining = config.GEMINI_CONTEXT_WINDOW - tokenUsage.inputTokens;
  const contextUsedPct = ((tokenUsage.inputTokens / config.GEMINI_CONTEXT_WINDOW) * 100).toFixed(1);
  tokenUsage.contextWindow = config.GEMINI_CONTEXT_WINDOW;
  tokenUsage.contextRemaining = contextRemaining;
  tokenUsage.contextUsedPct = parseFloat(contextUsedPct);

  console.log(`  Tokens — input: ${tokenUsage.inputTokens.toLocaleString()} | output: ${tokenUsage.outputTokens.toLocaleString()} | thinking: ${tokenUsage.thoughtTokens.toLocaleString()} | total: ${tokenUsage.totalTokens.toLocaleString()}`);
  console.log(`  Context — used: ${contextUsedPct}% | remaining: ${contextRemaining.toLocaleString()} / ${config.GEMINI_CONTEXT_WINDOW.toLocaleString()} tokens`);

  // 7. Parse JSON response (extractJson returns null on parse failure)
  const parsed = extractJson(rawText);

  // Build serialisable input summary — full doc contents are replaced by
  // sizes/previews so the record stays compact enough to persist.
  const inputSummary = contentParts.map(part => {
    if (part.fileData) return { type: 'fileData', mimeType: part.fileData.mimeType, fileUri: part.fileData.fileUri };
    if (part.text) return { type: 'text', chars: part.text.length, preview: part.text.substring(0, 300) };
    return part;
  });

  return {
    run: {
      model: config.GEMINI_MODEL,
      displayName,
      userName,
      timestamp: new Date().toISOString(),
      durationMs,
      tokenUsage,
      systemInstruction,
    },
    input: {
      videoFile: { mimeType: file.mimeType, fileUri: file.uri, displayName, geminiFileName: file.name, usedExternalUrl },
      contextDocuments: contextDocs.map(d => ({ fileName: d.fileName, type: d.type })),
      previousSegmentCount: previousAnalyses.length,
      parts: inputSummary,
      promptText,
    },
    output: {
      raw: rawText,
      parsed,
      // parseSuccess false signals the quality gate to consider a retry.
      parseSuccess: parsed !== null,
    },
  };
}
|
|
450
|
+
|
|
451
|
+
// ======================== FINAL COMPILATION ========================
|
|
452
|
+
|
|
453
|
+
/**
|
|
454
|
+
* Compile all segment analyses into a single unified result using Gemini.
|
|
455
|
+
*
|
|
456
|
+
* Instead of naive merging / flatMap across segments, this sends all segment
|
|
457
|
+
* outputs to Gemini to produce one deduplicated, reconciled, coherent final
|
|
458
|
+
* analysis — the "compiled" result.
|
|
459
|
+
*
|
|
460
|
+
* @param {object} ai - Gemini AI instance
|
|
461
|
+
* @param {Array} allSegmentAnalyses - Array of parsed analysis objects from each segment
|
|
462
|
+
* @param {string} userName - Current user's name
|
|
463
|
+
* @param {string} callName - Name of the call
|
|
464
|
+
* @param {string} scriptDir - Directory where prompt.json lives
|
|
465
|
+
* @param {object} [opts] - Options { thinkingBudget }
|
|
466
|
+
* @returns {{ compiled: object, run: object }} - The compiled analysis + run metadata
|
|
467
|
+
*/
|
|
468
|
+
async function compileFinalResult(ai, allSegmentAnalyses, userName, callName, scriptDir, opts = {}) {
|
|
469
|
+
const { thinkingBudget: compilationThinking = 10240 } = opts;
|
|
470
|
+
const { systemInstruction } = loadPrompt(scriptDir);
|
|
471
|
+
|
|
472
|
+
console.log('');
|
|
473
|
+
console.log('══════════════════════════════════════════════');
|
|
474
|
+
console.log(' FINAL COMPILATION — AI merging all segments');
|
|
475
|
+
console.log('══════════════════════════════════════════════');
|
|
476
|
+
console.log('');
|
|
477
|
+
|
|
478
|
+
// Build a detailed dump of all segment analyses
|
|
479
|
+
const segmentDumps = allSegmentAnalyses.map((analysis, idx) => {
|
|
480
|
+
// Strip internal metadata and bloated fields before sending to AI
|
|
481
|
+
const clean = { ...analysis };
|
|
482
|
+
delete clean._geminiMeta;
|
|
483
|
+
delete clean.seg;
|
|
484
|
+
// Remove full transcript/comments arrays — they bloat the compilation input
|
|
485
|
+
// and cause the output to exceed token limits with malformed JSON
|
|
486
|
+
if (clean.tickets) {
|
|
487
|
+
clean.tickets = clean.tickets.map(t => {
|
|
488
|
+
const tc = { ...t };
|
|
489
|
+
// Keep max 5 key comments per ticket for context, drop the rest
|
|
490
|
+
if (tc.comments && tc.comments.length > 5) {
|
|
491
|
+
tc.comments = tc.comments.slice(0, 5);
|
|
492
|
+
tc.comments.push({ note: `...${t.comments.length - 5} more comments omitted for brevity` });
|
|
493
|
+
}
|
|
494
|
+
return tc;
|
|
495
|
+
});
|
|
496
|
+
}
|
|
497
|
+
// Remove any top-level conversation_transcript if the segment produced one
|
|
498
|
+
delete clean.conversation_transcript;
|
|
499
|
+
return `=== SEGMENT ${idx + 1} OF ${allSegmentAnalyses.length} ===\n${JSON.stringify(clean, null, 2)}`;
|
|
500
|
+
}).join('\n\n');
|
|
501
|
+
|
|
502
|
+
const compilationPrompt = `You are compiling the FINAL unified analysis from a multi-segment video call.
|
|
503
|
+
|
|
504
|
+
CONTEXT:
|
|
505
|
+
- Call name: "${callName}"
|
|
506
|
+
- Current user: "${userName}"
|
|
507
|
+
- Total segments analyzed: ${allSegmentAnalyses.length}
|
|
508
|
+
|
|
509
|
+
Below are the individual segment analyses. Each segment was analyzed independently but with cross-segment context. Your job is to produce ONE final, compiled, deduplicated result.
|
|
510
|
+
|
|
511
|
+
REQUIRED OUTPUT STRUCTURE:
|
|
512
|
+
Your JSON output MUST include ALL of these top-level fields (use empty arrays [] only when genuinely no items exist):
|
|
513
|
+
"tickets": [...], // All unique tickets discussed (deduplicated by ticket_id)
|
|
514
|
+
"change_requests": [...], // All unique CRs (deduplicated by id)
|
|
515
|
+
"action_items": [...], // All unique action items (deduplicated, re-numbered AI-1, AI-2, ...)
|
|
516
|
+
"blockers": [...], // All unique blockers (deduplicated, re-numbered BLK-1, ...)
|
|
517
|
+
"scope_changes": [...], // All unique scope changes (deduplicated, re-numbered SC-1, ...)
|
|
518
|
+
"file_references": [...], // All unique file references (deduplicated by resolved_path)
|
|
519
|
+
"your_tasks": { ... }, // Unified task summary for "${userName}"
|
|
520
|
+
"summary": "..." // ONE coherent executive summary for the entire call (3-5 sentences)
|
|
521
|
+
|
|
522
|
+
OUTPUT FORMAT RULES:
|
|
523
|
+
- Respond with ONLY valid JSON. No markdown fences, no extra text before or after.
|
|
524
|
+
- Double-check your JSON syntax: no trailing commas, no doubled braces }}, no doubled commas ,,.
|
|
525
|
+
- Keep descriptions complete but compact — do not pad or elaborate beyond what the segments contain.
|
|
526
|
+
- DO NOT include "conversation_transcript" field.
|
|
527
|
+
- Keep only the 3-5 most decisive comments per ticket. Do not bulk-copy all comments from segments.
|
|
528
|
+
|
|
529
|
+
COMPILATION RULES:
|
|
530
|
+
1. STRICT DEDUP: Every ticket, CR, action item, blocker, scope change, and file reference MUST appear EXACTLY ONCE. Match by ID first, then by description similarity. NEVER repeat the same item.
|
|
531
|
+
2. NAME NORMALIZATION: Merge variant names for the same person:
|
|
532
|
+
- Case differences ("Youssef Adel" / "youssef adel") → use proper case
|
|
533
|
+
- Role suffixes ("Mohamed Elhadi" / "Mohamed Elhadi (Service Desk)") → use the base name only, drop role qualifiers
|
|
534
|
+
- Nicknames or partial names referring to the same person → normalize to full proper name
|
|
535
|
+
Ensure your_tasks.user_name uses the properly-cased version of "${userName}".
|
|
536
|
+
3. RECONCILE CONFLICTS: If two segments give different status for the same item, use the LATEST/most-specific state.
|
|
537
|
+
4. MERGE SUMMARIES: Write ONE coherent executive summary for the entire call (3-5 sentences max). Not per-segment.
|
|
538
|
+
5. UNIFIED your_tasks: Produce ONE your_tasks section for "${userName}" — deduplicated, final states only.
|
|
539
|
+
6. SEQUENTIAL IDs: Re-number action items (AI-1, AI-2, ...), scope changes (SC-1, SC-2, ...), blockers (BLK-1, ...) sequentially. Keep real CR/ticket numbers (e.g. CR31296872) unchanged.
|
|
540
|
+
7. FILE REFERENCES: Merge and deduplicate — keep the most specific resolved_path. Each file appears ONCE.
|
|
541
|
+
8. PRESERVE ALL DATA: Include every unique ticket, action item, blocker, etc. from the segments. Do NOT omit items for brevity. The goal is completeness with deduplication, not summarization.
|
|
542
|
+
9. PRESERVE source_segment: Every item in the input has a "source_segment" field (1-based integer) indicating which video segment it originated from. You MUST preserve this field on EVERY output item (action_items, change_requests, blockers, scope_changes, file_references, and inside tickets: comments, code_changes, video_segments). For your_tasks sub-arrays (tasks_todo, tasks_waiting_on_others, decisions_needed), also preserve source_segment. If an item appears in multiple segments, keep the source_segment of the FIRST (earliest) occurrence.
|
|
543
|
+
|
|
544
|
+
You MUST respond with ONLY valid JSON (no markdown fences, no extra text).
|
|
545
|
+
Use the same output structure as the individual segment analyses.
|
|
546
|
+
|
|
547
|
+
SEGMENT ANALYSES:
|
|
548
|
+
${segmentDumps}`;
|
|
549
|
+
|
|
550
|
+
const contentParts = [{ text: compilationPrompt }];
|
|
551
|
+
|
|
552
|
+
const requestPayload = {
|
|
553
|
+
model: config.GEMINI_MODEL,
|
|
554
|
+
contents: [{ role: 'user', parts: contentParts }],
|
|
555
|
+
config: {
|
|
556
|
+
systemInstruction: `${systemInstruction}\n\nYou are now in COMPILATION MODE — your job is to merge multiple segment analyses into one final unified output. Deduplicate, reconcile conflicts, and produce the definitive analysis. Output valid JSON only — no markdown fences.`,
|
|
557
|
+
maxOutputTokens: 65536,
|
|
558
|
+
temperature: 0,
|
|
559
|
+
// Thinking tokens share the maxOutputTokens pool in Gemini 2.5+ models.
|
|
560
|
+
// Default 10240 leaves ~55K for output — enough for full structured merge.
|
|
561
|
+
// Too low (4096) → model hits ceiling and produces minimal output.
|
|
562
|
+
// Too high (16384) → eats into output budget causing truncation.
|
|
563
|
+
thinkingConfig: { thinkingBudget: compilationThinking },
|
|
564
|
+
},
|
|
565
|
+
};
|
|
566
|
+
|
|
567
|
+
const t0 = Date.now();
|
|
568
|
+
console.log(` Compiling with ${config.GEMINI_MODEL}...`);
|
|
569
|
+
const response = await withRetry(
|
|
570
|
+
() => ai.models.generateContent(requestPayload),
|
|
571
|
+
{ label: 'Gemini final compilation', maxRetries: 2, baseDelay: 5000 }
|
|
572
|
+
);
|
|
573
|
+
const durationMs = Date.now() - t0;
|
|
574
|
+
const rawText = response.text;
|
|
575
|
+
|
|
576
|
+
// Token usage
|
|
577
|
+
const usage = response.usageMetadata || {};
|
|
578
|
+
const tokenUsage = {
|
|
579
|
+
inputTokens: usage.promptTokenCount || 0,
|
|
580
|
+
outputTokens: usage.candidatesTokenCount || 0,
|
|
581
|
+
totalTokens: usage.totalTokenCount || 0,
|
|
582
|
+
thoughtTokens: usage.thoughtsTokenCount || 0,
|
|
583
|
+
};
|
|
584
|
+
const contextUsedPct = ((tokenUsage.inputTokens / config.GEMINI_CONTEXT_WINDOW) * 100).toFixed(1);
|
|
585
|
+
tokenUsage.contextWindow = config.GEMINI_CONTEXT_WINDOW;
|
|
586
|
+
tokenUsage.contextRemaining = config.GEMINI_CONTEXT_WINDOW - tokenUsage.inputTokens;
|
|
587
|
+
tokenUsage.contextUsedPct = parseFloat(contextUsedPct);
|
|
588
|
+
|
|
589
|
+
console.log(` Tokens — input: ${tokenUsage.inputTokens.toLocaleString()} | output: ${tokenUsage.outputTokens.toLocaleString()} | thinking: ${tokenUsage.thoughtTokens.toLocaleString()} | total: ${tokenUsage.totalTokens.toLocaleString()}`);
|
|
590
|
+
console.log(` Context — used: ${contextUsedPct}% | remaining: ${tokenUsage.contextRemaining.toLocaleString()} / ${config.GEMINI_CONTEXT_WINDOW.toLocaleString()} tokens`);
|
|
591
|
+
console.log(` Compilation duration: ${(durationMs / 1000).toFixed(1)}s`);
|
|
592
|
+
|
|
593
|
+
// Parse compiled result
|
|
594
|
+
const compiled = extractJson(rawText);
|
|
595
|
+
|
|
596
|
+
if (!compiled) {
|
|
597
|
+
console.warn(' ⚠ Failed to parse compiled result — falling back to raw segment merge');
|
|
598
|
+
} else {
|
|
599
|
+
console.log(' ✓ Final compilation complete');
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
return {
|
|
603
|
+
compiled,
|
|
604
|
+
raw: rawText,
|
|
605
|
+
run: {
|
|
606
|
+
model: config.GEMINI_MODEL,
|
|
607
|
+
type: 'compilation',
|
|
608
|
+
timestamp: new Date().toISOString(),
|
|
609
|
+
durationMs,
|
|
610
|
+
tokenUsage,
|
|
611
|
+
segmentCount: allSegmentAnalyses.length,
|
|
612
|
+
parseSuccess: compiled !== null,
|
|
613
|
+
},
|
|
614
|
+
};
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
// ======================== DYNAMIC MODE — VIDEO CONTEXT EXTRACTION ========================
|
|
618
|
+
|
|
619
|
+
/**
 * Analyze a video segment for dynamic mode — produces a comprehensive text summary
 * instead of structured JSON. Used as context for dynamic document generation.
 *
 * Lifecycle: upload the segment to the Gemini File API, poll until the file is
 * ACTIVE, run one summarization request, then delete the remote upload.
 *
 * @param {object} ai - GoogleGenAI instance
 * @param {string} filePath - Path to video segment on disk
 * @param {string} displayName - Display name (e.g. "segment_00.mp4")
 * @param {object} [opts] - { thinkingBudget, segmentIndex, totalSegments }
 * @returns {Promise<{summary: string, durationMs: number, tokenUsage: object}>}
 * @throws {Error} If File API processing times out or ends in the FAILED state,
 *   or if the analysis request exhausts its retries.
 */
async function analyzeVideoForContext(ai, filePath, displayName, opts = {}) {
  const { thinkingBudget = 8192, segmentIndex = 0, totalSegments = 1 } = opts;

  // 1. Upload video to Gemini File API
  console.log(`  Uploading ${displayName} to Gemini File API...`);
  let file = await withRetry(
    () => ai.files.upload({
      file: filePath,
      config: { mimeType: 'video/mp4', displayName },
    }),
    { label: `Gemini video upload (${displayName})`, maxRetries: 3 }
  );

  // FIX: everything after the upload runs inside try/finally so the remote
  // file is deleted even when polling times out, processing FAILs, or the
  // analysis request throws. Previously those paths leaked the upload.
  try {
    // 2. Poll until processing complete
    let waited = 0;
    const pollStart = Date.now();
    while (file.state === 'PROCESSING') {
      if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
        throw new Error(`Gemini file processing timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s for ${displayName}`);
      }
      process.stdout.write(`  Processing${'.'.repeat((waited % 3) + 1)}   \r`);
      await new Promise(r => setTimeout(r, 5000));
      waited++;
      file = await withRetry(
        () => ai.files.get({ name: file.name }),
        { label: 'Gemini file status check', maxRetries: 2, baseDelay: 1000 }
      );
    }
    console.log('  Processing complete.   ');

    if (file.state === 'FAILED') {
      throw new Error(`Gemini file processing failed for ${displayName}`);
    }

    // 3. Build prompt for comprehensive summary
    const segmentLabel = totalSegments > 1
      ? `This is segment ${segmentIndex + 1} of ${totalSegments} from a longer video.`
      : 'This is the complete video.';

    const prompt = `You are an expert analyst. Watch this video carefully and produce a COMPREHENSIVE summary.

${segmentLabel}

Your summary must capture ALL of the following (where applicable):
1. **Transcript / Dialog**: Who said what — capture all speakers and their statements as accurately as possible. Use speaker names if visible/mentioned, otherwise "Speaker 1", "Speaker 2", etc.
2. **Topics Discussed**: Every topic, subject, or theme covered — with detail, not just labels.
3. **Decisions Made**: Any decisions, agreements, or conclusions reached.
4. **Action Items**: Any tasks assigned, commitments made, or next steps discussed.
5. **Technical Details**: Code, architecture, configurations, APIs, tools, or technologies mentioned.
6. **Problems / Blockers**: Issues raised, bugs discussed, challenges identified.
7. **Key Information**: Numbers, dates, names, URLs, file paths, or any specific data mentioned.
8. **Visual Content**: Screen shares, presentations, diagrams, code on screen — describe what is shown.
9. **Context & Background**: Any background information or context provided in the discussion.

FORMAT:
- Write in clear, detailed prose with section headers.
- Include direct quotes for important statements.
- Be thorough — capture everything, even seemingly minor details.
- This summary will be used as context for generating documents, so completeness is critical.
- Do NOT use JSON. Write natural language with Markdown formatting.`;

    const contentParts = [
      { fileData: { mimeType: file.mimeType, fileUri: file.uri } },
      { text: prompt },
    ];

    // 4. Send to Gemini
    console.log(`  Analyzing with ${config.GEMINI_MODEL} [segment ${segmentIndex + 1}/${totalSegments}]...`);
    const requestPayload = {
      model: config.GEMINI_MODEL,
      contents: [{ role: 'user', parts: contentParts }],
      config: {
        systemInstruction: 'You are a meticulous video analyst. Produce comprehensive, detailed summaries that capture everything in the video. Write in clear Markdown prose.',
        maxOutputTokens: 32768,
        temperature: 0.1,
        thinkingConfig: { thinkingBudget },
      },
    };

    const t0 = Date.now();
    const response = await withRetry(
      () => ai.models.generateContent(requestPayload),
      { label: `Dynamic video analysis (${displayName})`, maxRetries: 2, baseDelay: 5000 }
    );
    const durationMs = Date.now() - t0;

    const summary = (response.text || '').trim();

    // Token usage — fields default to 0 when usageMetadata is absent
    const usage = response.usageMetadata || {};
    const tokenUsage = {
      inputTokens: usage.promptTokenCount || 0,
      outputTokens: usage.candidatesTokenCount || 0,
      totalTokens: usage.totalTokenCount || 0,
      thoughtTokens: usage.thoughtsTokenCount || 0,
    };

    console.log(`  Tokens — input: ${tokenUsage.inputTokens.toLocaleString()} | output: ${tokenUsage.outputTokens.toLocaleString()} | thinking: ${tokenUsage.thoughtTokens.toLocaleString()}`);
    console.log(`  ✓ Summary: ${summary.length.toLocaleString()} chars in ${(durationMs / 1000).toFixed(1)}s`);

    return { summary, durationMs, tokenUsage };
  } finally {
    // Cleanup: delete uploaded file from Gemini File API (best-effort —
    // a failure here must never mask the primary result or error)
    try {
      await ai.files.delete({ name: file.name });
    } catch (cleanupErr) {
      console.warn(`  ⚠ Gemini file cleanup failed: ${cleanupErr.message}`);
    }
  }
}
|
|
737
|
+
|
|
738
|
+
// ======================== CLEANUP ========================
|
|
739
|
+
|
|
740
|
+
/**
 * Delete uploaded files from Gemini File API.
 * Call after all analysis (including focused passes) is complete.
 *
 * Collects the main video upload plus any File API uploads referenced by
 * the prepared context docs, deletes each one best-effort (errors are
 * ignored — files may have already expired server-side), and logs how
 * many were actually removed.
 *
 * @param {object} ai - GoogleGenAI instance
 * @param {string|null} geminiFileName - The Gemini file resource name (from file.name)
 * @param {Array} [contextDocs] - Prepared context docs (may contain File API uploads)
 */
async function cleanupGeminiFiles(ai, geminiFileName, contextDocs = []) {
  // Gather every remote resource name: main upload first, then context uploads.
  const uploadNames = contextDocs
    .filter((doc) => doc.type === 'fileData' && doc.geminiFileName)
    .map((doc) => doc.geminiFileName);
  if (geminiFileName) uploadNames.unshift(geminiFileName);
  if (uploadNames.length === 0) return;

  let removedCount = 0;
  for (const resourceName of uploadNames) {
    try {
      await ai.files.delete({ name: resourceName });
      removedCount++;
    } catch { /* ignore — files may already be expired */ }
  }
  if (removedCount > 0) {
    console.log(`  🧹 Cleaned up ${removedCount} Gemini File API upload(s)`);
  }
}
|
|
769
|
+
|
|
770
|
+
// Public API of the Gemini service (see file header: init, document
// preparation, segment analysis, and final compilation).
module.exports = {
  initGemini,              // client initialization (defined earlier in this file)
  prepareDocsForGemini,    // document preparation for upload (defined earlier)
  loadPrompt,              // prompt loading (defined earlier)
  processWithGemini,       // per-segment analysis (defined earlier)
  compileFinalResult,      // merges all segment analyses into one unified result
  buildDocBridgeText,      // doc bridging helper (defined earlier)
  analyzeVideoForContext,  // dynamic-mode video → Markdown summary
  cleanupGeminiFiles,      // best-effort deletion of Gemini File API uploads
};
|