task-summary-extractor 8.3.0 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +38 -0
- package/ARCHITECTURE.md +99 -3
- package/EXPLORATION.md +148 -89
- package/QUICK_START.md +5 -2
- package/README.md +51 -7
- package/bin/taskex.js +11 -4
- package/package.json +38 -5
- package/src/config.js +52 -3
- package/src/modes/focused-reanalysis.js +2 -1
- package/src/modes/progress-updater.js +1 -1
- package/src/phases/_shared.js +43 -0
- package/src/phases/compile.js +101 -0
- package/src/phases/deep-dive.js +118 -0
- package/src/phases/discover.js +178 -0
- package/src/phases/init.js +192 -0
- package/src/phases/output.js +238 -0
- package/src/phases/process-media.js +633 -0
- package/src/phases/services.js +104 -0
- package/src/phases/summary.js +86 -0
- package/src/pipeline.js +431 -1463
- package/src/renderers/docx.js +531 -0
- package/src/renderers/html.js +672 -0
- package/src/renderers/markdown.js +15 -183
- package/src/renderers/pdf.js +90 -0
- package/src/renderers/shared.js +211 -0
- package/src/schemas/analysis-compiled.schema.json +381 -0
- package/src/schemas/analysis-segment.schema.json +380 -0
- package/src/services/doc-parser.js +346 -0
- package/src/services/gemini.js +101 -44
- package/src/services/video.js +123 -8
- package/src/utils/adaptive-budget.js +6 -4
- package/src/utils/checkpoint.js +2 -1
- package/src/utils/cli.js +131 -110
- package/src/utils/colors.js +83 -0
- package/src/utils/confidence-filter.js +138 -0
- package/src/utils/diff-engine.js +2 -1
- package/src/utils/global-config.js +6 -5
- package/src/utils/health-dashboard.js +11 -9
- package/src/utils/json-parser.js +4 -2
- package/src/utils/learning-loop.js +3 -2
- package/src/utils/progress-bar.js +286 -0
- package/src/utils/quality-gate.js +4 -2
- package/src/utils/retry.js +3 -1
- package/src/utils/schema-validator.js +314 -0
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document parser service — converts rich document formats (DOCX, DOC, XLSX,
|
|
3
|
+
* PPTX, RTF, ODT, EPUB, HTML) to plain text for Gemini AI ingestion.
|
|
4
|
+
*
|
|
5
|
+
* Strategy:
|
|
6
|
+
* 1. mammoth — DOCX → clean text (best quality, preserves structure)
|
|
7
|
+
* 2. xlsx — XLSX/XLS/CSV → text tables
|
|
8
|
+
* 3. officeparser — DOC, PPTX, ODT, RTF, EPUB (broad fallback)
|
|
9
|
+
* 4. Built-in — HTML → text (regex strip), JSON/XML → passthrough
|
|
10
|
+
*
|
|
11
|
+
* All parsers return plain text. Binary files that can't be parsed
|
|
12
|
+
* are skipped with a warning (no crash).
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
'use strict';
|
|
16
|
+
|
|
17
|
+
const fs = require('fs');
|
|
18
|
+
const path = require('path');
|
|
19
|
+
const { c } = require('../utils/colors');
|
|
20
|
+
|
|
21
|
+
// ======================== PARSER REGISTRY ========================
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Extensions handled by each parser strategy.
|
|
25
|
+
* Order matters: first match wins.
|
|
26
|
+
*/
|
|
27
|
+
const PARSER_MAP = {
|
|
28
|
+
// mammoth — high-quality DOCX conversion
|
|
29
|
+
'.docx': 'mammoth',
|
|
30
|
+
|
|
31
|
+
// xlsx — Excel spreadsheets
|
|
32
|
+
'.xlsx': 'xlsx',
|
|
33
|
+
'.xls': 'xlsx',
|
|
34
|
+
|
|
35
|
+
// officeparser — broad Office/ODF/EPUB support
|
|
36
|
+
'.doc': 'officeparser',
|
|
37
|
+
'.pptx': 'officeparser',
|
|
38
|
+
'.ppt': 'officeparser',
|
|
39
|
+
'.odt': 'officeparser',
|
|
40
|
+
'.odp': 'officeparser',
|
|
41
|
+
'.ods': 'officeparser',
|
|
42
|
+
'.rtf': 'officeparser',
|
|
43
|
+
'.epub': 'officeparser',
|
|
44
|
+
|
|
45
|
+
// Built-in parsers
|
|
46
|
+
'.html': 'html',
|
|
47
|
+
'.htm': 'html',
|
|
48
|
+
'.xml': 'builtin-text',
|
|
49
|
+
'.json': 'builtin-text',
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* All extensions this module can parse (union of PARSER_MAP keys).
|
|
54
|
+
* Exported for config.js to extend DOC_EXTS and remove GEMINI_UNSUPPORTED.
|
|
55
|
+
*/
|
|
56
|
+
const PARSEABLE_EXTS = Object.keys(PARSER_MAP);
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Extensions that were previously unsupported but are now parseable.
|
|
60
|
+
* Used to update GEMINI_UNSUPPORTED → INLINE_TEXT_EXTS migration.
|
|
61
|
+
*/
|
|
62
|
+
const NEWLY_SUPPORTED_EXTS = ['.docx', '.doc', '.xlsx', '.xls', '.pptx', '.ppt', '.odt', '.odp', '.ods', '.rtf', '.epub', '.html', '.htm'];
|
|
63
|
+
|
|
64
|
+
// ======================== MAMMOTH (DOCX) ========================
|
|
65
|
+
|
|
66
|
+
/**
 * Convert a .docx file to plain text with mammoth.
 *
 * Raw-text extraction runs first (most reliable, captures all content);
 * if it yields nothing, the document is converted to HTML and the tags
 * are stripped as a fallback.
 *
 * @param {string} filePath - Absolute path to the .docx file
 * @returns {Promise<{ text: string, warnings: string[] }>} Extracted text
 *   plus any conversion messages mammoth reported. Never throws — a parse
 *   failure yields empty text with a single warning.
 */
async function parseDocx(filePath) {
  const mammoth = require('mammoth');
  const warnings = [];

  // Folds mammoth's conversion messages into warnings under a label.
  const collect = (messages, label) => {
    for (const msg of messages || []) {
      warnings.push(`${label}: ${msg.message}`);
    }
  };

  try {
    // Primary path: raw text extraction.
    const raw = await mammoth.extractRawText({ path: filePath });
    collect(raw.messages, 'mammoth');
    let text = (raw.value || '').trim();

    // Fallback path: HTML conversion, then tag stripping.
    if (!text) {
      const asHtml = await mammoth.convertToHtml({ path: filePath });
      text = stripHtml(asHtml.value || '');
      collect(asHtml.messages, 'mammoth-html');
    }

    return { text, warnings };
  } catch (err) {
    return { text: '', warnings: [`mammoth parse failed: ${err.message}`] };
  }
}
|
|
104
|
+
|
|
105
|
+
// ======================== XLSX (Excel) ========================
|
|
106
|
+
|
|
107
|
+
/**
 * Convert an Excel workbook (.xlsx/.xls) to pipe-delimited text tables.
 * Each non-empty sheet becomes its own "=== Sheet: name ===" section.
 *
 * @param {string} filePath - Absolute path to the spreadsheet
 * @returns {Promise<{ text: string, warnings: string[] }>} Never throws —
 *   a parse failure yields empty text with a single warning.
 */
async function parseExcel(filePath) {
  const XLSX = require('xlsx');
  const warnings = [];

  // Escapes pipes and flattens newlines so each row stays on one line.
  const formatCell = (cell) =>
    cell === null || cell === undefined
      ? ''
      : String(cell).replace(/\|/g, '\\|').replace(/\n/g, ' ');

  try {
    const workbook = XLSX.readFile(filePath, { type: 'file' });
    const sections = [];

    for (const sheetName of workbook.SheetNames) {
      const sheet = workbook.Sheets[sheetName];
      if (!sheet) continue;

      // header: 1 → array-of-arrays; defval fills gaps with empty strings.
      const rows = XLSX.utils.sheet_to_json(sheet, { header: 1, defval: '' });
      if (rows.length === 0) continue;

      const lines = [`=== Sheet: ${sheetName} ===`];

      // Pipe-delimited rows (Gemini reads this format well).
      for (const row of rows) {
        lines.push(`| ${row.map(formatCell).join(' | ')} |`);
      }

      sections.push(lines.join('\n'));
    }

    const text = sections.join('\n\n');
    if (!text) warnings.push('Excel file has no readable content');

    return { text, warnings };
  } catch (err) {
    return { text: '', warnings: [`xlsx parse failed: ${err.message}`] };
  }
}
|
|
152
|
+
|
|
153
|
+
// ======================== OFFICEPARSER (DOC, PPTX, ODT, RTF, EPUB) ========================
|
|
154
|
+
|
|
155
|
+
/**
 * Extract text via officeparser — the broad-coverage fallback for
 * .doc, .pptx, .ppt, .odt, .odp, .ods, .rtf and .epub.
 *
 * @param {string} filePath - Absolute path to the document
 * @returns {Promise<{ text: string, warnings: string[] }>} Never throws —
 *   a parse failure yields empty text with a single warning.
 */
async function parseWithOfficeParser(filePath) {
  const officeparser = require('officeparser');

  try {
    const raw = await officeparser.parseOfficeAsync(filePath);
    const text = (raw || '').trim();
    // Empty output is not an error, but callers should know about it.
    const warnings = text ? [] : ['officeparser returned empty text'];
    return { text, warnings };
  } catch (err) {
    return { text: '', warnings: [`officeparser failed: ${err.message}`] };
  }
}
|
|
176
|
+
|
|
177
|
+
// ======================== HTML STRIP ========================
|
|
178
|
+
|
|
179
|
+
/**
 * Strip HTML markup and return readable plain text.
 *
 * Block-level closers become newlines, headings become markdown-style `#`
 * prefixes, list items become `- ` bullets, and table cells are joined
 * with ` | `. Script/style blocks are dropped entirely. Common HTML
 * entities are decoded AFTER tag removal so decoded angle brackets are
 * not mistaken for tags.
 *
 * @param {string} html - HTML content (null/empty tolerated)
 * @returns {string} Plain text
 */
function stripHtml(html) {
  if (!html) return '';

  let text = html;

  // Drop script/style blocks entirely — their contents are code, not
  // human-readable document text.
  text = text.replace(/<(script|style)[^>]*>[\s\S]*?<\/\1>/gi, '');

  // Convert block-level closers to newlines
  text = text.replace(/<\/(p|div|h[1-6]|li|tr|blockquote|section|article)>/gi, '\n');
  text = text.replace(/<br\s*\/?>/gi, '\n');
  text = text.replace(/<hr\s*\/?>/gi, '\n---\n');

  // Convert list items to bullets
  text = text.replace(/<li[^>]*>/gi, '- ');

  // Convert headings to markdown-style prefixes
  text = text.replace(/<h1[^>]*>/gi, '\n# ');
  text = text.replace(/<h2[^>]*>/gi, '\n## ');
  text = text.replace(/<h3[^>]*>/gi, '\n### ');
  text = text.replace(/<h[4-6][^>]*>/gi, '\n#### ');

  // Convert table cells to pipe separators
  text = text.replace(/<td[^>]*>/gi, ' | ');
  text = text.replace(/<th[^>]*>/gi, ' | ');

  // Strip all remaining tags
  text = text.replace(/<[^>]+>/g, '');

  // Decode common HTML entities. `&amp;` is decoded LAST so that a
  // double-escaped sequence like "&amp;lt;" yields "&lt;", not "<".
  text = text.replace(/&nbsp;/g, ' ');
  text = text.replace(/&lt;/g, '<');
  text = text.replace(/&gt;/g, '>');
  text = text.replace(/&quot;/g, '"');
  text = text.replace(/&#0?39;/g, "'");
  text = text.replace(/&apos;/g, "'");
  text = text.replace(/&amp;/g, '&');

  // Clean up excessive whitespace
  text = text.replace(/[ \t]+/g, ' ');
  text = text.replace(/\n{3,}/g, '\n\n');

  return text.trim();
}
|
|
227
|
+
|
|
228
|
+
// ======================== BUILT-IN TEXT ========================
|
|
229
|
+
|
|
230
|
+
/**
 * Read a file verbatim as UTF-8 (used for JSON, XML and similar formats
 * that need no structural conversion).
 *
 * @param {string} filePath - Absolute path to the text file
 * @returns {Promise<{ text: string, warnings: string[] }>} Never throws —
 *   a read failure yields empty text with a single warning.
 */
async function parseBuiltinText(filePath) {
  try {
    const raw = await fs.promises.readFile(filePath, 'utf8');
    return { text: raw.trim(), warnings: [] };
  } catch (err) {
    return { text: '', warnings: [`Failed to read file: ${err.message}`] };
  }
}
|
|
243
|
+
|
|
244
|
+
/**
 * Read an HTML file from disk and reduce it to plain text via stripHtml().
 *
 * @param {string} filePath - Absolute path to the .html/.htm file
 * @returns {Promise<{ text: string, warnings: string[] }>} Never throws —
 *   a read failure yields empty text with a single warning.
 */
async function parseHtmlFile(filePath) {
  try {
    const markup = await fs.promises.readFile(filePath, 'utf8');
    return { text: stripHtml(markup), warnings: [] };
  } catch (err) {
    return { text: '', warnings: [`Failed to read HTML: ${err.message}`] };
  }
}
|
|
258
|
+
|
|
259
|
+
// ======================== MAIN PARSE FUNCTION ========================
|
|
260
|
+
|
|
261
|
+
/**
 * Parse any supported document to plain text, dispatching on extension.
 *
 * @param {string} filePath - Absolute path to the document
 * @param {object} [opts]
 * @param {number} [opts.maxLength] - Truncate output to this many characters (default: unlimited)
 * @param {boolean} [opts.silent] - Suppress console warnings (default: false)
 * @returns {Promise<{ text: string, ext: string, parser: string, warnings: string[], success: boolean }>}
 */
async function parseDocument(filePath, opts = {}) {
  const ext = path.extname(filePath).toLowerCase();
  const parser = PARSER_MAP[ext];

  // Unregistered extension → structured failure, never a throw.
  if (!parser) {
    return {
      text: '',
      ext,
      parser: 'none',
      warnings: [`No parser available for extension "${ext}"`],
      success: false,
    };
  }

  // Strategy name → implementation. Each returns { text, warnings }.
  const strategies = {
    'mammoth': parseDocx,
    'xlsx': parseExcel,
    'officeparser': parseWithOfficeParser,
    'html': parseHtmlFile,
    'builtin-text': parseBuiltinText,
  };

  const run = strategies[parser];
  const result = run
    ? await run(filePath)
    : { text: '', warnings: [`Unknown parser "${parser}" for "${ext}"`] };

  let text = result.text || '';
  const warnings = result.warnings || [];

  // Optional truncation — note the original size for the reader.
  if (opts.maxLength && text.length > opts.maxLength) {
    text = text.slice(0, opts.maxLength) + '\n\n... (truncated — original was ' + result.text.length.toLocaleString() + ' chars)';
    warnings.push(`Output truncated to ${opts.maxLength.toLocaleString()} chars`);
  }

  // Surface warnings unless the caller opted out.
  if (!opts.silent && warnings.length > 0) {
    for (const w of warnings) {
      console.warn(` ${c.warn(`${path.basename(filePath)}: ${w}`)}`);
    }
  }

  return {
    text,
    ext,
    parser,
    warnings,
    success: text.length > 0,
  };
}
|
|
329
|
+
|
|
330
|
+
/**
 * Check whether a file extension has a registered parser.
 *
 * @param {string} ext - Extension including the dot (e.g. '.docx'); case-insensitive
 * @returns {boolean} True when parseDocument() can handle the extension
 */
function canParse(ext) {
  // hasOwnProperty guards against prototype-chain keys ('constructor',
  // 'toString', …) matching via the `in` operator, and the type check
  // keeps null/undefined input from throwing on .toLowerCase().
  return typeof ext === 'string' &&
    Object.prototype.hasOwnProperty.call(PARSER_MAP, ext.toLowerCase());
}
|
|
338
|
+
|
|
339
|
+
// Public API — parseDocument() is the main entry point; the extension
// constants are consumed by config.js, and stripHtml/canParse are reused
// by other services.
module.exports = {
  parseDocument,
  canParse,
  stripHtml,
  PARSEABLE_EXTS,
  NEWLY_SUPPORTED_EXTS,
  PARSER_MAP,
};
|
package/src/services/gemini.js
CHANGED
|
@@ -12,6 +12,7 @@ const {
|
|
|
12
12
|
GEMINI_API_KEY,
|
|
13
13
|
GEMINI_FILE_API_EXTS,
|
|
14
14
|
INLINE_TEXT_EXTS,
|
|
15
|
+
DOC_PARSER_EXTS,
|
|
15
16
|
GEMINI_UNSUPPORTED,
|
|
16
17
|
MIME_MAP,
|
|
17
18
|
GEMINI_POLL_TIMEOUT_MS,
|
|
@@ -19,6 +20,7 @@ const {
|
|
|
19
20
|
// Access config.GEMINI_MODEL and config.GEMINI_CONTEXT_WINDOW at call time
|
|
20
21
|
// (not destructured) so runtime model changes via setActiveModel() are visible.
|
|
21
22
|
const { extractJson } = require('../utils/json-parser');
|
|
23
|
+
const { parseDocument } = require('./doc-parser');
|
|
22
24
|
const {
|
|
23
25
|
selectDocsByBudget,
|
|
24
26
|
sliceVttForSegment,
|
|
@@ -28,6 +30,7 @@ const {
|
|
|
28
30
|
} = require('../utils/context-manager');
|
|
29
31
|
const { formatHMS } = require('../utils/format');
|
|
30
32
|
const { withRetry } = require('../utils/retry');
|
|
33
|
+
const { c } = require('../utils/colors');
|
|
31
34
|
|
|
32
35
|
// ======================== INIT ========================
|
|
33
36
|
|
|
@@ -60,7 +63,18 @@ async function prepareDocsForGemini(ai, docFileList) {
|
|
|
60
63
|
console.log(` Reading ${name} (inline text)...`);
|
|
61
64
|
const content = await fs.promises.readFile(docPath, 'utf8');
|
|
62
65
|
prepared.push({ type: 'inlineText', fileName: name, content });
|
|
63
|
-
console.log(`
|
|
66
|
+
console.log(` ${c.success(`${name} ready (${(content.length / 1024).toFixed(1)} KB)`)}`);
|
|
67
|
+
} else if (DOC_PARSER_EXTS.includes(ext)) {
|
|
68
|
+
// Binary document — convert to text via doc-parser
|
|
69
|
+
console.log(` Parsing ${name} (${ext} → text)...`);
|
|
70
|
+
const result = await parseDocument(docPath, { silent: true });
|
|
71
|
+
if (result.success && result.text) {
|
|
72
|
+
prepared.push({ type: 'inlineText', fileName: name, content: result.text });
|
|
73
|
+
console.log(` ${c.success(`${name} parsed (${(result.text.length / 1024).toFixed(1)} KB text extracted)`)}`);
|
|
74
|
+
} else {
|
|
75
|
+
const reason = result.warnings.length > 0 ? result.warnings[0] : 'empty output';
|
|
76
|
+
console.warn(` ${c.warn(`${name} — parse failed (${reason}), will upload to Firebase only`)}`);
|
|
77
|
+
}
|
|
64
78
|
} else if (GEMINI_FILE_API_EXTS.includes(ext)) {
|
|
65
79
|
const mime = MIME_MAP[ext] || 'application/octet-stream';
|
|
66
80
|
console.log(` Uploading ${name} to Gemini File API...`);
|
|
@@ -76,7 +90,7 @@ async function prepareDocsForGemini(ai, docFileList) {
|
|
|
76
90
|
const pollStart = Date.now();
|
|
77
91
|
while (file.state === 'PROCESSING') {
|
|
78
92
|
if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
|
|
79
|
-
console.warn(`
|
|
93
|
+
console.warn(` ${c.warn(`${name} — polling timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s, skipping`)}`);
|
|
80
94
|
file = null;
|
|
81
95
|
break;
|
|
82
96
|
}
|
|
@@ -88,7 +102,7 @@ async function prepareDocsForGemini(ai, docFileList) {
|
|
|
88
102
|
}
|
|
89
103
|
|
|
90
104
|
if (!file || file.state === 'FAILED') {
|
|
91
|
-
console.warn(`
|
|
105
|
+
console.warn(` ${c.warn(`${name} — Gemini processing failed, skipping`)}`);
|
|
92
106
|
continue;
|
|
93
107
|
}
|
|
94
108
|
|
|
@@ -99,14 +113,14 @@ async function prepareDocsForGemini(ai, docFileList) {
|
|
|
99
113
|
fileUri: file.uri,
|
|
100
114
|
geminiFileName: file.name,
|
|
101
115
|
});
|
|
102
|
-
console.log(`
|
|
116
|
+
console.log(` ${c.success(`${name} ready (File API)`)}`);
|
|
103
117
|
} else if (GEMINI_UNSUPPORTED.includes(ext)) {
|
|
104
|
-
console.warn(`
|
|
118
|
+
console.warn(` ${c.warn(`${name} — format not supported by Gemini, will upload to Firebase only`)}`);
|
|
105
119
|
} else {
|
|
106
|
-
console.warn(`
|
|
120
|
+
console.warn(` ${c.warn(`${name} — unknown doc type, skipping`)}`);
|
|
107
121
|
}
|
|
108
122
|
} catch (err) {
|
|
109
|
-
console.warn(`
|
|
123
|
+
console.warn(` ${c.warn(`${name} — failed: ${err.message}`)}`);
|
|
110
124
|
}
|
|
111
125
|
}
|
|
112
126
|
|
|
@@ -258,21 +272,10 @@ async function processWithGemini(ai, filePath, displayName, contextDocs = [], pr
|
|
|
258
272
|
let file;
|
|
259
273
|
let usedExternalUrl = false;
|
|
260
274
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
file = { uri: existingFileUri, mimeType: existingFileMime, name: existingGeminiFileName, state: 'ACTIVE' };
|
|
264
|
-
console.log(` Reusing Gemini File API URI (skip upload)`);
|
|
265
|
-
} else if (storageDownloadUrl) {
|
|
266
|
-
// Strategy B: Use Firebase Storage download URL as Gemini External URL
|
|
267
|
-
// Supported for models >= 2.5; limit 100MB per payload.
|
|
268
|
-
// Gemini fetches the file on-demand — no separate upload + polling needed.
|
|
269
|
-
file = { uri: storageDownloadUrl, mimeType: 'video/mp4', name: null, state: 'ACTIVE' };
|
|
270
|
-
usedExternalUrl = true;
|
|
271
|
-
console.log(` Using Firebase Storage URL as external reference (skip Gemini upload)`);
|
|
272
|
-
} else {
|
|
273
|
-
// Strategy C: Upload to Gemini File API (default fallback)
|
|
275
|
+
// Helper: upload via Gemini File API with polling (Strategy C)
|
|
276
|
+
async function uploadViaFileApi() {
|
|
274
277
|
console.log(` Uploading to Gemini File API...`);
|
|
275
|
-
|
|
278
|
+
let uploaded = await withRetry(
|
|
276
279
|
() => ai.files.upload({
|
|
277
280
|
file: filePath,
|
|
278
281
|
config: { mimeType: 'video/mp4', displayName },
|
|
@@ -280,26 +283,51 @@ async function processWithGemini(ai, filePath, displayName, contextDocs = [], pr
|
|
|
280
283
|
{ label: `Gemini file upload (${displayName})`, maxRetries: 3 }
|
|
281
284
|
);
|
|
282
285
|
|
|
283
|
-
// 3. Wait for processing (with polling + retry on get + timeout)
|
|
284
286
|
let waited = 0;
|
|
285
287
|
const pollStart = Date.now();
|
|
286
|
-
while (
|
|
288
|
+
while (uploaded.state === 'PROCESSING') {
|
|
287
289
|
if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
|
|
288
290
|
throw new Error(`Gemini file processing timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s for ${displayName}. Try again or increase GEMINI_POLL_TIMEOUT_MS.`);
|
|
289
291
|
}
|
|
290
292
|
process.stdout.write(` Processing${'.'.repeat((waited % 3) + 1)} \r`);
|
|
291
293
|
await new Promise(r => setTimeout(r, 5000));
|
|
292
294
|
waited++;
|
|
293
|
-
|
|
294
|
-
() => ai.files.get({ name:
|
|
295
|
+
uploaded = await withRetry(
|
|
296
|
+
() => ai.files.get({ name: uploaded.name }),
|
|
295
297
|
{ label: 'Gemini file status check', maxRetries: 2, baseDelay: 1000 }
|
|
296
298
|
);
|
|
297
299
|
}
|
|
298
300
|
console.log(' Processing complete. ');
|
|
299
301
|
|
|
300
|
-
if (
|
|
301
|
-
throw new Error(`Gemini file processing failed for ${displayName}
|
|
302
|
+
if (uploaded.state === 'FAILED') {
|
|
303
|
+
throw new Error(`Gemini file processing failed for ${displayName}. The file may be corrupt or in an unsupported format — try re-compressing or converting to MP4.`);
|
|
302
304
|
}
|
|
305
|
+
return uploaded;
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
const EXTERNAL_URL_MAX_BYTES = 20 * 1024 * 1024; // 20 MB — Gemini rejects HTTPS URLs for larger files
|
|
309
|
+
|
|
310
|
+
if (existingFileUri) {
|
|
311
|
+
// Strategy A: Reuse Gemini File API URI from a previous pass
|
|
312
|
+
file = { uri: existingFileUri, mimeType: existingFileMime, name: existingGeminiFileName, state: 'ACTIVE' };
|
|
313
|
+
console.log(` Reusing Gemini File API URI (skip upload)`);
|
|
314
|
+
} else if (storageDownloadUrl) {
|
|
315
|
+
// Strategy B: Use Firebase Storage download URL as Gemini External URL
|
|
316
|
+
// Supported for models >= 2.5; Gemini rejects external HTTPS URLs for files > ~20 MB.
|
|
317
|
+
const fileSizeBytes = fs.existsSync(filePath) ? fs.statSync(filePath).size : 0;
|
|
318
|
+
if (fileSizeBytes > EXTERNAL_URL_MAX_BYTES) {
|
|
319
|
+
console.log(` Segment too large for external URL (${(fileSizeBytes / 1048576).toFixed(1)} MB > 20 MB) — using File API upload`);
|
|
320
|
+
// file stays null → falls through to Strategy C below
|
|
321
|
+
} else {
|
|
322
|
+
file = { uri: storageDownloadUrl, mimeType: 'video/mp4', name: null, state: 'ACTIVE' };
|
|
323
|
+
usedExternalUrl = true;
|
|
324
|
+
console.log(` Using Firebase Storage URL as external reference (skip Gemini upload)`);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
if (!file) {
|
|
329
|
+
// Strategy C: Upload to Gemini File API (default fallback, or after B was skipped for large files)
|
|
330
|
+
file = await uploadViaFileApi();
|
|
303
331
|
}
|
|
304
332
|
|
|
305
333
|
// 4. Build content parts with SMART CONTEXT MANAGEMENT
|
|
@@ -375,7 +403,7 @@ async function processWithGemini(ai, filePath, displayName, contextDocs = [], pr
|
|
|
375
403
|
|
|
376
404
|
contentParts.push({ text: promptText });
|
|
377
405
|
|
|
378
|
-
// 5. Send request (
|
|
406
|
+
// 5. Send request (model uses its default thinking budget)
|
|
379
407
|
const requestPayload = {
|
|
380
408
|
model: config.GEMINI_MODEL,
|
|
381
409
|
contents: [{ role: 'user', parts: contentParts }],
|
|
@@ -383,15 +411,50 @@ async function processWithGemini(ai, filePath, displayName, contextDocs = [], pr
|
|
|
383
411
|
systemInstruction,
|
|
384
412
|
maxOutputTokens: 65536,
|
|
385
413
|
temperature: 0,
|
|
386
|
-
thinkingConfig: { thinkingBudget },
|
|
387
414
|
},
|
|
388
415
|
};
|
|
389
416
|
|
|
390
417
|
const t0 = Date.now();
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
418
|
+
let response;
|
|
419
|
+
try {
|
|
420
|
+
response = await withRetry(
|
|
421
|
+
() => ai.models.generateContent(requestPayload),
|
|
422
|
+
{ label: `Gemini segment analysis (${displayName})`, maxRetries: 2, baseDelay: 5000 }
|
|
423
|
+
);
|
|
424
|
+
} catch (apiErr) {
|
|
425
|
+
const errMsg = apiErr.message || '';
|
|
426
|
+
|
|
427
|
+
// Automatic fallback: if external URL was rejected, retry via Gemini File API upload
|
|
428
|
+
if (usedExternalUrl && errMsg.includes('INVALID_ARGUMENT')) {
|
|
429
|
+
console.log(` ${c.warn('External URL rejected by Gemini — falling back to File API upload...')}`);
|
|
430
|
+
try {
|
|
431
|
+
file = await uploadViaFileApi();
|
|
432
|
+
usedExternalUrl = false;
|
|
433
|
+
// Replace the video reference in contentParts[0]
|
|
434
|
+
contentParts[0] = { fileData: { mimeType: file.mimeType, fileUri: file.uri } };
|
|
435
|
+
requestPayload.contents[0].parts = contentParts;
|
|
436
|
+
response = await withRetry(
|
|
437
|
+
() => ai.models.generateContent(requestPayload),
|
|
438
|
+
{ label: `Gemini segment analysis — File API retry (${displayName})`, maxRetries: 2, baseDelay: 5000 }
|
|
439
|
+
);
|
|
440
|
+
console.log(` ${c.success('File API fallback succeeded')}`);
|
|
441
|
+
} catch (fallbackErr) {
|
|
442
|
+
console.error(` ${c.error(`File API fallback also failed: ${fallbackErr.message}`)}`);
|
|
443
|
+
throw fallbackErr;
|
|
444
|
+
}
|
|
445
|
+
} else {
|
|
446
|
+
// Log request diagnostics for other errors to aid debugging
|
|
447
|
+
const partSummary = contentParts.map((p, i) => {
|
|
448
|
+
if (p.fileData) return ` [${i}] fileData: ${p.fileData.mimeType} → ${(p.fileData.fileUri || '').substring(0, 120)}`;
|
|
449
|
+
if (p.text) return ` [${i}] text: ${p.text.length} chars → ${p.text.substring(0, 80).replace(/\n/g, ' ')}...`;
|
|
450
|
+
return ` [${i}] unknown part`;
|
|
451
|
+
});
|
|
452
|
+
console.error(` ${c.error('Request diagnostics:')}`);
|
|
453
|
+
console.error(` Model: ${config.GEMINI_MODEL} | Parts: ${contentParts.length} | maxOutput: 65536`);
|
|
454
|
+
partSummary.forEach(s => console.error(` ${s}`));
|
|
455
|
+
throw apiErr;
|
|
456
|
+
}
|
|
457
|
+
}
|
|
395
458
|
const durationMs = Date.now() - t0;
|
|
396
459
|
|
|
397
460
|
const rawText = response.text;
|
|
@@ -556,11 +619,6 @@ ${segmentDumps}`;
|
|
|
556
619
|
systemInstruction: `${systemInstruction}\n\nYou are now in COMPILATION MODE — your job is to merge multiple segment analyses into one final unified output. Deduplicate, reconcile conflicts, and produce the definitive analysis. Output valid JSON only — no markdown fences.`,
|
|
557
620
|
maxOutputTokens: 65536,
|
|
558
621
|
temperature: 0,
|
|
559
|
-
// Thinking tokens share the maxOutputTokens pool in Gemini 2.5+ models.
|
|
560
|
-
// Default 10240 leaves ~55K for output — enough for full structured merge.
|
|
561
|
-
// Too low (4096) → model hits ceiling and produces minimal output.
|
|
562
|
-
// Too high (16384) → eats into output budget causing truncation.
|
|
563
|
-
thinkingConfig: { thinkingBudget: compilationThinking },
|
|
564
622
|
},
|
|
565
623
|
};
|
|
566
624
|
|
|
@@ -594,9 +652,9 @@ ${segmentDumps}`;
|
|
|
594
652
|
const compiled = extractJson(rawText);
|
|
595
653
|
|
|
596
654
|
if (!compiled) {
|
|
597
|
-
console.warn('
|
|
655
|
+
console.warn(` ${c.warn('Failed to parse compiled result — falling back to raw segment merge')}`);
|
|
598
656
|
} else {
|
|
599
|
-
console.log('
|
|
657
|
+
console.log(` ${c.success('Final compilation complete')}`);
|
|
600
658
|
}
|
|
601
659
|
|
|
602
660
|
return {
|
|
@@ -657,7 +715,7 @@ async function analyzeVideoForContext(ai, filePath, displayName, opts = {}) {
|
|
|
657
715
|
console.log(' Processing complete. ');
|
|
658
716
|
|
|
659
717
|
if (file.state === 'FAILED') {
|
|
660
|
-
throw new Error(`Gemini file processing failed for ${displayName}
|
|
718
|
+
throw new Error(`Gemini file processing failed for ${displayName}. The file may be corrupt or in an unsupported format — try re-compressing or using a different segment.`);
|
|
661
719
|
}
|
|
662
720
|
|
|
663
721
|
// 3. Build prompt for comprehensive summary
|
|
@@ -701,7 +759,6 @@ FORMAT:
|
|
|
701
759
|
systemInstruction: 'You are a meticulous video analyst. Produce comprehensive, detailed summaries that capture everything in the video. Write in clear Markdown prose.',
|
|
702
760
|
maxOutputTokens: 32768,
|
|
703
761
|
temperature: 0.1,
|
|
704
|
-
thinkingConfig: { thinkingBudget },
|
|
705
762
|
},
|
|
706
763
|
};
|
|
707
764
|
|
|
@@ -718,7 +775,7 @@ FORMAT:
|
|
|
718
775
|
try {
|
|
719
776
|
await ai.files.delete({ name: file.name });
|
|
720
777
|
} catch (cleanupErr) {
|
|
721
|
-
console.warn(`
|
|
778
|
+
console.warn(` ${c.warn(`Gemini file cleanup failed: ${cleanupErr.message}`)}`);
|
|
722
779
|
}
|
|
723
780
|
|
|
724
781
|
const usage = response.usageMetadata || {};
|
|
@@ -730,7 +787,7 @@ FORMAT:
|
|
|
730
787
|
};
|
|
731
788
|
|
|
732
789
|
console.log(` Tokens — input: ${tokenUsage.inputTokens.toLocaleString()} | output: ${tokenUsage.outputTokens.toLocaleString()} | thinking: ${tokenUsage.thoughtTokens.toLocaleString()}`);
|
|
733
|
-
|
|
790
|
+
console.log(` ${c.success(`Summary: ${summary.length.toLocaleString()} chars in ${(durationMs / 1000).toFixed(1)}s`)}`);
|
|
734
791
|
|
|
735
792
|
return { summary, durationMs, tokenUsage };
|
|
736
793
|
}
|