task-summary-extractor 9.2.1 → 9.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -12,8 +12,8 @@ GEMINI_API_KEY=your_gemini_api_key
12
12
  GEMINI_MODEL=gemini-2.5-flash
13
13
 
14
14
  # ======================== VIDEO PROCESSING ========================
15
- # Speed multiplier (default: 1.5)
16
- VIDEO_SPEED=1.5
15
+ # Speed multiplier (default: 1.6)
16
+ VIDEO_SPEED=1.6
17
17
  # Segment duration in seconds (default: 280)
18
18
  VIDEO_SEGMENT_TIME=280
19
19
  # ffmpeg preset: ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow
@@ -36,3 +36,7 @@ THINKING_BUDGET=24576
36
36
  COMPILATION_THINKING_BUDGET=10240
37
37
  # Max polling time for Gemini File API processing in ms (default: 300000 = 5 min)
38
38
  GEMINI_POLL_TIMEOUT_MS=300000
39
+
40
+ # ======================== NPM PUBLISHING ========================
41
+ # Automation token for npm publish (optional — if not set, browser sign-in is used)
42
+ # NPM_TOKEN=npm_your_token_here
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "task-summary-extractor",
3
- "version": "9.2.1",
3
+ "version": "9.3.1",
4
4
  "description": "AI-powered meeting analysis & document generation CLI — video + document processing, deep dive docs, dynamic mode, interactive CLI with model selection, confidence scoring, learning loop, git progress tracking",
5
5
  "main": "process_and_upload.js",
6
6
  "bin": {
@@ -15,8 +15,7 @@
15
15
  ".env.example",
16
16
  "README.md",
17
17
  "QUICK_START.md",
18
- "ARCHITECTURE.md",
19
- "EXPLORATION.md"
18
+ "ARCHITECTURE.md"
20
19
  ],
21
20
  "scripts": {
22
21
  "setup": "node setup.js",
package/src/config.js CHANGED
@@ -220,7 +220,7 @@ function getMaxThinkingBudget() {
220
220
 
221
221
  // ======================== VIDEO PROCESSING ========================
222
222
 
223
- const SPEED = envFloat('VIDEO_SPEED', 1.5);
223
+ const SPEED = envFloat('VIDEO_SPEED', 1.6);
224
224
  const SEG_TIME = envInt('VIDEO_SEGMENT_TIME', 280); // seconds — produces segments < 5 min
225
225
  const PRESET = env('VIDEO_PRESET', 'slow');
226
226
  const VIDEO_EXTS = ['.mp4', '.mkv', '.avi', '.mov', '.webm'];
package/src/logger.js CHANGED
@@ -321,16 +321,17 @@ class Logger {
321
321
  /** Flush buffers and close the logger. Safe to call multiple times. */
322
322
  close() {
323
323
  if (this.closed) return;
324
- this.closed = true;
325
324
  clearInterval(this._flushInterval);
326
325
  this.unpatchConsole();
327
326
 
328
- // End active phase if any
327
+ // End active phase if any (must happen BEFORE setting closed flag
328
+ // so _writeStructured inside phaseEnd is not blocked)
329
329
  if (this._activePhase) {
330
330
  this.phaseEnd();
331
331
  }
332
332
 
333
- // Write footer
333
+ // Write footer and session_end BEFORE setting closed flag
334
+ // so _writeStructured is not blocked by the guard
334
335
  const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1);
335
336
  const footer = `\n=== CLOSED | elapsed: ${elapsed}s | ${new Date().toISOString()} ===\n`;
336
337
  this._detailedBuffer.push(footer);
@@ -342,6 +343,8 @@ class Logger {
342
343
  timestamp: new Date().toISOString(),
343
344
  level: 'info',
344
345
  });
346
+
347
+ this.closed = true;
345
348
  this._flush(true); // sync flush on close to ensure data is written before process exits
346
349
  }
347
350
 
@@ -0,0 +1,375 @@
1
+ /**
2
+ * Deep Summary — pre-summarizes context documents before segment analysis
3
+ * to dramatically reduce input tokens per segment.
4
+ *
5
+ * Instead of sending full document content (potentially 500K+ tokens) to
6
+ * every segment, this module:
7
+ * 1. Groups documents by priority tier
8
+ * 2. Sends each group to Gemini for intelligent condensation
9
+ * 3. Replaces full content with condensed summaries
10
+ * 4. Preserves "excluded" docs at full fidelity (user-chosen focus docs)
11
+ * 5. Ensures summaries capture all ticket IDs, action items, statuses
12
+ *
13
+ * The user can pick specific docs to EXCLUDE from summarization — these stay
14
+ * full. The summary pass receives extra instructions to focus on extracting
15
+ * information related to these excluded docs' topics.
16
+ *
17
+ * Token savings: typically 60-80% reduction in per-segment context tokens.
18
+ */
19
+
20
+ 'use strict';
21
+
22
+ const { extractJson } = require('../utils/json-parser');
23
+ const { withRetry } = require('../utils/retry');
24
+ const { estimateTokens } = require('../utils/context-manager');
25
+ const { c } = require('../utils/colors');
26
+ const config = require('../config');
27
+
28
+ // ======================== CONSTANTS ========================
29
+
30
+ /** Max tokens for a single summarization call output */
31
+ const SUMMARY_MAX_OUTPUT = 16384;
32
+
33
+ /** Max input chars to send in one summarization batch (~180K tokens @ 0.3 tok/char) */
34
+ const BATCH_MAX_CHARS = 600000;
35
+
36
+ /** Minimum content length (chars) to bother summarizing — below this, keep full */
37
+ const MIN_SUMMARIZE_LENGTH = 500;
38
+
39
+ // ======================== BATCH BUILDER ========================
40
+
41
+ /**
42
+ * Group documents into batches that fit within the batch char limit.
43
+ * Each batch will be summarized in a single Gemini call.
44
+ *
45
+ * @param {Array} docs - Context docs to batch [{type, fileName, content}]
46
+ * @param {number} [maxChars=BATCH_MAX_CHARS] - Max chars per batch
47
+ * @returns {Array<Array>} Batches of docs
48
+ */
49
+ function buildBatches(docs, maxChars = BATCH_MAX_CHARS) {
50
+ const batches = [];
51
+ let currentBatch = [];
52
+ let currentChars = 0;
53
+
54
+ for (const doc of docs) {
55
+ const docChars = doc.content ? doc.content.length : 0;
56
+
57
+ // If this single doc exceeds the batch limit, it gets its own batch
58
+ if (docChars > maxChars) {
59
+ if (currentBatch.length > 0) {
60
+ batches.push(currentBatch);
61
+ currentBatch = [];
62
+ currentChars = 0;
63
+ }
64
+ batches.push([doc]);
65
+ continue;
66
+ }
67
+
68
+ if (currentChars + docChars > maxChars && currentBatch.length > 0) {
69
+ batches.push(currentBatch);
70
+ currentBatch = [];
71
+ currentChars = 0;
72
+ }
73
+
74
+ currentBatch.push(doc);
75
+ currentChars += docChars;
76
+ }
77
+
78
+ if (currentBatch.length > 0) {
79
+ batches.push(currentBatch);
80
+ }
81
+
82
+ return batches;
83
+ }
84
+
85
+ // ======================== SUMMARIZE ONE BATCH ========================
86
+
87
+ /**
88
+ * Summarize a batch of documents into a condensed representation.
89
+ *
90
+ * @param {object} ai - Gemini AI instance
91
+ * @param {Array} docs - Documents in this batch
92
+ * @param {object} [opts]
93
+ * @param {string[]} [opts.focusTopics=[]] - Topics to focus on (from excluded docs)
94
+ * @param {number} [opts.thinkingBudget=8192] - Thinking token budget
95
+ * @param {number} [opts.batchIndex=0] - Batch number for logging
96
+ * @param {number} [opts.totalBatches=1] - Total batches for logging
97
+ * @returns {Promise<{summaries: Map<string, string>, tokenUsage: object}|null>}
98
+ */
99
+ async function summarizeBatch(ai, docs, opts = {}) {
100
+ const {
101
+ focusTopics = [],
102
+ thinkingBudget = 8192,
103
+ batchIndex = 0,
104
+ totalBatches = 1,
105
+ } = opts;
106
+
107
+ const docEntries = docs
108
+ .filter(d => d.type === 'inlineText' && d.content)
109
+ .map(d => `=== DOCUMENT: ${d.fileName} ===\n${d.content}`);
110
+
111
+ if (docEntries.length === 0) return null;
112
+
113
+ const focusSection = focusTopics.length > 0
114
+ ? `\n\nFOCUS AREAS — The user has selected certain documents to keep at full fidelity. ` +
115
+ `Your summaries must be especially thorough about information related to these topics:\n` +
116
+ focusTopics.map((t, i) => ` ${i + 1}. ${t}`).join('\n') +
117
+ `\n\nFor every ticket ID, action item, blocker, or status mentioned in relation to these ` +
118
+ `focus areas, include them verbatim in the summary. Do NOT omit any IDs or assignments.`
119
+ : '';
120
+
121
+ const promptText = `You are a precision document summarizer for a meeting analysis pipeline.
122
+
123
+ Your job: read ALL documents below and produce a CONDENSED version of each that preserves:
124
+ - Every ticket ID, task ID, CR number, or reference number (verbatim)
125
+ - All assignees, reviewers, and responsible parties
126
+ - All statuses (open, closed, in_progress, blocked, etc.)
127
+ - All action items and their owners
128
+ - All blockers, dependencies, and deadlines
129
+ - Key decisions and their rationale
130
+ - File paths and code references
131
+ - Numerical data (percentages, counts, dates, versions)
132
+
133
+ What to remove:
134
+ - Verbose explanations of well-known concepts
135
+ - Redundant phrasing and filler text
136
+ - Formatting-only content (decorative headers, dividers)
137
+ - Boilerplate/template text that adds no information
138
+ ${focusSection}
139
+
140
+ OUTPUT FORMAT:
141
+ Return valid JSON with this structure:
142
+ {
143
+ "summaries": {
144
+ "<fileName>": "<condensed text — plain text, preserving all key info>",
145
+ ...
146
+ },
147
+ "metadata": {
148
+ "originalTokensEstimate": <number>,
149
+ "summaryTokensEstimate": <number>,
150
+ "compressionRatio": <number between 0 and 1>
151
+ }
152
+ }
153
+
154
+ Aim for 70-80% size reduction while preserving ALL actionable information.
155
+ Every ID, every name, every status must survive the summarization.
156
+
157
+ DOCUMENTS TO SUMMARIZE (${docEntries.length} documents):
158
+
159
+ ${docEntries.join('\n\n')}`;
160
+
161
+ const requestPayload = {
162
+ model: config.GEMINI_MODEL,
163
+ contents: [{ role: 'user', parts: [{ text: promptText }] }],
164
+ config: {
165
+ systemInstruction: 'You are a lossless information compressor. Preserve every ID, name, status, assignment, and actionable detail. Output valid JSON only.',
166
+ maxOutputTokens: SUMMARY_MAX_OUTPUT,
167
+ temperature: 0,
168
+ thinkingConfig: { thinkingBudget },
169
+ },
170
+ };
171
+
172
+ try {
173
+ const label = totalBatches > 1
174
+ ? `Deep summary batch ${batchIndex + 1}/${totalBatches}`
175
+ : 'Deep summary';
176
+
177
+ const response = await withRetry(
178
+ () => ai.models.generateContent(requestPayload),
179
+ { label, maxRetries: 2, baseDelay: 3000 }
180
+ );
181
+
182
+ const rawText = response.text;
183
+ const parsed = extractJson(rawText);
184
+
185
+ if (!parsed || !parsed.summaries) return null;
186
+
187
+ const usage = response.usageMetadata || {};
188
+ const tokenUsage = {
189
+ inputTokens: usage.promptTokenCount || 0,
190
+ outputTokens: usage.candidatesTokenCount || 0,
191
+ totalTokens: usage.totalTokenCount || 0,
192
+ thoughtTokens: usage.thoughtsTokenCount || 0,
193
+ };
194
+
195
+ return { summaries: parsed.summaries, metadata: parsed.metadata || {}, tokenUsage };
196
+ } catch (err) {
197
+ console.warn(` ${c.warn(`Deep summary batch ${batchIndex + 1} failed: ${err.message}`)}`);
198
+ return null;
199
+ }
200
+ }
201
+
202
+ // ======================== MAIN ENTRY POINT ========================
203
+
204
+ /**
205
+ * Run deep summarization on context documents.
206
+ *
207
+ * @param {object} ai - Gemini AI instance
208
+ * @param {Array} contextDocs - All prepared context docs
209
+ * @param {object} [opts]
210
+ * @param {string[]} [opts.excludeFileNames=[]] - Doc fileNames to keep at full fidelity
211
+ * @param {number} [opts.thinkingBudget=8192] - Thinking budget per batch
212
+ * @param {Function} [opts.onProgress] - Callback(done, total) for progress
213
+ * @returns {Promise<{docs: Array, stats: object}>}
214
+ */
215
+ async function deepSummarize(ai, contextDocs, opts = {}) {
216
+ const {
217
+ excludeFileNames = [],
218
+ thinkingBudget = 8192,
219
+ onProgress = null,
220
+ } = opts;
221
+
222
+ const excludeSet = new Set(excludeFileNames.map(n => n.toLowerCase()));
223
+
224
+ // Partition: docs to summarize vs docs to keep full
225
+ const toSummarize = [];
226
+ const keepFull = [];
227
+
228
+ for (const doc of contextDocs) {
229
+ // Keep non-text docs (fileData = PDF etc.) as-is
230
+ if (doc.type !== 'inlineText') {
231
+ keepFull.push(doc);
232
+ continue;
233
+ }
234
+
235
+ // Keep excluded docs at full fidelity
236
+ if (excludeSet.has(doc.fileName.toLowerCase())) {
237
+ keepFull.push(doc);
238
+ continue;
239
+ }
240
+
241
+ // Skip tiny docs — not worth summarizing
242
+ if (!doc.content || doc.content.length < MIN_SUMMARIZE_LENGTH) {
243
+ keepFull.push(doc);
244
+ continue;
245
+ }
246
+
247
+ toSummarize.push(doc);
248
+ }
249
+
250
+ if (toSummarize.length === 0) {
251
+ return {
252
+ docs: contextDocs,
253
+ stats: {
254
+ summarized: 0,
255
+ keptFull: keepFull.length,
256
+ originalTokens: 0,
257
+ summaryTokens: 0,
258
+ savedTokens: 0,
259
+ savingsPercent: 0,
260
+ totalInputTokens: 0,
261
+ totalOutputTokens: 0,
262
+ },
263
+ };
264
+ }
265
+
266
+ // Build focus topics from excluded docs (tell summarizer what to prioritize)
267
+ const focusTopics = keepFull
268
+ .filter(d => d.type === 'inlineText' && excludeSet.has(d.fileName.toLowerCase()))
269
+ .map(d => d.fileName);
270
+
271
+ // Batch documents
272
+ const batches = buildBatches(toSummarize);
273
+
274
+ console.log(` Batched ${c.highlight(toSummarize.length)} doc(s) into ${c.highlight(batches.length)} summarization batch(es)`);
275
+ if (focusTopics.length > 0) {
276
+ console.log(` Focus topics from ${c.highlight(focusTopics.length)} excluded doc(s):`);
277
+ focusTopics.forEach(t => console.log(` ${c.dim('•')} ${c.cyan(t)}`));
278
+ }
279
+
280
+ // Process batches (sequential for now; can add parallelization later)
281
+ const allSummaries = new Map();
282
+ let totalInput = 0;
283
+ let totalOutput = 0;
284
+ let batchesDone = 0;
285
+
286
+ for (let i = 0; i < batches.length; i++) {
287
+ const result = await summarizeBatch(ai, batches[i], {
288
+ focusTopics,
289
+ thinkingBudget,
290
+ batchIndex: i,
291
+ totalBatches: batches.length,
292
+ });
293
+
294
+ batchesDone++;
295
+ if (onProgress) onProgress(batchesDone, batches.length);
296
+
297
+ if (result && result.summaries) {
298
+ for (const [fileName, summary] of Object.entries(result.summaries)) {
299
+ allSummaries.set(fileName.toLowerCase(), summary);
300
+ }
301
+ totalInput += result.tokenUsage.inputTokens;
302
+ totalOutput += result.tokenUsage.outputTokens;
303
+ }
304
+ }
305
+
306
+ // Replace doc content with summaries
307
+ let originalTokens = 0;
308
+ let summaryTokens = 0;
309
+ const resultDocs = [];
310
+
311
+ for (const doc of contextDocs) {
312
+ if (doc.type !== 'inlineText') {
313
+ resultDocs.push(doc);
314
+ continue;
315
+ }
316
+
317
+ // Check if this doc was excluded (kept full)
318
+ if (excludeSet.has(doc.fileName.toLowerCase())) {
319
+ resultDocs.push(doc);
320
+ continue;
321
+ }
322
+
323
+ // Check if we have a summary for this doc
324
+ const summaryKey = doc.fileName.toLowerCase();
325
+ const summary = allSummaries.get(summaryKey);
326
+
327
+ if (summary && summary.length > 0) {
328
+ const origTokens = estimateTokens(doc.content);
329
+ const sumTokens = estimateTokens(summary);
330
+ originalTokens += origTokens;
331
+ summaryTokens += sumTokens;
332
+
333
+ resultDocs.push({
334
+ ...doc,
335
+ content: `[Deep Summary — original: ~${origTokens.toLocaleString()} tokens → condensed: ~${sumTokens.toLocaleString()} tokens]\n\n${summary}`,
336
+ _originalLength: doc.content.length,
337
+ _summaryLength: summary.length,
338
+ _deepSummarized: true,
339
+ });
340
+ } else {
341
+ // No summary returned — keep original
342
+ resultDocs.push(doc);
343
+ }
344
+ }
345
+
346
+ const savedTokens = originalTokens - summaryTokens;
347
+ const savingsPercent = originalTokens > 0
348
+ ? parseFloat(((savedTokens / originalTokens) * 100).toFixed(1))
349
+ : 0;
350
+
351
+ return {
352
+ docs: resultDocs,
353
+ stats: {
354
+ summarized: allSummaries.size,
355
+ keptFull: keepFull.length,
356
+ originalTokens,
357
+ summaryTokens,
358
+ savedTokens,
359
+ savingsPercent,
360
+ totalInputTokens: totalInput,
361
+ totalOutputTokens: totalOutput,
362
+ },
363
+ };
364
+ }
365
+
366
+ // ======================== EXPORTS ========================
367
+
368
+ module.exports = {
369
+ deepSummarize,
370
+ summarizeBatch,
371
+ buildBatches,
372
+ SUMMARY_MAX_OUTPUT,
373
+ BATCH_MAX_CHARS,
374
+ MIN_SUMMARIZE_LENGTH,
375
+ };
@@ -85,6 +85,7 @@ async function phaseDiscover(ctx) {
85
85
  if (opts.resume) activeFlags.push('resume');
86
86
  if (opts.reanalyze) activeFlags.push('reanalyze');
87
87
  if (opts.dryRun) activeFlags.push('dry-run');
88
+ if (opts.deepSummary) activeFlags.push('deep-summary');
88
89
  if (activeFlags.length > 0) {
89
90
  console.log(` Flags: ${c.yellow(activeFlags.join(', '))}`);
90
91
  }
@@ -67,6 +67,10 @@ async function phaseInit() {
67
67
  disableDiff: !!flags['no-diff'],
68
68
  noHtml: !!flags['no-html'],
69
69
  deepDive: !!flags['deep-dive'],
70
+ deepSummary: !!flags['deep-summary'],
71
+ deepSummaryExclude: typeof flags['exclude-docs'] === 'string'
72
+ ? flags['exclude-docs'].split(',').map(s => s.trim()).filter(Boolean)
73
+ : [], // populated by CLI flag, interactive picker, or kept empty
70
74
  dynamic: !!flags.dynamic,
71
75
  request: typeof flags.request === 'string' ? flags.request : null,
72
76
  updateProgress: !!flags['update-progress'],
@@ -94,36 +98,10 @@ async function phaseInit() {
94
98
  opts.runMode = mode;
95
99
 
96
100
  if (mode !== 'custom') {
97
- // Apply preset overrides
98
- const { selectRunMode: _ignore, ...cliModule } = require('../utils/cli');
99
- // Access RUN_PRESETS from the module
100
- const presetOverrides = {
101
- fast: {
102
- disableFocusedPass: true,
103
- disableLearning: true,
104
- disableDiff: true,
105
- format: 'md,json',
106
- formats: new Set(['md', 'json']),
107
- modelTier: 'economy',
108
- },
109
- balanced: {
110
- disableFocusedPass: false,
111
- disableLearning: false,
112
- disableDiff: false,
113
- format: 'all',
114
- formats: new Set(['md', 'html', 'json', 'pdf', 'docx']),
115
- modelTier: 'balanced',
116
- },
117
- detailed: {
118
- disableFocusedPass: false,
119
- disableLearning: false,
120
- disableDiff: false,
121
- format: 'all',
122
- formats: new Set(['md', 'html', 'json', 'pdf', 'docx']),
123
- modelTier: 'premium',
124
- },
125
- };
126
- const preset = presetOverrides[mode];
101
+ // Apply preset overrides from the shared RUN_PRESETS definition
102
+ const { RUN_PRESETS } = require('../utils/cli');
103
+ const presetDef = RUN_PRESETS[mode];
104
+ const preset = presetDef ? presetDef.overrides : null;
127
105
  if (preset) {
128
106
  opts.disableFocusedPass = preset.disableFocusedPass;
129
107
  opts.disableLearning = preset.disableLearning;
@@ -322,6 +300,7 @@ function _printRunSummary(opts, modelId, models, targetDir) {
322
300
  if (!opts.disableLearning) features.push(c.green('learning'));
323
301
  if (!opts.disableDiff) features.push(c.green('diff'));
324
302
  if (opts.deepDive) features.push(c.cyan('deep-dive'));
303
+ if (opts.deepSummary) features.push(c.cyan('deep-summary'));
325
304
  if (opts.dynamic) features.push(c.cyan('dynamic'));
326
305
  if (opts.resume) features.push(c.yellow('resume'));
327
306
  if (opts.dryRun) features.push(c.yellow('dry-run'));
@@ -7,6 +7,9 @@ const path = require('path');
7
7
  const { initFirebase, uploadToStorage, storageExists } = require('../services/firebase');
8
8
  const { initGemini, prepareDocsForGemini } = require('../services/gemini');
9
9
 
10
+ // --- Modes ---
11
+ const { deepSummarize } = require('../modes/deep-summary');
12
+
10
13
  // --- Utils ---
11
14
  const { parallelMap } = require('../utils/retry');
12
15
 
@@ -101,4 +104,61 @@ async function phaseServices(ctx) {
101
104
  return { ...ctx, storage, firebaseReady, ai, contextDocs, docStorageUrls, callName };
102
105
  }
103
106
 
104
- module.exports = phaseServices;
107
+ // ======================== PHASE: DEEP SUMMARY ========================
108
+
109
+ /**
110
+ * Pre-summarize context documents to save input tokens per segment.
111
+ * Runs only when --deep-summary flag is active.
112
+ *
113
+ * @param {object} ctx - Pipeline context with ai, contextDocs, opts
114
+ * @returns {object} Updated ctx with summarized contextDocs and deepSummaryStats
115
+ */
116
+ async function phaseDeepSummary(ctx) {
117
+ const log = getLog();
118
+ const { opts, ai, contextDocs } = ctx;
119
+
120
+ if (!opts.deepSummary || !ai || contextDocs.length === 0) {
121
+ return { ...ctx, deepSummaryStats: null };
122
+ }
123
+
124
+ console.log('');
125
+ console.log(c.cyan(' ── Deep Summary — Pre-summarizing context documents ──'));
126
+ log.step('Deep summary: starting context document pre-summarization');
127
+ if (log && log.phaseStart) log.phaseStart('deep_summary');
128
+
129
+ const excludeNames = opts.deepSummaryExclude || [];
130
+ let updatedDocs = contextDocs;
131
+ let deepSummaryStats = null;
132
+
133
+ try {
134
+ const result = await deepSummarize(ai, contextDocs, {
135
+ excludeFileNames: excludeNames,
136
+ thinkingBudget: Math.min(8192, opts.thinkingBudget),
137
+ });
138
+
139
+ updatedDocs = result.docs;
140
+ deepSummaryStats = result.stats;
141
+
142
+ if (deepSummaryStats.summarized > 0) {
143
+ console.log(` ${c.success(`Summarized ${c.highlight(deepSummaryStats.summarized)} doc(s) — saved ~${c.highlight(deepSummaryStats.savedTokens.toLocaleString())} tokens (${c.yellow(deepSummaryStats.savingsPercent + '%')} reduction)`)}`);
144
+ console.log(` ${c.dim('Original:')} ~${deepSummaryStats.originalTokens.toLocaleString()} tokens → ${c.dim('Condensed:')} ~${deepSummaryStats.summaryTokens.toLocaleString()} tokens`);
145
+ if (deepSummaryStats.keptFull > 0) {
146
+ console.log(` ${c.dim('Kept full:')} ${deepSummaryStats.keptFull} doc(s) (excluded from summary)`);
147
+ }
148
+ log.step(`Deep summary: ${deepSummaryStats.summarized} docs summarized, ${deepSummaryStats.savedTokens} tokens saved (${deepSummaryStats.savingsPercent}%)`);
149
+ log.metric('deep_summary', deepSummaryStats);
150
+ } else {
151
+ console.log(` ${c.dim('No documents needed summarization')}`);
152
+ }
153
+ } catch (err) {
154
+ console.warn(` ${c.warn(`Deep summary failed (continuing with full docs): ${err.message}`)}`);
155
+ log.warn(`Deep summary failed: ${err.message}`);
156
+ }
157
+
158
+ if (log && log.phaseEnd) log.phaseEnd({ stats: deepSummaryStats });
159
+ console.log('');
160
+
161
+ return { ...ctx, contextDocs: updatedDocs, deepSummaryStats };
162
+ }
163
+
164
+ module.exports = { phaseServices, phaseDeepSummary };
package/src/pipeline.js CHANGED
@@ -32,7 +32,7 @@ const { getLog, isShuttingDown, PKG_ROOT, PROJECT_ROOT } = require('./phases/_sh
32
32
  // --- Pipeline phases ---
33
33
  const phaseInit = require('./phases/init');
34
34
  const phaseDiscover = require('./phases/discover');
35
- const phaseServices = require('./phases/services');
35
+ const { phaseServices, phaseDeepSummary } = require('./phases/services');
36
36
  const phaseProcessVideo = require('./phases/process-media');
37
37
  const phaseCompile = require('./phases/compile');
38
38
  const phaseOutput = require('./phases/output');
@@ -46,7 +46,7 @@ const phaseDeepDive = require('./phases/deep-dive');
46
46
  // --- Utils (for run orchestration + alt modes) ---
47
47
  const { c } = require('./utils/colors');
48
48
  const { findDocsRecursive } = require('./utils/fs');
49
- const { promptUserText } = require('./utils/cli');
49
+ const { promptUserText, selectDocsToExclude } = require('./utils/cli');
50
50
  const { createProgressBar } = require('./utils/progress-bar');
51
51
  const { buildHealthReport, printHealthDashboard } = require('./utils/health-dashboard');
52
52
  const { saveHistory, buildHistoryEntry } = require('./utils/learning-loop');
@@ -92,9 +92,21 @@ async function run() {
92
92
 
93
93
  // Phase 3: Services
94
94
  bar.setPhase('services');
95
- const fullCtx = await phaseServices(ctx);
95
+ let fullCtx = await phaseServices(ctx);
96
96
  bar.tick('Services ready');
97
97
 
98
+ // Phase 3.5 (optional): Deep Summary — pre-summarize context docs
99
+ if (fullCtx.opts.deepSummary && fullCtx.ai && fullCtx.contextDocs.length > 0) {
100
+ // Interactive picker: let user choose docs to keep at full fidelity
101
+ if (process.stdin.isTTY && fullCtx.opts.deepSummaryExclude.length === 0) {
102
+ const excluded = await selectDocsToExclude(fullCtx.contextDocs);
103
+ fullCtx.opts.deepSummaryExclude = excluded;
104
+ }
105
+ bar.setPhase('deep-summary', 1);
106
+ fullCtx = await phaseDeepSummary(fullCtx);
107
+ bar.tick('Docs summarized');
108
+ }
109
+
98
110
  // Phase 4: Process each media file (video or audio)
99
111
  const allSegmentAnalyses = [];
100
112
  const allSegmentReports = [];
@@ -117,6 +129,7 @@ async function run() {
117
129
  contextDocuments: fullCtx.contextDocs.map(d => d.fileName),
118
130
  documentStorageUrls: fullCtx.docStorageUrls,
119
131
  firebaseAuthenticated: fullCtx.firebaseReady,
132
+ deepSummary: fullCtx.deepSummaryStats || null,
120
133
  files: [],
121
134
  };
122
135
 
@@ -90,7 +90,7 @@ async function prepareDocsForGemini(ai, docFileList) {
90
90
  const pollStart = Date.now();
91
91
  while (file.state === 'PROCESSING') {
92
92
  if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
93
- console.warn(` ${c.warn(`${name} — polling timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s, skipping`)}`);
93
+ console.warn(` ${c.warn(`${name} — file is still processing after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s, skipping (you can increase the wait time with GEMINI_POLL_TIMEOUT_MS in .env)`)}`);
94
94
  file = null;
95
95
  break;
96
96
  }
@@ -287,7 +287,7 @@ async function processWithGemini(ai, filePath, displayName, contextDocs = [], pr
287
287
  const pollStart = Date.now();
288
288
  while (uploaded.state === 'PROCESSING') {
289
289
  if (Date.now() - pollStart > GEMINI_POLL_TIMEOUT_MS) {
290
- throw new Error(`Gemini file processing timed out after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s for ${displayName}. Try again or increase GEMINI_POLL_TIMEOUT_MS.`);
290
+ throw new Error(`File "${displayName}" is still processing after ${(GEMINI_POLL_TIMEOUT_MS / 1000).toFixed(0)}s. Try again or increase the wait time by setting GEMINI_POLL_TIMEOUT_MS in your .env file.`);
291
291
  }
292
292
  process.stdout.write(` Processing${'.'.repeat((waited % 3) + 1)} \r`);
293
293
  await new Promise(r => setTimeout(r, 5000));
@@ -343,7 +343,7 @@ async function processWithGemini(ai, filePath, displayName, contextDocs = [], pr
343
343
  buildProgressiveContext(previousAnalyses, userName) || ''
344
344
  );
345
345
  const docBudget = Math.max(100000, config.GEMINI_CONTEXT_WINDOW - 350000 - prevContextEstimate);
346
- console.log(` Context budget: ${(docBudget / 1000).toFixed(0)}K tokens for docs (${contextDocs.length} available)`);
346
+ console.log(` Reference docs budget: ${(docBudget / 1000).toFixed(0)}K (${contextDocs.length} doc${contextDocs.length !== 1 ? 's' : ''} available)`);
347
347
 
348
348
  const { selected: selectedDocs, excluded, stats } = selectDocsByBudget(
349
349
  contextDocs, docBudget, { segmentIndex }