npm - task-summary-extractor - Versions diffs - 9.2.2 → 9.4.0 - Mend

task-summary-extractor 9.2.2 → 9.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/.env.example +6 -2
package/ARCHITECTURE.md +37 -37
package/QUICK_START.md +1 -1
package/README.md +32 -13
package/package.json +2 -3
package/src/config.js +1 -1
package/src/modes/deep-summary.js +406 -0
package/src/phases/discover.js +1 -0
package/src/phases/init.js +9 -30
package/src/phases/services.js +61 -1
package/src/pipeline.js +33 -3
package/src/services/gemini.js +142 -17
package/src/utils/cli.js +89 -1
package/src/utils/context-manager.js +31 -4
package/EXPLORATION.md +0 -514

package/src/modes/deep-summary.js ADDED Viewed

@@ -0,0 +1,406 @@
+/**
+ * Deep Summary — pre-summarizes context documents before segment analysis
+ * to dramatically reduce input tokens per segment.
+ *
+ * Instead of sending full document content (potentially 500K+ tokens) to
+ * every segment, this module:
+ *  1. Groups documents by priority tier
+ *  2. Sends each group to Gemini for intelligent condensation
+ *  3. Replaces full content with condensed summaries
+ *  4. Preserves "excluded" docs at full fidelity (user-chosen focus docs)
+ *  5. Ensures summaries capture all ticket IDs, action items, statuses
+ *
+ * The user can pick specific docs to EXCLUDE from summarization — these stay
+ * full. The summary pass receives extra instructions to focus on extracting
+ * information related to these excluded docs' topics.
+ *
+ * Token savings: typically 60-80% reduction in per-segment context tokens.
+ */
+'use strict';
+const { extractJson } = require('../utils/json-parser');
+const { withRetry } = require('../utils/retry');
+const { estimateTokens } = require('../utils/context-manager');
+const { c } = require('../utils/colors');
+const config = require('../config');
+// ======================== CONSTANTS ========================
+/** Max tokens for a single summarization call output */
+const SUMMARY_MAX_OUTPUT = 16384;
+/** Max input chars to send in one summarization batch (~200K tokens @ 0.3 tok/char) */
+const BATCH_MAX_CHARS = 600000;
+/** Minimum content length (chars) to bother summarizing — below this, keep full */
+const MIN_SUMMARIZE_LENGTH = 500;
+/**
+ * Hard cap per-document chars before sending to Gemini.
+ * Gemini context = 1M tokens; prompt overhead ~50K tokens; at 0.3 tok/char
+ * 900K chars ≈ 270K tokens — safe with prompt + thinking overhead.
+ */
+const MAX_DOC_CHARS = 900000;
+// ======================== BATCH BUILDER ========================
+/**
+ * Group documents into batches that fit within the batch char limit.
+ * Each batch will be summarized in a single Gemini call.
+ *
+ * @param {Array} docs - Context docs to batch [{type, fileName, content}]
+ * @param {number} [maxChars=BATCH_MAX_CHARS] - Max chars per batch
+ * @returns {Array<Array>} Batches of docs
+ */
+function buildBatches(docs, maxChars = BATCH_MAX_CHARS) {
+  const batches = [];
+  let currentBatch = [];
+  let currentChars = 0;
+  for (let doc of docs) {
+    let docChars = doc.content ? doc.content.length : 0;
+    // Truncate extremely large docs to avoid exceeding the context window.
+    // Any single doc beyond MAX_DOC_CHARS is capped (tail is dropped) and a
+    // warning is prepended so the summariser knows the content is incomplete.
+    if (docChars > MAX_DOC_CHARS) {
+      const truncated = doc.content.substring(0, MAX_DOC_CHARS);
+      doc = {
+        ...doc,
+        content: `[TRUNCATED — original ${(docChars / 1024).toFixed(0)} KB exceeded the ${(MAX_DOC_CHARS / 1024).toFixed(0)} KB limit; only the first ${(MAX_DOC_CHARS / 1024).toFixed(0)} KB is included]\n\n${truncated}`,
+        _truncatedFrom: docChars,
+      };
+      docChars = doc.content.length;
+      console.warn(`    ${c.warn(`${doc.fileName} truncated from ${(doc._truncatedFrom / 1024).toFixed(0)} KB to ${(MAX_DOC_CHARS / 1024).toFixed(0)} KB for deep summary`)}`);
+    }
+    // If this single doc exceeds the batch limit, it gets its own batch
+    if (docChars > maxChars) {
+      if (currentBatch.length > 0) {
+        batches.push(currentBatch);
+        currentBatch = [];
+        currentChars = 0;
+      }
+      batches.push([doc]);
+      continue;
+    }
+    if (currentChars + docChars > maxChars && currentBatch.length > 0) {
+      batches.push(currentBatch);
+      currentBatch = [];
+      currentChars = 0;
+    }
+    currentBatch.push(doc);
+    currentChars += docChars;
+  }
+  if (currentBatch.length > 0) {
+    batches.push(currentBatch);
+  }
+  return batches;
+}
+// ======================== SUMMARIZE ONE BATCH ========================
+/**
+ * Summarize a batch of documents into a condensed representation.
+ *
+ * @param {object} ai - Gemini AI instance
+ * @param {Array} docs - Documents in this batch
+ * @param {object} [opts]
+ * @param {string[]} [opts.focusTopics=[]] - Topics to focus on (from excluded docs)
+ * @param {number} [opts.thinkingBudget=8192] - Thinking token budget
+ * @param {number} [opts.batchIndex=0] - Batch number for logging
+ * @param {number} [opts.totalBatches=1] - Total batches for logging
+ * @returns {Promise<{summaries: Map<string, string>, tokenUsage: object}|null>}
+ */
+async function summarizeBatch(ai, docs, opts = {}) {
+  const {
+    focusTopics = [],
+    thinkingBudget = 8192,
+    batchIndex = 0,
+    totalBatches = 1,
+  } = opts;
+  const docEntries = docs
+    .filter(d => d.type === 'inlineText' && d.content)
+    .map(d => `=== DOCUMENT: ${d.fileName} ===\n${d.content}`);
+  if (docEntries.length === 0) return null;
+  const focusSection = focusTopics.length > 0
+    ? `\n\nFOCUS AREAS — The user has selected certain documents to keep at full fidelity. ` +
+      `Your summaries must be especially thorough about information related to these topics:\n` +
+      focusTopics.map((t, i) => `  ${i + 1}. ${t}`).join('\n') +
+      `\n\nFor every ticket ID, action item, blocker, or status mentioned in relation to these ` +
+      `focus areas, include them verbatim in the summary. Do NOT omit any IDs or assignments.`
+    : '';
+  const promptText = `You are a precision document summarizer for a meeting analysis pipeline.
+Your job: read ALL documents below and produce a CONDENSED version of each that preserves every piece of actionable information.
+WHAT TO PRESERVE (in order of importance):
+1. IDENTIFIERS — Every ticket ID, task ID, CR number, PR number, JIRA key, GitHub issue, reference number, version number. Copy these VERBATIM — do not paraphrase or abbreviate IDs.
+2. PEOPLE — All assignees, reviewers, approvers, requesters, and responsible parties. Use full names exactly as they appear.
+3. STATUSES & STATES — All statuses (open, closed, in_progress, blocked, deferred, etc.) and state markers (✅, ⬜, ⏸️, 🔲). Preserve the exact status vocabulary used in the document.
+4. ACTION ITEMS — Every action item, commitment, and deliverable with its owner, deadline, and dependency chain.
+5. BLOCKERS & DEPENDENCIES — What is blocked, by whom, what it blocks downstream.
+6. DECISIONS & RATIONALE — Key decisions and WHY they were made (not just what).
+7. CROSS-REFERENCES — When Document A references something from Document B, preserve that linkage. If ticket X is mentioned in a code-map entry, keep both the ticket ID and the code-map path.
+8. TECHNICAL SPECIFICS — File paths, code references, API endpoints, database tables, configuration keys, environment names (dev/staging/prod).
+9. NUMERICAL DATA — Percentages, counts, dates, deadlines, version numbers, sizes.
+10. CHECKLISTS & PROGRESS — Preserve checklist items with their completion status markers. Include progress ratios (e.g., "35/74 done, 6 blocked").
+WHAT TO REMOVE:
+- Verbose explanations of well-known concepts
+- Redundant phrasing, filler text, throat-clearing sentences
+- Formatting-only content (decorative headers, horizontal rules, empty sections)
+- Boilerplate/template text that adds no project-specific information
+- Repeated definitions or glossary entries that don't change across documents
+${focusSection}
+QUALITY REQUIREMENTS:
+- Aim for 70-80% size reduction while preserving ALL actionable information.
+- Every ID, every name, every status MUST survive the summarization.
+- If two documents reference the same entity (ticket, file, person), ensure the summary preserves enough context in BOTH summaries for downstream consumers to make the connection.
+- When a document contains a table, preserve the table structure (header + key rows). Omit empty or low-value rows.
+- When a document has nested structure (subsections, indented lists), preserve the hierarchy — use indentation or numbering.
+OUTPUT FORMAT:
+Return valid JSON with this structure:
+{
+  "summaries": {
+    "<fileName>": "<condensed text — plain text, preserving all key info>",
+    ...
+  },
+  "metadata": {
+    "originalTokensEstimate": <number>,
+    "summaryTokensEstimate": <number>,
+    "compressionRatio": <number between 0 and 1>
+  }
+}
+DOCUMENTS TO SUMMARIZE (${docEntries.length} documents):
+${docEntries.join('\n\n')}`;
+  const requestPayload = {
+    model: config.GEMINI_MODEL,
+    contents: [{ role: 'user', parts: [{ text: promptText }] }],
+    config: {
+      systemInstruction: 'You are a lossless information compressor specialized in engineering and business documents. Preserve every ID, name, status, assignment, dependency, file path, decision rationale, and actionable detail. Maintain cross-document references (when doc A mentions entity from doc B, keep both sides). Output valid JSON only.',
+      maxOutputTokens: SUMMARY_MAX_OUTPUT,
+      temperature: 0,
+      thinkingConfig: { thinkingBudget },
+    },
+  };
+  try {
+    const label = totalBatches > 1
+      ? `Deep summary batch ${batchIndex + 1}/${totalBatches}`
+      : 'Deep summary';
+    const response = await withRetry(
+      () => ai.models.generateContent(requestPayload),
+      { label, maxRetries: 2, baseDelay: 3000 }
+    );
+    const rawText = response.text;
+    const parsed = extractJson(rawText);
+    if (!parsed || !parsed.summaries) return null;
+    const usage = response.usageMetadata || {};
+    const tokenUsage = {
+      inputTokens: usage.promptTokenCount || 0,
+      outputTokens: usage.candidatesTokenCount || 0,
+      totalTokens: usage.totalTokenCount || 0,
+      thoughtTokens: usage.thoughtsTokenCount || 0,
+    };
+    return { summaries: parsed.summaries, metadata: parsed.metadata || {}, tokenUsage };
+  } catch (err) {
+    console.warn(`    ${c.warn(`Deep summary batch ${batchIndex + 1} failed: ${err.message}`)}`);
+    return null;
+  }
+}
+// ======================== MAIN ENTRY POINT ========================
+/**
+ * Run deep summarization on context documents.
+ *
+ * @param {object} ai - Gemini AI instance
+ * @param {Array} contextDocs - All prepared context docs
+ * @param {object} [opts]
+ * @param {string[]} [opts.excludeFileNames=[]] - Doc fileNames to keep at full fidelity
+ * @param {number} [opts.thinkingBudget=8192] - Thinking budget per batch
+ * @param {Function} [opts.onProgress] - Callback(done, total) for progress
+ * @returns {Promise<{docs: Array, stats: object}>}
+ */
+async function deepSummarize(ai, contextDocs, opts = {}) {
+  const {
+    excludeFileNames = [],
+    thinkingBudget = 8192,
+    onProgress = null,
+  } = opts;
+  const excludeSet = new Set(excludeFileNames.map(n => n.toLowerCase()));
+  // Partition: docs to summarize vs docs to keep full
+  const toSummarize = [];
+  const keepFull = [];
+  for (const doc of contextDocs) {
+    // Keep non-text docs (fileData = PDF etc.) as-is
+    if (doc.type !== 'inlineText') {
+      keepFull.push(doc);
+      continue;
+    }
+    // Keep excluded docs at full fidelity
+    if (excludeSet.has(doc.fileName.toLowerCase())) {
+      keepFull.push(doc);
+      continue;
+    }
+    // Skip tiny docs — not worth summarizing
+    if (!doc.content || doc.content.length < MIN_SUMMARIZE_LENGTH) {
+      keepFull.push(doc);
+      continue;
+    }
+    toSummarize.push(doc);
+  }
+  if (toSummarize.length === 0) {
+    return {
+      docs: contextDocs,
+      stats: {
+        summarized: 0,
+        keptFull: keepFull.length,
+        originalTokens: 0,
+        summaryTokens: 0,
+        savedTokens: 0,
+        savingsPercent: 0,
+        totalInputTokens: 0,
+        totalOutputTokens: 0,
+      },
+    };
+  }
+  // Build focus topics from excluded docs (tell summarizer what to prioritize)
+  const focusTopics = keepFull
+    .filter(d => d.type === 'inlineText' && excludeSet.has(d.fileName.toLowerCase()))
+    .map(d => d.fileName);
+  // Batch documents
+  const batches = buildBatches(toSummarize);
+  console.log(`    Batched ${c.highlight(toSummarize.length)} doc(s) into ${c.highlight(batches.length)} summarization batch(es)`);
+  if (focusTopics.length > 0) {
+    console.log(`    Focus topics from ${c.highlight(focusTopics.length)} excluded doc(s):`);
+    focusTopics.forEach(t => console.log(`      ${c.dim('•')} ${c.cyan(t)}`));
+  }
+  // Process batches (sequential for now; can add parallelization later)
+  const allSummaries = new Map();
+  let totalInput = 0;
+  let totalOutput = 0;
+  let batchesDone = 0;
+  for (let i = 0; i < batches.length; i++) {
+    const result = await summarizeBatch(ai, batches[i], {
+      focusTopics,
+      thinkingBudget,
+      batchIndex: i,
+      totalBatches: batches.length,
+    });
+    batchesDone++;
+    if (onProgress) onProgress(batchesDone, batches.length);
+    if (result && result.summaries) {
+      for (const [fileName, summary] of Object.entries(result.summaries)) {
+        allSummaries.set(fileName.toLowerCase(), summary);
+      }
+      totalInput += result.tokenUsage.inputTokens;
+      totalOutput += result.tokenUsage.outputTokens;
+    }
+  }
+  // Replace doc content with summaries
+  let originalTokens = 0;
+  let summaryTokens = 0;
+  const resultDocs = [];
+  for (const doc of contextDocs) {
+    if (doc.type !== 'inlineText') {
+      resultDocs.push(doc);
+      continue;
+    }
+    // Check if this doc was excluded (kept full)
+    if (excludeSet.has(doc.fileName.toLowerCase())) {
+      resultDocs.push(doc);
+      continue;
+    }
+    // Check if we have a summary for this doc
+    const summaryKey = doc.fileName.toLowerCase();
+    const summary = allSummaries.get(summaryKey);
+    if (summary && summary.length > 0) {
+      const origTokens = estimateTokens(doc.content);
+      const sumTokens = estimateTokens(summary);
+      originalTokens += origTokens;
+      summaryTokens += sumTokens;
+      resultDocs.push({
+        ...doc,
+        content: `[Deep Summary — original: ~${origTokens.toLocaleString()} tokens → condensed: ~${sumTokens.toLocaleString()} tokens]\n\n${summary}`,
+        _originalLength: doc.content.length,
+        _summaryLength: summary.length,
+        _deepSummarized: true,
+      });
+    } else {
+      // No summary returned — keep original
+      resultDocs.push(doc);
+    }
+  }
+  const savedTokens = originalTokens - summaryTokens;
+  const savingsPercent = originalTokens > 0
+    ? parseFloat(((savedTokens / originalTokens) * 100).toFixed(1))
+    : 0;
+  return {
+    docs: resultDocs,
+    stats: {
+      summarized: allSummaries.size,
+      keptFull: keepFull.length,
+      originalTokens,
+      summaryTokens,
+      savedTokens,
+      savingsPercent,
+      totalInputTokens: totalInput,
+      totalOutputTokens: totalOutput,
+    },
+  };
+}
+// ======================== EXPORTS ========================
+module.exports = {
+  deepSummarize,
+  summarizeBatch,
+  buildBatches,
+  SUMMARY_MAX_OUTPUT,
+  BATCH_MAX_CHARS,
+  MIN_SUMMARIZE_LENGTH,
+  MAX_DOC_CHARS,
+};

package/src/phases/discover.js CHANGED Viewed

@@ -85,6 +85,7 @@ async function phaseDiscover(ctx) {
   if (opts.resume) activeFlags.push('resume');
   if (opts.reanalyze) activeFlags.push('reanalyze');
   if (opts.dryRun) activeFlags.push('dry-run');
+  if (opts.deepSummary) activeFlags.push('deep-summary');
   if (activeFlags.length > 0) {
     console.log(`  Flags: ${c.yellow(activeFlags.join(', '))}`);
   }

package/src/phases/init.js CHANGED Viewed

@@ -67,6 +67,10 @@ async function phaseInit() {
     disableDiff: !!flags['no-diff'],
     noHtml: !!flags['no-html'],
     deepDive: !!flags['deep-dive'],
+    deepSummary: !!flags['deep-summary'],
+    deepSummaryExclude: typeof flags['exclude-docs'] === 'string'
+      ? flags['exclude-docs'].split(',').map(s => s.trim()).filter(Boolean)
+      : [],  // populated by CLI flag, interactive picker, or kept empty
     dynamic: !!flags.dynamic,
     request: typeof flags.request === 'string' ? flags.request : null,
     updateProgress: !!flags['update-progress'],
@@ -94,36 +98,10 @@ async function phaseInit() {
     opts.runMode = mode;
     if (mode !== 'custom') {
-      // Apply preset overrides
-      const { selectRunMode: _ignore, ...cliModule } = require('../utils/cli');
-      // Access RUN_PRESETS from the module
-      const presetOverrides = {
-        fast: {
-          disableFocusedPass: true,
-          disableLearning: true,
-          disableDiff: true,
-          format: 'md,json',
-          formats: new Set(['md', 'json']),
-          modelTier: 'economy',
-        },
-        balanced: {
-          disableFocusedPass: false,
-          disableLearning: false,
-          disableDiff: false,
-          format: 'all',
-          formats: new Set(['md', 'html', 'json', 'pdf', 'docx']),
-          modelTier: 'balanced',
-        },
-        detailed: {
-          disableFocusedPass: false,
-          disableLearning: false,
-          disableDiff: false,
-          format: 'all',
-          formats: new Set(['md', 'html', 'json', 'pdf', 'docx']),
-          modelTier: 'premium',
-        },
-      };
-      const preset = presetOverrides[mode];
+      // Apply preset overrides from the shared RUN_PRESETS definition
+      const { RUN_PRESETS } = require('../utils/cli');
+      const presetDef = RUN_PRESETS[mode];
+      const preset = presetDef ? presetDef.overrides : null;
       if (preset) {
         opts.disableFocusedPass = preset.disableFocusedPass;
         opts.disableLearning = preset.disableLearning;
@@ -322,6 +300,7 @@ function _printRunSummary(opts, modelId, models, targetDir) {
   if (!opts.disableLearning) features.push(c.green('learning'));
   if (!opts.disableDiff) features.push(c.green('diff'));
   if (opts.deepDive) features.push(c.cyan('deep-dive'));
+  if (opts.deepSummary) features.push(c.cyan('deep-summary'));
   if (opts.dynamic) features.push(c.cyan('dynamic'));
   if (opts.resume) features.push(c.yellow('resume'));
   if (opts.dryRun) features.push(c.yellow('dry-run'));

package/src/phases/services.js CHANGED Viewed

@@ -7,6 +7,9 @@ const path = require('path');
 const { initFirebase, uploadToStorage, storageExists } = require('../services/firebase');
 const { initGemini, prepareDocsForGemini } = require('../services/gemini');
+// --- Modes ---
+const { deepSummarize } = require('../modes/deep-summary');
 // --- Utils ---
 const { parallelMap } = require('../utils/retry');
@@ -101,4 +104,61 @@ async function phaseServices(ctx) {
   return { ...ctx, storage, firebaseReady, ai, contextDocs, docStorageUrls, callName };
 }
-module.exports = phaseServices;
+// ======================== PHASE: DEEP SUMMARY ========================
+/**
+ * Pre-summarize context documents to save input tokens per segment.
+ * Runs only when --deep-summary flag is active.
+ *
+ * @param {object} ctx - Pipeline context with ai, contextDocs, opts
+ * @returns {object} Updated ctx with summarized contextDocs and deepSummaryStats
+ */
+async function phaseDeepSummary(ctx) {
+  const log = getLog();
+  const { opts, ai, contextDocs } = ctx;
+  if (!opts.deepSummary || !ai || contextDocs.length === 0) {
+    return { ...ctx, deepSummaryStats: null };
+  }
+  console.log('');
+  console.log(c.cyan('  ── Deep Summary — Pre-summarizing context documents ──'));
+  log.step('Deep summary: starting context document pre-summarization');
+  if (log && log.phaseStart) log.phaseStart('deep_summary');
+  const excludeNames = opts.deepSummaryExclude || [];
+  let updatedDocs = contextDocs;
+  let deepSummaryStats = null;
+  try {
+    const result = await deepSummarize(ai, contextDocs, {
+      excludeFileNames: excludeNames,
+      thinkingBudget: Math.min(8192, opts.thinkingBudget),
+    });
+    updatedDocs = result.docs;
+    deepSummaryStats = result.stats;
+    if (deepSummaryStats.summarized > 0) {
+      console.log(`  ${c.success(`Summarized ${c.highlight(deepSummaryStats.summarized)} doc(s) — saved ~${c.highlight(deepSummaryStats.savedTokens.toLocaleString())} tokens (${c.yellow(deepSummaryStats.savingsPercent + '%')} reduction)`)}`);
+      console.log(`    ${c.dim('Original:')} ~${deepSummaryStats.originalTokens.toLocaleString()} tokens → ${c.dim('Condensed:')} ~${deepSummaryStats.summaryTokens.toLocaleString()} tokens`);
+      if (deepSummaryStats.keptFull > 0) {
+        console.log(`    ${c.dim('Kept full:')} ${deepSummaryStats.keptFull} doc(s) (excluded from summary)`);
+      }
+      log.step(`Deep summary: ${deepSummaryStats.summarized} docs summarized, ${deepSummaryStats.savedTokens} tokens saved (${deepSummaryStats.savingsPercent}%)`);
+      log.metric('deep_summary', deepSummaryStats);
+    } else {
+      console.log(`  ${c.dim('No documents needed summarization')}`);
+    }
+  } catch (err) {
+    console.warn(`  ${c.warn(`Deep summary failed (continuing with full docs): ${err.message}`)}`);
+    log.warn(`Deep summary failed: ${err.message}`);
+  }
+  if (log && log.phaseEnd) log.phaseEnd({ stats: deepSummaryStats });
+  console.log('');
+  return { ...ctx, contextDocs: updatedDocs, deepSummaryStats };
+}
+module.exports = { phaseServices, phaseDeepSummary };

package/src/pipeline.js CHANGED Viewed

@@ -32,7 +32,7 @@ const { getLog, isShuttingDown, PKG_ROOT, PROJECT_ROOT } = require('./phases/_sh
 // --- Pipeline phases ---
 const phaseInit        = require('./phases/init');
 const phaseDiscover    = require('./phases/discover');
-const phaseServices    = require('./phases/services');
+const { phaseServices, phaseDeepSummary } = require('./phases/services');
 const phaseProcessVideo = require('./phases/process-media');
 const phaseCompile     = require('./phases/compile');
 const phaseOutput      = require('./phases/output');
@@ -46,7 +46,7 @@ const phaseDeepDive    = require('./phases/deep-dive');
 // --- Utils (for run orchestration + alt modes) ---
 const { c } = require('./utils/colors');
 const { findDocsRecursive } = require('./utils/fs');
-const { promptUserText } = require('./utils/cli');
+const { promptUser, promptUserText, selectDocsToExclude } = require('./utils/cli');
 const { createProgressBar } = require('./utils/progress-bar');
 const { buildHealthReport, printHealthDashboard } = require('./utils/health-dashboard');
 const { saveHistory, buildHistoryEntry } = require('./utils/learning-loop');
@@ -92,9 +92,38 @@ async function run() {
   // Phase 3: Services
   bar.setPhase('services');
-  const fullCtx = await phaseServices(ctx);
+  let fullCtx = await phaseServices(ctx);
   bar.tick('Services ready');
+  // Phase 3.5 (optional): Deep Summary — pre-summarize context docs
+  // If user didn't pass --deep-summary but has many context docs, offer it interactively
+  if (!fullCtx.opts.deepSummary && process.stdin.isTTY && fullCtx.ai && fullCtx.contextDocs.length >= 3) {
+    const inlineDocs = fullCtx.contextDocs.filter(d => d.type === 'inlineText' && d.content);
+    const totalChars = inlineDocs.reduce((sum, d) => sum + d.content.length, 0);
+    const totalTokensEstimate = Math.ceil(totalChars * 0.3);
+    // Only offer when context is large enough to benefit (>100K tokens)
+    if (totalTokensEstimate > 100000) {
+      console.log('');
+      console.log(`  ${c.cyan('You have')} ${c.highlight(inlineDocs.length)} ${c.cyan('context docs')} (~${c.highlight((totalTokensEstimate / 1000).toFixed(0) + 'K')} ${c.cyan('tokens)')}`);
+      console.log(`  ${c.dim('Deep summary can reduce per-segment context by 60-80%, saving time and cost.')}`);
+      const wantDeepSummary = await promptUser(`  ${c.cyan('Enable deep summary?')} [y/N] `);
+      if (wantDeepSummary) {
+        fullCtx.opts.deepSummary = true;
+      }
+    }
+  }
+  if (fullCtx.opts.deepSummary && fullCtx.ai && fullCtx.contextDocs.length > 0) {
+    // Interactive picker: let user choose docs to keep at full fidelity
+    if (process.stdin.isTTY && fullCtx.opts.deepSummaryExclude.length === 0) {
+      const excluded = await selectDocsToExclude(fullCtx.contextDocs);
+      fullCtx.opts.deepSummaryExclude = excluded;
+    }
+    bar.setPhase('deep-summary', 1);
+    fullCtx = await phaseDeepSummary(fullCtx);
+    bar.tick('Docs summarized');
+  }
   // Phase 4: Process each media file (video or audio)
   const allSegmentAnalyses = [];
   const allSegmentReports = [];
@@ -117,6 +146,7 @@ async function run() {
     contextDocuments: fullCtx.contextDocs.map(d => d.fileName),
     documentStorageUrls: fullCtx.docStorageUrls,
     firebaseAuthenticated: fullCtx.firebaseReady,
+    deepSummary: fullCtx.deepSummaryStats || null,
     files: [],
   };