npm - task-summary-extractor - Versions diffs - 8.3.0 → 9.0.0 - Mend

task-summary-extractor 8.3.0 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

package/.env.example +38 -0
package/ARCHITECTURE.md +99 -3
package/EXPLORATION.md +148 -89
package/QUICK_START.md +5 -2
package/README.md +51 -7
package/bin/taskex.js +11 -4
package/package.json +38 -5
package/src/config.js +52 -3
package/src/modes/focused-reanalysis.js +2 -1
package/src/modes/progress-updater.js +1 -1
package/src/phases/_shared.js +43 -0
package/src/phases/compile.js +101 -0
package/src/phases/deep-dive.js +118 -0
package/src/phases/discover.js +178 -0
package/src/phases/init.js +192 -0
package/src/phases/output.js +238 -0
package/src/phases/process-media.js +633 -0
package/src/phases/services.js +104 -0
package/src/phases/summary.js +86 -0
package/src/pipeline.js +431 -1463
package/src/renderers/docx.js +531 -0
package/src/renderers/html.js +672 -0
package/src/renderers/markdown.js +15 -183
package/src/renderers/pdf.js +90 -0
package/src/renderers/shared.js +211 -0
package/src/schemas/analysis-compiled.schema.json +381 -0
package/src/schemas/analysis-segment.schema.json +380 -0
package/src/services/doc-parser.js +346 -0
package/src/services/gemini.js +101 -44
package/src/services/video.js +123 -8
package/src/utils/adaptive-budget.js +6 -4
package/src/utils/checkpoint.js +2 -1
package/src/utils/cli.js +131 -110
package/src/utils/colors.js +83 -0
package/src/utils/confidence-filter.js +138 -0
package/src/utils/diff-engine.js +2 -1
package/src/utils/global-config.js +6 -5
package/src/utils/health-dashboard.js +11 -9
package/src/utils/json-parser.js +4 -2
package/src/utils/learning-loop.js +3 -2
package/src/utils/progress-bar.js +286 -0
package/src/utils/quality-gate.js +4 -2
package/src/utils/retry.js +3 -1
package/src/utils/schema-validator.js +314 -0

package/src/renderers/markdown.js CHANGED Viewed

@@ -13,189 +13,12 @@
 'use strict';
-// ════════════════════════════════════════════════════════════
-//  Name Clustering Utilities
-// ════════════════════════════════════════════════════════════
-/**
- * Strip parenthetical suffixes and normalize whitespace.
- * "Mohamed Elhadi (Service Desk)" → "Mohamed Elhadi"
- */
-function stripParens(name) {
-  return (name || '').replace(/\s*\([^)]*\)\s*/g, '').trim();
-}
-/**
- * Normalize a name to lowercase stripped form for comparison.
- */
-function normalizeKey(name) {
-  return stripParens(name).toLowerCase().replace(/\s+/g, ' ').trim();
-}
-/**
- * Build a Map<canonicalName, Set<rawVariants>> from a list of raw name strings.
- * Clustering rules applied in order:
- *  1. Exact normalized match (case-insensitive, parens stripped)
- *  2. Substring containment after stripping
- *
- * The canonical name chosen is the longest proper-cased variant (no parens).
- */
-function clusterNames(rawNames) {
-  const clusters = new Map();           // normKey → { canonical, variants: Set }
-  const normToCluster = new Map();      // normKey → cluster ref
-  for (const raw of rawNames) {
-    const stripped = stripParens(raw).trim();
-    if (!stripped) continue;
-    const nk = normalizeKey(raw);
-    // Check exact match first
-    if (normToCluster.has(nk)) {
-      const c = normToCluster.get(nk);
-      c.variants.add(raw);
-      // Upgrade canonical: prefer longest proper-cased form without parens
-      if (stripped.length >= c.canonical.length && stripped[0] === stripped[0].toUpperCase()) {
-        c.canonical = stripped;
-      }
-      continue;
-    }
-    // Check substring containment against existing clusters
-    let merged = false;
-    for (const [existNk, c] of normToCluster) {
-      if (existNk.includes(nk) || nk.includes(existNk)) {
-        c.variants.add(raw);
-        normToCluster.set(nk, c);
-        if (stripped.length >= c.canonical.length && stripped[0] === stripped[0].toUpperCase()) {
-          c.canonical = stripped;
-        }
-        merged = true;
-        break;
-      }
-    }
-    if (merged) continue;
-    // New cluster
-    const cluster = { canonical: stripped[0] === stripped[0].toUpperCase() ? stripped : raw, variants: new Set([raw]) };
-    clusters.set(nk, cluster);
-    normToCluster.set(nk, cluster);
-  }
-  // Build final map: canonical → Set of raw variants
-  const result = new Map();
-  for (const c of clusters.values()) {
-    if (!result.has(c.canonical)) result.set(c.canonical, new Set());
-    for (const v of c.variants) {
-      result.get(c.canonical).add(v);
-    }
-  }
-  return result;
-}
-/**
- * Given a raw name and a cluster map, return the canonical form.
- */
-function resolve(name, clusterMap) {
-  if (!name) return name;
-  const nk = normalizeKey(name);
-  for (const [canonical, variants] of clusterMap) {
-    for (const v of variants) {
-      if (normalizeKey(v) === nk) return canonical;
-    }
-    // substring fallback
-    const cnk = normalizeKey(canonical);
-    if (cnk.includes(nk) || nk.includes(cnk)) return canonical;
-  }
-  return stripParens(name).trim() || name;
-}
-// ════════════════════════════════════════════════════════════
-//  Dedup Utilities
-// ════════════════════════════════════════════════════════════
-/** Deduplicate an array by a key function. First occurrence wins (keeps richest data by default). */
-function dedupBy(arr, keyFn) {
-  const seen = new Map();
-  const result = [];
-  for (const item of arr) {
-    const k = keyFn(item);
-    if (!k) { result.push(item); continue; }
-    if (seen.has(k)) {
-      // Merge: overwrite sparse fields from later duplicates
-      const existing = seen.get(k);
-      for (const [field, val] of Object.entries(item)) {
-        if (val && !existing[field]) existing[field] = val;
-      }
-      continue;
-    }
-    seen.set(k, item);
-    result.push(item);
-  }
-  return result;
-}
-/** Normalize a description for fuzzy matching — strips file paths, parenthetical details, punctuation. */
-function normalizeDesc(s) {
-  return (s || '')
-    .toLowerCase()
-    // Strip full file paths (keep only the last segment, e.g. "Notifications.cs")
-    .replace(/[\w\-./\\]+\/[\w\-./\\]+\.(cs|ts|js|json|html|resx|png|md)/g, m => {
-      const parts = m.replace(/\\/g, '/').split('/');
-      return parts[parts.length - 1];
-    })
-    // Strip parenthetical additions like "(likely code/config ...)"
-    .replace(/\([^)]*\)/g, '')
-    // Strip trailing punctuation and whitespace before punctuation
-    .replace(/\s+([.,;:!?])/g, '$1')
-    // Collapse whitespace
-    .replace(/\s+/g, ' ')
-    .trim();
-}
-/** Deduplicate by description text similarity (fallback when IDs are missing). */
-function dedupByDesc(arr, descField = 'description') {
-  const seen = new Set();
-  return arr.filter(item => {
-    const key = normalizeDesc(item[descField]);
-    if (!key || seen.has(key)) return false;
-    seen.add(key);
-    return true;
-  });
-}
-// ════════════════════════════════════════════════════════════
-//  Main Renderer
-// ════════════════════════════════════════════════════════════
-/** Format a timestamp string for display, optionally with segment number. */
-function fmtTs(ts, seg) {
-  if (!ts) return '';
-  if (seg) return `\`${ts}\` _(Seg ${seg})_`;
-  return `\`${ts}\``;
-}
-/** Make a compact priority badge */
-function priBadge(p) {
-  if (!p) return '';
-  const icons = { high: '🔴', medium: '🟡', low: '🟢', critical: '🔴' };
-  return ` ${icons[p] || '⚪'} \`${p}\``;
-}
-/** Make a compact confidence badge */
-function confBadge(c) {
-  if (!c) return '';
-  const icons = { HIGH: '🟢', MEDIUM: '🟡', LOW: '🔴' };
-  return ` ${icons[c] || '⚪'}\`${c}\``;
-}
-/** Make a confidence badge with reason tooltip */
-function confBadgeFull(c, reason) {
-  if (!c) return '';
-  const icons = { HIGH: '🟢', MEDIUM: '🟡', LOW: '🔴' };
-  const badge = `${icons[c] || '⚪'}\`${c}\``;
-  if (reason) return ` ${badge} _(${reason})_`;
-  return ` ${badge}`;
-}
+// Shared renderer utilities (name clustering, dedup, badges)
+const {
+  stripParens, normalizeKey, clusterNames, resolve,
+  dedupBy, normalizeDesc, dedupByDesc,
+  fmtTs, priBadge, confBadge, confBadgeFull,
+} = require('./shared');
 /**
  * Render the final compiled analysis into a comprehensive Markdown report.
@@ -290,6 +113,15 @@ function renderResultsMarkdown({ compiled, meta }) {
   }
   ln('');
+  // Confidence filter notice
+  if (compiled._filterMeta && compiled._filterMeta.minConfidence !== 'LOW') {
+    const fm = compiled._filterMeta;
+    const levelLabel = fm.minConfidence === 'HIGH' ? 'HIGH' : 'MEDIUM and HIGH';
+    ln(`> ⚠️ **Confidence filter active:** showing only ${levelLabel} confidence items.  `);
+    ln(`> Kept ${fm.filteredCounts.total}/${fm.originalCounts.total} items (${fm.removed} removed). Full unfiltered data in \`results.json\`.  `);
+    ln('');
+  }
   // Segment breakdown
   const segs = meta.segments || [];
   if (segs.length > 0) {

package/src/renderers/pdf.js ADDED Viewed

@@ -0,0 +1,90 @@
+/**
+ * PDF renderer — converts the HTML report to a self-contained PDF.
+ *
+ * Uses Puppeteer (headless Chrome) to render the full HTML report,
+ * preserving all styling, collapsible sections, and visual layout.
+ *
+ * Usage:
+ *   const { renderResultsPdf } = require('./pdf');
+ *   await renderResultsPdf(htmlContent, outputPath);
+ */
+'use strict';
+const { c } = require('../utils/colors');
+/**
+ * Render an HTML string to a PDF file.
+ *
+ * Launches headless Chrome through Puppeteer, loads the HTML, forces every
+ * <details> element open so collapsed sections are printed, and writes the
+ * PDF to `outputPath`. The browser is always closed, even on failure.
+ *
+ * @param {string} htmlContent - The full self-contained HTML string
+ * @param {string} outputPath  - Absolute path for the output .pdf file
+ * @param {object} [options]   - PDF options
+ * @param {string} [options.format='A4']     - Page size (A4, Letter, etc.)
+ * @param {boolean} [options.landscape=false] - Landscape orientation
+ * @param {string} [options.margin='1.5cm']  - CSS margin (e.g. '1cm')
+ * @returns {Promise<{ path: string, pages: number, bytes: number }>}
+ * @throws {Error} When puppeteer is not installed or rendering fails; the
+ *   underlying error is attached as `cause`.
+ */
+async function renderResultsPdf(htmlContent, outputPath, options = {}) {
+  let browser;
+  try {
+    // Required lazily so the dependency is only needed when PDF output is requested.
+    const puppeteer = require('puppeteer');
+    // NOTE(review): the Chromium sandbox is disabled here; confirm that the
+    // HTML passed in can never contain untrusted markup or scripts.
+    browser = await puppeteer.launch({
+      headless: true,
+      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
+    });
+    const page = await browser.newPage();
+    // Set HTML content with generous timeout for complex reports
+    await page.setContent(htmlContent, {
+      waitUntil: 'networkidle0',
+      timeout: 30000,
+    });
+    // Expand all collapsible sections so they appear in PDF
+    await page.evaluate(() => {
+      document.querySelectorAll('details').forEach(d => d.setAttribute('open', ''));
+    });
+    // Generate PDF
+    const margin = options.margin || '1.5cm';
+    const pdfBuffer = await page.pdf({
+      path: outputPath,
+      format: options.format || 'A4',
+      landscape: !!options.landscape,
+      printBackground: true,
+      margin: { top: margin, right: margin, bottom: margin, left: margin },
+      displayHeaderFooter: true,
+      headerTemplate: '<div></div>',
+      // Footer padding follows the configured margin so it lines up with the body.
+      footerTemplate: `
+        <div style="font-size: 9px; color: #888; width: 100%; text-align: center; padding: 0 ${margin};">
+          <span>Generated by taskex</span>
+          <span style="float: right;">Page <span class="pageNumber"></span> of <span class="totalPages"></span></span>
+        </div>
+      `,
+    });
+    // page.pdf() may resolve to a Buffer or to a plain Uint8Array depending on
+    // the Puppeteer version. Uint8Array#toString ignores the encoding argument,
+    // so normalise to a Buffer before scanning the bytes.
+    const pdfBytes = Buffer.isBuffer(pdfBuffer) ? pdfBuffer : Buffer.from(pdfBuffer);
+    // Heuristic page count: number of "/Type /Page" object markers in the raw
+    // bytes ("/Pages" is excluded by the word boundary). Falls back to 0.
+    const pages = pdfBytes.toString('latin1').match(/\/Type\s*\/Page\b/g)?.length || 0;
+    return {
+      path: outputPath,
+      pages,
+      bytes: pdfBytes.length,
+    };
+  } catch (err) {
+    if (err.code === 'MODULE_NOT_FOUND') {
+      throw new Error(
+        'PDF generation requires puppeteer. Install it with: npm install puppeteer\n' +
+        '  This downloads a bundled Chromium (~300MB) for rendering.',
+        { cause: err }
+      );
+    }
+    // Keep the original error reachable for debugging.
+    throw new Error(`PDF generation failed: ${err.message}`, { cause: err });
+  } finally {
+    if (browser) {
+      // Best-effort cleanup: a failure to close must not mask the real outcome.
+      await browser.close().catch(() => {});
+    }
+  }
+}
+module.exports = { renderResultsPdf };

package/src/renderers/shared.js ADDED Viewed

@@ -0,0 +1,211 @@
+/**
+ * Shared renderer utilities — name clustering, deduplication, badge formatting.
+ *
+ * Extracted from markdown.js so both the Markdown and HTML renderers
+ * share the same data normalisation logic.
+ */
+'use strict';
+// ════════════════════════════════════════════════════════════
+//  Name Clustering Utilities
+// ════════════════════════════════════════════════════════════
+/**
+ * Remove parenthetical groups from a name and trim the result.
+ * "Mohamed Elhadi (Service Desk)" → "Mohamed Elhadi"
+ * "Mohamed (Service Desk) Elhadi" → "Mohamed Elhadi"
+ *
+ * @param {string} [name] - Raw name; null/undefined/empty yields ''
+ * @returns {string} The name without any "(...)" groups
+ */
+function stripParens(name) {
+  // Each run of adjacent groups (with the whitespace around it) becomes one
+  // space, so the words on either side of a mid-name group stay separated.
+  return (name || '').replace(/\s*(?:\([^)]*\)\s*)+/g, ' ').trim();
+}
+/**
+ * Build the comparison key for a name: parentheticals removed, lower-cased,
+ * whitespace runs collapsed to single spaces, and trimmed.
+ *
+ * @param {string} [name] - Raw name
+ * @returns {string} Normalized key (may be empty)
+ */
+function normalizeKey(name) {
+  const bare = stripParens(name);
+  const lowered = bare.toLowerCase();
+  const singleSpaced = lowered.replace(/\s+/g, ' ');
+  return singleSpaced.trim();
+}
+/**
+ * Group raw name strings into clusters of variants of the same name.
+ *
+ * A name joins an existing cluster when its normalized key
+ *  1. equals a key already seen, or
+ *  2. contains, or is contained in, a key already seen (the first such key
+ *     in insertion order wins).
+ * Otherwise it starts a new cluster. Names that are empty once parentheticals
+ * are stripped are skipped.
+ *
+ * A joining variant becomes the cluster's canonical name when its stripped
+ * form is at least as long as the current canonical and its first character
+ * is unchanged by toUpperCase().
+ *
+ * @param {Iterable<string>} rawNames - Raw name strings
+ * @returns {Map<string, Set<string>>} canonical name → raw variants
+ */
+function clusterNames(rawNames) {
+  const startsUpper = (text) => text[0] === text[0].toUpperCase();
+  // Record a variant on a cluster and promote it to canonical when it qualifies.
+  const absorb = (cluster, raw, stripped) => {
+    cluster.variants.add(raw);
+    if (stripped.length >= cluster.canonical.length && startsUpper(stripped)) {
+      cluster.canonical = stripped;
+    }
+  };
+  const created = [];      // clusters, in creation order
+  const byKey = new Map(); // normalized key → cluster
+  for (const raw of rawNames) {
+    const stripped = stripParens(raw).trim();
+    if (!stripped) continue;
+    const key = normalizeKey(raw);
+    let home = byKey.get(key);
+    if (!home) {
+      // No exact hit: look for the first known key related by containment.
+      for (const [knownKey, candidate] of byKey) {
+        if (knownKey.includes(key) || key.includes(knownKey)) {
+          home = candidate;
+          break;
+        }
+      }
+      if (home) byKey.set(key, home);
+    }
+    if (home) {
+      absorb(home, raw, stripped);
+      continue;
+    }
+    const fresh = {
+      canonical: startsUpper(stripped) ? stripped : raw,
+      variants: new Set([raw]),
+    };
+    created.push(fresh);
+    byKey.set(key, fresh);
+  }
+  // Clusters that ended up with the same canonical name are merged here.
+  const result = new Map();
+  for (const { canonical, variants } of created) {
+    let bucket = result.get(canonical);
+    if (!bucket) {
+      bucket = new Set();
+      result.set(canonical, bucket);
+    }
+    variants.forEach((variant) => bucket.add(variant));
+  }
+  return result;
+}
+/**
+ * Given a raw name and a cluster map, return the canonical form.
+ *
+ * Lookup order per cluster: exact normalized match against any variant, then
+ * substring containment against the canonical name. When nothing matches, the
+ * name is returned with parentheticals stripped (or unchanged if that would
+ * leave it empty).
+ *
+ * @param {string} name - Raw name; falsy values are returned unchanged
+ * @param {Map<string, Set<string>>} clusterMap - canonical name → raw variants
+ * @returns {string} Canonical name, or the cleaned-up input
+ */
+function resolve(name, clusterMap) {
+  if (!name) return name;
+  const nk = normalizeKey(name);
+  const fallback = stripParens(name).trim() || name;
+  // A name made only of parentheticals/whitespace has an empty key. Every
+  // string "includes" the empty string, so without this guard such a name
+  // would be resolved to whichever cluster happens to come first.
+  if (!nk) return fallback;
+  for (const [canonical, variants] of clusterMap) {
+    for (const v of variants) {
+      if (normalizeKey(v) === nk) return canonical;
+    }
+    // Substring fallback against the canonical name.
+    const cnk = normalizeKey(canonical);
+    if (cnk.includes(nk) || nk.includes(cnk)) return canonical;
+  }
+  return fallback;
+}
+// ════════════════════════════════════════════════════════════
+//  Dedup Utilities
+// ════════════════════════════════════════════════════════════
+/**
+ * Deduplicate an array by a key function.
+ *
+ * The first item seen for a key is kept; later items with the same key are
+ * dropped, but any truthy field they carry is copied onto the kept item when
+ * that item's own field is falsy (the kept item is mutated in place).
+ * Items whose key is falsy are always kept.
+ *
+ * @param {Array<object>} arr - Items to deduplicate
+ * @param {function(object): *} keyFn - Returns the dedup key for an item
+ * @returns {Array<object>} Kept items, in original order
+ */
+function dedupBy(arr, keyFn) {
+  const firstByKey = new Map();
+  const kept = [];
+  for (const entry of arr) {
+    const key = keyFn(entry);
+    if (key && firstByKey.has(key)) {
+      // Duplicate: fill gaps on the kept item, then drop this one.
+      const target = firstByKey.get(key);
+      Object.entries(entry).forEach(([field, val]) => {
+        if (val && !target[field]) target[field] = val;
+      });
+    } else {
+      if (key) firstByKey.set(key, entry);
+      kept.push(entry);
+    }
+  }
+  return kept;
+}
+/**
+ * Normalize a description for fuzzy matching.
+ *
+ * Steps, in order: lower-case; reduce file paths with a known extension to
+ * their last segment; drop parenthetical groups; remove whitespace that
+ * precedes punctuation; collapse whitespace runs; trim.
+ *
+ * @param {string} [s] - Description text
+ * @returns {string} Normalized text ('' for falsy input)
+ */
+function normalizeDesc(s) {
+  const lastSegment = (path) => path.replace(/\\/g, '/').split('/').pop();
+  let text = (s || '').toLowerCase();
+  text = text.replace(/[\w\-./\\]+\/[\w\-./\\]+\.(cs|ts|js|json|html|resx|png|md)/g, (m) => lastSegment(m));
+  text = text.replace(/\([^)]*\)/g, '');
+  text = text.replace(/\s+([.,;:!?])/g, '$1');
+  text = text.replace(/\s+/g, ' ');
+  return text.trim();
+}
+/**
+ * Deduplicate by normalized description text.
+ *
+ * Keeps the first item for each normalized description. Items whose
+ * normalized description is empty are dropped.
+ *
+ * @param {Array<object>} arr - Items to deduplicate
+ * @param {string} [descField='description'] - Field holding the description
+ * @returns {Array<object>} New array of kept items, in original order
+ */
+function dedupByDesc(arr, descField = 'description') {
+  const seenKeys = new Set();
+  return arr.reduce((kept, item) => {
+    const key = normalizeDesc(item[descField]);
+    if (key && !seenKeys.has(key)) {
+      seenKeys.add(key);
+      kept.push(item);
+    }
+    return kept;
+  }, []);
+}
+// ════════════════════════════════════════════════════════════
+//  Badge / Formatting Utilities
+// ════════════════════════════════════════════════════════════
+/**
+ * Format a timestamp as inline code, optionally followed by a segment tag.
+ *
+ * @param {string} ts - Timestamp text; falsy yields ''
+ * @param {number|string} [seg] - Segment number, appended as "_(Seg N)_" when truthy
+ * @returns {string} Markdown fragment
+ */
+function fmtTs(ts, seg) {
+  if (!ts) return '';
+  const stamp = `\`${ts}\``;
+  return seg ? `${stamp} _(Seg ${seg})_` : stamp;
+}
+/**
+ * Build a compact priority badge: a leading space, an icon, and the priority
+ * in inline code. Priorities without a mapped icon get the white circle.
+ *
+ * @param {string} p - Priority label; falsy yields ''
+ * @returns {string} Markdown fragment
+ */
+function priBadge(p) {
+  if (!p) return '';
+  const icons = { high: '🔴', medium: '🟡', low: '🟢', critical: '🔴' };
+  const icon = icons[p] || '⚪';
+  return ` ${icon} \`${p}\``;
+}
+/**
+ * Build a compact confidence badge: a leading space, an icon, and the level
+ * in inline code. Levels without a mapped icon get the white circle.
+ *
+ * @param {string} c - Confidence level; falsy yields ''
+ * @returns {string} Markdown fragment
+ */
+function confBadge(c) {
+  if (!c) return '';
+  const icons = { HIGH: '🟢', MEDIUM: '🟡', LOW: '🔴' };
+  const icon = icons[c] || '⚪';
+  return ` ${icon}\`${c}\``;
+}
+/**
+ * Build a confidence badge, optionally followed by the reason in italics.
+ *
+ * @param {string} c - Confidence level; falsy yields ''
+ * @param {string} [reason] - Explanation appended as "_(reason)_" when truthy
+ * @returns {string} Markdown fragment
+ */
+function confBadgeFull(c, reason) {
+  if (!c) return '';
+  const icons = { HIGH: '🟢', MEDIUM: '🟡', LOW: '🔴' };
+  const icon = icons[c] || '⚪';
+  const suffix = reason ? ` _(${reason})_` : '';
+  return ` ${icon}\`${c}\`${suffix}`;
+}
+// ════════════════════════════════════════════════════════════
+//  HTML-safe escaping
+// ════════════════════════════════════════════════════════════
+/**
+ * Escape a value for safe insertion into HTML text or attribute values.
+ *
+ * Replaces & < > " ' with their entities. Falsy input yields '', except the
+ * number 0, which is rendered as "0" so zero counts are not silently blanked.
+ *
+ * @param {*} s - Value to escape; non-strings are converted with String()
+ * @returns {string} Escaped text
+ */
+function escHtml(s) {
+  if (!s && s !== 0) return '';
+  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
+  // Single pass, so an ampersand produced by one replacement is never re-escaped.
+  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
+}
+// Public API: name clustering, deduplication, Markdown badge/timestamp
+// formatting, and HTML escaping helpers.
+module.exports = {
+  stripParens,
+  normalizeKey,
+  clusterNames,
+  resolve,
+  dedupBy,
+  normalizeDesc,
+  dedupByDesc,
+  fmtTs,
+  priBadge,
+  confBadge,
+  confBadgeFull,
+  escHtml,
+};