wayfind 2.0.28 → 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/connectors/github.js +37 -1
- package/bin/content-store.js +127 -38
- package/bin/digest.js +118 -37
- package/bin/distill.js +356 -0
- package/bin/slack-bot.js +2 -1
- package/bin/storage/sqlite-backend.js +44 -2
- package/bin/team-context.js +99 -0
- package/package.json +1 -1
package/bin/connectors/github.js
CHANGED
|
@@ -303,7 +303,28 @@ async function pull(config, since) {
|
|
|
303
303
|
highlights.push(`${failedCount} CI failure(s)`);
|
|
304
304
|
}
|
|
305
305
|
|
|
306
|
-
repoHighlights.push({
|
|
306
|
+
repoHighlights.push({
|
|
307
|
+
repo: repoStr,
|
|
308
|
+
openPRs,
|
|
309
|
+
mergedPRs,
|
|
310
|
+
highlights,
|
|
311
|
+
topPRs: data.prs.slice(0, 5).map((pr) => ({
|
|
312
|
+
number: pr.number,
|
|
313
|
+
title: pr.title,
|
|
314
|
+
author: pr.user?.login || pr.user?.name || 'unknown',
|
|
315
|
+
state: pr.merged_at ? 'merged' : pr.state,
|
|
316
|
+
})),
|
|
317
|
+
topIssues: data.issues.slice(0, 5).map((iss) => ({
|
|
318
|
+
number: iss.number,
|
|
319
|
+
title: iss.title,
|
|
320
|
+
labels: (iss.labels || []).map((l) => (typeof l === 'string' ? l : l.name)).filter(Boolean),
|
|
321
|
+
state: iss.state,
|
|
322
|
+
})),
|
|
323
|
+
failedRuns: failed.map((r) => ({
|
|
324
|
+
name: r.name || r.workflow?.name || 'unknown',
|
|
325
|
+
branch: r.head_branch || '',
|
|
326
|
+
})),
|
|
327
|
+
});
|
|
307
328
|
}
|
|
308
329
|
|
|
309
330
|
// Generate rollup summary
|
|
@@ -489,6 +510,21 @@ function generateSummaryMarkdown(
|
|
|
489
510
|
for (const h of rh.highlights) {
|
|
490
511
|
lines.push(`- ${h}`);
|
|
491
512
|
}
|
|
513
|
+
if (rh.topPRs && rh.topPRs.length > 0) {
|
|
514
|
+
const prItems = rh.topPRs.map((pr) => `#${pr.number} "${pr.title}" (${pr.author}, ${pr.state})`);
|
|
515
|
+
lines.push(`**PRs:** ${prItems.join(' | ')}`);
|
|
516
|
+
}
|
|
517
|
+
if (rh.topIssues && rh.topIssues.length > 0) {
|
|
518
|
+
const issueItems = rh.topIssues.map((iss) => {
|
|
519
|
+
const labels = iss.labels && iss.labels.length > 0 ? ` [${iss.labels.join(', ')}]` : '';
|
|
520
|
+
return `#${iss.number} "${iss.title}"${labels} (${iss.state})`;
|
|
521
|
+
});
|
|
522
|
+
lines.push(`**Issues:** ${issueItems.join(' | ')}`);
|
|
523
|
+
}
|
|
524
|
+
if (rh.failedRuns && rh.failedRuns.length > 0) {
|
|
525
|
+
const runItems = rh.failedRuns.map((r) => `${r.name}${r.branch ? ' (' + r.branch + ')' : ''}`);
|
|
526
|
+
lines.push(`**Failed CI:** ${runItems.join(' | ')}`);
|
|
527
|
+
}
|
|
492
528
|
lines.push('');
|
|
493
529
|
}
|
|
494
530
|
|
package/bin/content-store.js
CHANGED
|
@@ -212,6 +212,22 @@ function generateEntryId(date, repo, title) {
|
|
|
212
212
|
return crypto.createHash('sha256').update(input).digest('hex').slice(0, 12);
|
|
213
213
|
}
|
|
214
214
|
|
|
215
|
+
/**
|
|
216
|
+
* Compute a quality score for an entry (0-3).
|
|
217
|
+
* +1 if has reasoning (explains WHY)
|
|
218
|
+
* +1 if has alternatives (what was rejected)
|
|
219
|
+
* +1 if substantive content (>500 chars)
|
|
220
|
+
* @param {Object} entry - Entry metadata
|
|
221
|
+
* @returns {number} 0-3
|
|
222
|
+
*/
|
|
223
|
+
function computeQualityScore(entry) {
|
|
224
|
+
let score = 0;
|
|
225
|
+
if (entry.hasReasoning) score++;
|
|
226
|
+
if (entry.hasAlternatives) score++;
|
|
227
|
+
if ((entry.contentLength || 0) > 500) score++;
|
|
228
|
+
return score;
|
|
229
|
+
}
|
|
230
|
+
|
|
215
231
|
/**
|
|
216
232
|
* Build the text content for embedding from an entry's fields.
|
|
217
233
|
* @param {Object} entry - Entry with date, repo, title, fields
|
|
@@ -329,7 +345,7 @@ async function indexJournals(options = {}) {
|
|
|
329
345
|
const content = buildContent({ ...entry, date, author });
|
|
330
346
|
const hash = contentHash(content);
|
|
331
347
|
|
|
332
|
-
|
|
348
|
+
const entryMeta = {
|
|
333
349
|
date,
|
|
334
350
|
repo: entry.repo,
|
|
335
351
|
title: entry.title,
|
|
@@ -339,8 +355,12 @@ async function indexJournals(options = {}) {
|
|
|
339
355
|
contentLength: content.length,
|
|
340
356
|
tags: extractTags(entry),
|
|
341
357
|
hasEmbedding: false,
|
|
358
|
+
hasReasoning: false,
|
|
359
|
+
hasAlternatives: false,
|
|
342
360
|
_content: content, // temporary, not saved to index
|
|
343
361
|
};
|
|
362
|
+
entryMeta.qualityScore = computeQualityScore(entryMeta);
|
|
363
|
+
newEntries[id] = entryMeta;
|
|
344
364
|
}
|
|
345
365
|
}
|
|
346
366
|
|
|
@@ -744,39 +764,50 @@ function getEntryContent(entryId, options = {}) {
|
|
|
744
764
|
// ── Signal entries ──────────────────────────────────────────────────────
|
|
745
765
|
if (entry.source === 'signal') {
|
|
746
766
|
if (!signalsDir) return null;
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
path.join(channelDir, `${entry.date}-summary.md`),
|
|
759
|
-
];
|
|
760
|
-
for (const filePath of dateCandidates) {
|
|
767
|
+
const repo = entry.repo || '';
|
|
768
|
+
|
|
769
|
+
// Determine file location based on repo format:
|
|
770
|
+
// - 'signals/channel' (summary files) → signalsDir/channel/
|
|
771
|
+
// - 'owner/repo' (per-repo files) → find the channel dir containing owner/repo/
|
|
772
|
+
let searchDirs = [];
|
|
773
|
+
if (repo.startsWith('signals/')) {
|
|
774
|
+
const channel = repo.replace(/^signals\//, '');
|
|
775
|
+
searchDirs = [path.join(signalsDir, channel)];
|
|
776
|
+
} else {
|
|
777
|
+
// Per-repo entry: search all channel dirs for owner/repo subdirectory
|
|
761
778
|
try {
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
779
|
+
const channels = fs.readdirSync(signalsDir, { withFileTypes: true })
|
|
780
|
+
.filter(d => d.isDirectory()).map(d => d.name);
|
|
781
|
+
for (const ch of channels) {
|
|
782
|
+
const repoDir = path.join(signalsDir, ch, repo);
|
|
783
|
+
if (fs.existsSync(repoDir)) {
|
|
784
|
+
searchDirs.push(repoDir);
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
} catch { /* skip */ }
|
|
766
788
|
}
|
|
767
789
|
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
790
|
+
for (const dir of searchDirs) {
|
|
791
|
+
if (!fs.existsSync(dir)) continue;
|
|
792
|
+
// Try date-based filename first, then summary, then scan
|
|
793
|
+
const dateCandidates = [
|
|
794
|
+
path.join(dir, `${entry.date}.md`),
|
|
795
|
+
path.join(dir, `${entry.date}-summary.md`),
|
|
796
|
+
];
|
|
797
|
+
for (const filePath of dateCandidates) {
|
|
772
798
|
try {
|
|
773
|
-
return fs.readFileSync(
|
|
774
|
-
} catch {
|
|
775
|
-
continue;
|
|
776
|
-
}
|
|
799
|
+
return fs.readFileSync(filePath, 'utf8');
|
|
800
|
+
} catch { /* try next */ }
|
|
777
801
|
}
|
|
778
|
-
|
|
779
|
-
|
|
802
|
+
// Scan for files matching the date
|
|
803
|
+
try {
|
|
804
|
+
const files = fs.readdirSync(dir).filter(f => f.endsWith('.md') && f.includes(entry.date));
|
|
805
|
+
for (const file of files) {
|
|
806
|
+
try {
|
|
807
|
+
return fs.readFileSync(path.join(dir, file), 'utf8');
|
|
808
|
+
} catch { continue; }
|
|
809
|
+
}
|
|
810
|
+
} catch { /* dir not readable */ }
|
|
780
811
|
}
|
|
781
812
|
|
|
782
813
|
return null;
|
|
@@ -1346,7 +1377,7 @@ async function indexConversations(options = {}) {
|
|
|
1346
1377
|
|
|
1347
1378
|
const hash = contentHash(content);
|
|
1348
1379
|
|
|
1349
|
-
|
|
1380
|
+
const convEntry = {
|
|
1350
1381
|
date,
|
|
1351
1382
|
repo: transcript.repo,
|
|
1352
1383
|
title: decision.title,
|
|
@@ -1361,6 +1392,8 @@ async function indexConversations(options = {}) {
|
|
|
1361
1392
|
hasAlternatives: !!decision.has_alternatives,
|
|
1362
1393
|
_content: content,
|
|
1363
1394
|
};
|
|
1395
|
+
convEntry.qualityScore = computeQualityScore(convEntry);
|
|
1396
|
+
existingIndex.entries[id] = convEntry;
|
|
1364
1397
|
|
|
1365
1398
|
if (doEmbeddings) {
|
|
1366
1399
|
try {
|
|
@@ -1653,16 +1686,42 @@ async function indexSignals(options = {}) {
|
|
|
1653
1686
|
|
|
1654
1687
|
for (const channel of channels) {
|
|
1655
1688
|
const channelDir = path.join(signalsDir, channel);
|
|
1656
|
-
|
|
1689
|
+
|
|
1690
|
+
// Collect all .md files: channel root + recursive owner/repo subdirectories
|
|
1691
|
+
const signalFiles = [];
|
|
1657
1692
|
try {
|
|
1658
|
-
|
|
1693
|
+
const entries = fs.readdirSync(channelDir, { withFileTypes: true });
|
|
1694
|
+
// Channel-root .md files (summaries like YYYY-MM-DD-summary.md)
|
|
1695
|
+
for (const e of entries) {
|
|
1696
|
+
if (e.isFile() && e.name.endsWith('.md')) {
|
|
1697
|
+
signalFiles.push({ filePath: path.join(channelDir, e.name), file: e.name, repo: 'signals/' + channel });
|
|
1698
|
+
}
|
|
1699
|
+
}
|
|
1700
|
+
// Walk owner/repo subdirectories (e.g., github/acme-corp/web-api/)
|
|
1701
|
+
for (const ownerEntry of entries) {
|
|
1702
|
+
if (!ownerEntry.isDirectory()) continue;
|
|
1703
|
+
const ownerDir = path.join(channelDir, ownerEntry.name);
|
|
1704
|
+
let repoEntries;
|
|
1705
|
+
try { repoEntries = fs.readdirSync(ownerDir, { withFileTypes: true }); } catch { continue; }
|
|
1706
|
+
for (const repoEntry of repoEntries) {
|
|
1707
|
+
if (!repoEntry.isDirectory()) continue;
|
|
1708
|
+
const repoDir = path.join(ownerDir, repoEntry.name);
|
|
1709
|
+
const repoStr = `${ownerEntry.name}/${repoEntry.name}`;
|
|
1710
|
+
let repoFiles;
|
|
1711
|
+
try { repoFiles = fs.readdirSync(repoDir).filter(f => f.endsWith('.md')); } catch { continue; }
|
|
1712
|
+
for (const f of repoFiles) {
|
|
1713
|
+
signalFiles.push({ filePath: path.join(repoDir, f), file: f, repo: repoStr });
|
|
1714
|
+
}
|
|
1715
|
+
}
|
|
1716
|
+
}
|
|
1659
1717
|
} catch {
|
|
1660
1718
|
continue;
|
|
1661
1719
|
}
|
|
1662
1720
|
|
|
1663
|
-
|
|
1721
|
+
signalFiles.sort((a, b) => a.file.localeCompare(b.file));
|
|
1722
|
+
|
|
1723
|
+
for (const { filePath, file, repo } of signalFiles) {
|
|
1664
1724
|
stats.fileCount++;
|
|
1665
|
-
const filePath = path.join(channelDir, file);
|
|
1666
1725
|
let content;
|
|
1667
1726
|
try {
|
|
1668
1727
|
content = fs.readFileSync(filePath, 'utf8');
|
|
@@ -1670,16 +1729,17 @@ async function indexSignals(options = {}) {
|
|
|
1670
1729
|
continue;
|
|
1671
1730
|
}
|
|
1672
1731
|
|
|
1673
|
-
// Extract date from filename (YYYY-MM-DD.md) or fall back to filename
|
|
1674
|
-
const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})
|
|
1732
|
+
// Extract date from filename (YYYY-MM-DD.md or YYYY-MM-DD-summary.md) or fall back to filename
|
|
1733
|
+
const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})/);
|
|
1675
1734
|
const date = dateMatch ? dateMatch[1] : file.replace(/\.md$/, '');
|
|
1676
1735
|
|
|
1677
1736
|
// Extract title from first # heading, or fall back to filename
|
|
1678
1737
|
const titleMatch = content.match(/^#\s+(.+)$/m);
|
|
1679
1738
|
const title = titleMatch ? titleMatch[1].trim() : file.replace(/\.md$/, '');
|
|
1680
1739
|
|
|
1681
|
-
// Extract tags: channel name + any ## section headings
|
|
1740
|
+
// Extract tags: channel name + repo + any ## section headings
|
|
1682
1741
|
const tags = [channel];
|
|
1742
|
+
if (repo !== 'signals/' + channel) tags.push(repo);
|
|
1683
1743
|
const sectionRe = /^##\s+(.+)$/gm;
|
|
1684
1744
|
let sectionMatch;
|
|
1685
1745
|
while ((sectionMatch = sectionRe.exec(content)) !== null) {
|
|
@@ -1689,7 +1749,6 @@ async function indexSignals(options = {}) {
|
|
|
1689
1749
|
}
|
|
1690
1750
|
}
|
|
1691
1751
|
|
|
1692
|
-
const repo = 'signals/' + channel;
|
|
1693
1752
|
const id = generateEntryId(date, repo, file.replace(/\.md$/, ''));
|
|
1694
1753
|
const hash = contentHash(content);
|
|
1695
1754
|
|
|
@@ -1975,6 +2034,32 @@ function computeQualityProfile(options = {}) {
|
|
|
1975
2034
|
|
|
1976
2035
|
// ── Exports ─────────────────────────────────────────────────────────────────
|
|
1977
2036
|
|
|
2037
|
+
/**
|
|
2038
|
+
* Deduplicate search results by removing raw entries that have been absorbed
|
|
2039
|
+
* into distilled entries. If a distilled entry exists in the results, its
|
|
2040
|
+
* source entries (listed in distilledFrom) are removed.
|
|
2041
|
+
* @param {Array<{id: string, entry: Object, score?: number}>} results
|
|
2042
|
+
* @returns {Array} Deduplicated results
|
|
2043
|
+
*/
|
|
2044
|
+
function deduplicateResults(results) {
|
|
2045
|
+
if (!results || results.length === 0) return results;
|
|
2046
|
+
|
|
2047
|
+
// Collect all IDs that have been absorbed into distilled entries
|
|
2048
|
+
const absorbedIds = new Set();
|
|
2049
|
+
for (const r of results) {
|
|
2050
|
+
if (r.entry && r.entry.distilledFrom && Array.isArray(r.entry.distilledFrom)) {
|
|
2051
|
+
for (const id of r.entry.distilledFrom) {
|
|
2052
|
+
absorbedIds.add(id);
|
|
2053
|
+
}
|
|
2054
|
+
}
|
|
2055
|
+
}
|
|
2056
|
+
|
|
2057
|
+
if (absorbedIds.size === 0) return results;
|
|
2058
|
+
|
|
2059
|
+
// Filter out absorbed entries
|
|
2060
|
+
return results.filter(r => !absorbedIds.has(r.id));
|
|
2061
|
+
}
|
|
2062
|
+
|
|
1978
2063
|
module.exports = {
|
|
1979
2064
|
// Parsing
|
|
1980
2065
|
parseJournalFile,
|
|
@@ -2005,6 +2090,10 @@ module.exports = {
|
|
|
2005
2090
|
isRepoExcluded,
|
|
2006
2091
|
applyFilters,
|
|
2007
2092
|
|
|
2093
|
+
// Quality & dedup
|
|
2094
|
+
computeQualityScore,
|
|
2095
|
+
deduplicateResults,
|
|
2096
|
+
|
|
2008
2097
|
// Core operations
|
|
2009
2098
|
indexJournals,
|
|
2010
2099
|
indexSignals,
|
package/bin/digest.js
CHANGED
|
@@ -269,13 +269,18 @@ function collectFromStore(sinceDate, options = {}) {
|
|
|
269
269
|
});
|
|
270
270
|
|
|
271
271
|
if (entries.length === 0) {
|
|
272
|
-
return { journals: '', signals: '', entryCount: 0 };
|
|
272
|
+
return { journals: '', signals: '', entryCount: 0, entryMeta: [] };
|
|
273
273
|
}
|
|
274
274
|
|
|
275
275
|
const journalParts = [];
|
|
276
276
|
const signalParts = [];
|
|
277
|
+
const journalMeta = [];
|
|
278
|
+
const signalMeta = [];
|
|
277
279
|
|
|
278
280
|
for (const { id, entry } of entries) {
|
|
281
|
+
// Skip raw entries that have been absorbed into a distilled entry
|
|
282
|
+
if (entry.distilledFrom) continue;
|
|
283
|
+
|
|
279
284
|
const content = contentStore.getEntryContent(id, { storePath, journalDir, signalsDir });
|
|
280
285
|
if (!content) continue;
|
|
281
286
|
|
|
@@ -287,10 +292,21 @@ function collectFromStore(sinceDate, options = {}) {
|
|
|
287
292
|
const meta = author ? `${header}\n${author}\n` : `${header}\n`;
|
|
288
293
|
const formatted = `${meta}\n${content}`;
|
|
289
294
|
|
|
295
|
+
const itemMeta = {
|
|
296
|
+
date: entry.date,
|
|
297
|
+
source: entry.source,
|
|
298
|
+
qualityScore: entry.qualityScore || 0,
|
|
299
|
+
hasReasoning: entry.hasReasoning,
|
|
300
|
+
hasAlternatives: entry.hasAlternatives,
|
|
301
|
+
distillTier: entry.distillTier || 'raw',
|
|
302
|
+
};
|
|
303
|
+
|
|
290
304
|
if (source === 'signal') {
|
|
291
305
|
signalParts.push(formatted);
|
|
306
|
+
signalMeta.push(itemMeta);
|
|
292
307
|
} else {
|
|
293
308
|
journalParts.push(formatted);
|
|
309
|
+
journalMeta.push(itemMeta);
|
|
294
310
|
}
|
|
295
311
|
}
|
|
296
312
|
|
|
@@ -298,6 +314,7 @@ function collectFromStore(sinceDate, options = {}) {
|
|
|
298
314
|
journals: journalParts.join('\n\n---\n\n'),
|
|
299
315
|
signals: signalParts.join('\n\n---\n\n'),
|
|
300
316
|
entryCount: entries.length,
|
|
317
|
+
entryMeta: { journal: journalMeta, signal: signalMeta },
|
|
301
318
|
};
|
|
302
319
|
}
|
|
303
320
|
|
|
@@ -618,56 +635,102 @@ function buildPrompt(personaId, signalContent, journalContent, dateRange, contex
|
|
|
618
635
|
}
|
|
619
636
|
|
|
620
637
|
/**
|
|
621
|
-
* Apply token budget constraints
|
|
622
|
-
*
|
|
638
|
+
* Apply token budget constraints with quality-weighted packing.
|
|
639
|
+
* Higher quality entries are kept preferentially over low-quality ones.
|
|
623
640
|
* @param {string} signalContent
|
|
624
641
|
* @param {string} journalContent
|
|
625
642
|
* @param {number} maxChars
|
|
626
|
-
* @
|
|
643
|
+
* @param {Object} [options] - Optional metadata for quality-weighted packing
|
|
644
|
+
* @param {Object} [options.entryMeta] - { journal: [{qualityScore, date, ...}], signal: [...] }
|
|
645
|
+
* @param {Array} [options.scores] - Intelligence scores from Haiku scoring
|
|
646
|
+
* @param {string} [options.personaId] - Current persona for score lookup
|
|
647
|
+
* @returns {{ signals: string, journals: string, truncated: boolean, stats: Object }}
|
|
627
648
|
*/
|
|
628
|
-
function applyTokenBudget(signalContent, journalContent, maxChars) {
|
|
649
|
+
function applyTokenBudget(signalContent, journalContent, maxChars, options = {}) {
|
|
629
650
|
const total = signalContent.length + journalContent.length;
|
|
630
651
|
if (total <= maxChars) {
|
|
631
|
-
return { signals: signalContent, journals: journalContent, truncated: false };
|
|
652
|
+
return { signals: signalContent, journals: journalContent, truncated: false, stats: { dropped: 0 } };
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
const { entryMeta, scores, personaId } = options;
|
|
656
|
+
|
|
657
|
+
// Split into sections
|
|
658
|
+
const signalSections = signalContent ? signalContent.split('\n\n---\n\n') : [];
|
|
659
|
+
const journalSections = journalContent ? journalContent.split('\n\n---\n\n') : [];
|
|
660
|
+
const signalMetaArr = (entryMeta && entryMeta.signal) || [];
|
|
661
|
+
const journalMetaArr = (entryMeta && entryMeta.journal) || [];
|
|
662
|
+
|
|
663
|
+
// Score each section with composite priority
|
|
664
|
+
const todayStr = today();
|
|
665
|
+
const yesterdayStr = (() => { const d = new Date(); d.setDate(d.getDate() - 1); return d.toISOString().split('T')[0]; })();
|
|
666
|
+
|
|
667
|
+
const allSections = [];
|
|
668
|
+
for (let i = 0; i < signalSections.length; i++) {
|
|
669
|
+
const meta = signalMetaArr[i] || {};
|
|
670
|
+
const quality = meta.qualityScore || 0;
|
|
671
|
+
const recency = (meta.date === todayStr || meta.date === yesterdayStr) ? 1 : 0;
|
|
672
|
+
const intel = (scores && scores[i] && personaId) ? (scores[i][personaId] || 0) : 0;
|
|
673
|
+
const distillBonus = (meta.distillTier && meta.distillTier !== 'raw') ? 1 : 0;
|
|
674
|
+
allSections.push({
|
|
675
|
+
text: signalSections[i],
|
|
676
|
+
type: 'signal',
|
|
677
|
+
priority: quality + recency + intel + distillBonus,
|
|
678
|
+
len: signalSections[i].length,
|
|
679
|
+
});
|
|
680
|
+
}
|
|
681
|
+
for (let i = 0; i < journalSections.length; i++) {
|
|
682
|
+
const meta = journalMetaArr[i] || {};
|
|
683
|
+
const quality = meta.qualityScore || 0;
|
|
684
|
+
const recency = (meta.date === todayStr || meta.date === yesterdayStr) ? 1 : 0;
|
|
685
|
+
// Journal score indices start after signal count
|
|
686
|
+
const scoreIdx = signalSections.length + i;
|
|
687
|
+
const intel = (scores && scores[scoreIdx] && personaId) ? (scores[scoreIdx][personaId] || 0) : 0;
|
|
688
|
+
const distillBonus = (meta.distillTier && meta.distillTier !== 'raw') ? 1 : 0;
|
|
689
|
+
allSections.push({
|
|
690
|
+
text: journalSections[i],
|
|
691
|
+
type: 'journal',
|
|
692
|
+
priority: quality + recency + intel + distillBonus,
|
|
693
|
+
len: journalSections[i].length,
|
|
694
|
+
});
|
|
632
695
|
}
|
|
633
696
|
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
const available = maxChars - noteLen;
|
|
697
|
+
// Sort by priority descending (highest quality first)
|
|
698
|
+
allSections.sort((a, b) => b.priority - a.priority);
|
|
637
699
|
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
700
|
+
// Greedily pack into budget
|
|
701
|
+
const truncationNote = '\n\n> Note: Input was truncated to fit within token budget. Lower-quality entries were dropped.\n';
|
|
702
|
+
const available = maxChars - truncationNote.length;
|
|
703
|
+
const keptSignals = [];
|
|
704
|
+
const keptJournals = [];
|
|
705
|
+
let used = 0;
|
|
706
|
+
let dropped = 0;
|
|
642
707
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
708
|
+
for (const section of allSections) {
|
|
709
|
+
const sectionCost = section.len + 7; // account for '\n\n---\n\n' separator
|
|
710
|
+
if (used + sectionCost <= available) {
|
|
711
|
+
if (section.type === 'signal') {
|
|
712
|
+
keptSignals.push(section.text);
|
|
713
|
+
} else {
|
|
714
|
+
keptJournals.push(section.text);
|
|
715
|
+
}
|
|
716
|
+
used += sectionCost;
|
|
717
|
+
} else {
|
|
718
|
+
dropped++;
|
|
650
719
|
}
|
|
651
720
|
}
|
|
652
721
|
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
}
|
|
659
|
-
|
|
660
|
-
// Append truncation note to whichever content was actually trimmed
|
|
661
|
-
if (signalsTrimmed) {
|
|
662
|
-
trimmedSignals += truncationNote;
|
|
663
|
-
} else if (journalsTrimmed) {
|
|
664
|
-
trimmedJournals += truncationNote;
|
|
722
|
+
const truncated = dropped > 0;
|
|
723
|
+
let finalSignals = keptSignals.join('\n\n---\n\n');
|
|
724
|
+
let finalJournals = keptJournals.join('\n\n---\n\n');
|
|
725
|
+
if (truncated) {
|
|
726
|
+
finalJournals += truncationNote;
|
|
665
727
|
}
|
|
666
728
|
|
|
667
729
|
return {
|
|
668
|
-
signals:
|
|
669
|
-
journals:
|
|
670
|
-
truncated
|
|
730
|
+
signals: finalSignals,
|
|
731
|
+
journals: finalJournals,
|
|
732
|
+
truncated,
|
|
733
|
+
stats: { dropped, total: allSections.length, kept: allSections.length - dropped },
|
|
671
734
|
};
|
|
672
735
|
}
|
|
673
736
|
|
|
@@ -748,7 +811,11 @@ async function generateDigest(config, personaIds, sinceDate, onProgress) {
|
|
|
748
811
|
({ signals: pSignals, journals: pJournals } =
|
|
749
812
|
intelligence.filterForPersona(signalContent, journalContent, scores, personaId, threshold, allPersonaIds));
|
|
750
813
|
}
|
|
751
|
-
const budget = applyTokenBudget(pSignals, pJournals, maxInputChars
|
|
814
|
+
const budget = applyTokenBudget(pSignals, pJournals, maxInputChars, {
|
|
815
|
+
entryMeta: storeResult.entryMeta,
|
|
816
|
+
scores,
|
|
817
|
+
personaId,
|
|
818
|
+
});
|
|
752
819
|
pSignals = budget.signals;
|
|
753
820
|
pJournals = budget.journals;
|
|
754
821
|
|
|
@@ -805,7 +872,21 @@ async function generateDigest(config, personaIds, sinceDate, onProgress) {
|
|
|
805
872
|
fs.writeFileSync(combinedFile, combinedContent, 'utf8');
|
|
806
873
|
files.push(combinedFile);
|
|
807
874
|
|
|
808
|
-
|
|
875
|
+
// Compute input stats for preview mode
|
|
876
|
+
const entryMeta = storeResult.entryMeta || {};
|
|
877
|
+
const journalMeta = entryMeta.journal || [];
|
|
878
|
+
const signalMeta = entryMeta.signal || [];
|
|
879
|
+
const inputStats = {
|
|
880
|
+
journalEntries: journalMeta.length,
|
|
881
|
+
signalEntries: signalMeta.length,
|
|
882
|
+
qualityDistribution: {
|
|
883
|
+
rich: journalMeta.filter(m => m.qualityScore >= 2).length,
|
|
884
|
+
medium: journalMeta.filter(m => m.qualityScore === 1).length,
|
|
885
|
+
thin: journalMeta.filter(m => m.qualityScore === 0).length,
|
|
886
|
+
},
|
|
887
|
+
};
|
|
888
|
+
|
|
889
|
+
return { files, personas: personaIds, dateRange, scores, inputStats };
|
|
809
890
|
}
|
|
810
891
|
|
|
811
892
|
/**
|
package/bin/distill.js
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const contentStore = require('./content-store');
|
|
4
|
+
const llm = require('./connectors/llm');
|
|
5
|
+
|
|
6
|
+
// ── Tier definitions ────────────────────────────────────────────────────────
|
|
7
|
+
|
|
8
|
+
const TIERS = {
|
|
9
|
+
daily: { minAgeDays: 3, maxAgeDays: 14 },
|
|
10
|
+
weekly: { minAgeDays: 14, maxAgeDays: 60 },
|
|
11
|
+
archive: { minAgeDays: 60, maxAgeDays: Infinity },
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
function daysAgo(dateStr) {
|
|
17
|
+
const now = new Date();
|
|
18
|
+
const then = new Date(dateStr + 'T00:00:00Z');
|
|
19
|
+
return Math.floor((now - then) / (1000 * 60 * 60 * 24));
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function today() {
|
|
23
|
+
return new Date().toISOString().split('T')[0];
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Compute Jaccard similarity between two titles (word-level).
|
|
28
|
+
* @param {string} a
|
|
29
|
+
* @param {string} b
|
|
30
|
+
* @returns {number} 0-1
|
|
31
|
+
*/
|
|
32
|
+
function titleSimilarity(a, b) {
|
|
33
|
+
const wordsA = new Set((a || '').toLowerCase().split(/\s+/).filter(w => w.length > 2));
|
|
34
|
+
const wordsB = new Set((b || '').toLowerCase().split(/\s+/).filter(w => w.length > 2));
|
|
35
|
+
if (wordsA.size === 0 && wordsB.size === 0) return 1;
|
|
36
|
+
if (wordsA.size === 0 || wordsB.size === 0) return 0;
|
|
37
|
+
let intersection = 0;
|
|
38
|
+
for (const w of wordsA) {
|
|
39
|
+
if (wordsB.has(w)) intersection++;
|
|
40
|
+
}
|
|
41
|
+
const union = new Set([...wordsA, ...wordsB]).size;
|
|
42
|
+
return union === 0 ? 0 : intersection / union;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// ── Grouping ────────────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Group entries by (date, repo), then cluster by title similarity within each group.
|
|
49
|
+
* @param {Array<{id: string, entry: Object}>} entries
|
|
50
|
+
* @returns {Array<Array<{id: string, entry: Object}>>} Clusters of related entries
|
|
51
|
+
*/
|
|
52
|
+
function groupEntries(entries) {
|
|
53
|
+
// Group by date+repo
|
|
54
|
+
const groups = {};
|
|
55
|
+
for (const item of entries) {
|
|
56
|
+
const key = `${item.entry.date}|${item.entry.repo}`;
|
|
57
|
+
if (!groups[key]) groups[key] = [];
|
|
58
|
+
groups[key].push(item);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Within each group, cluster by title similarity
|
|
62
|
+
const clusters = [];
|
|
63
|
+
for (const items of Object.values(groups)) {
|
|
64
|
+
if (items.length === 1) {
|
|
65
|
+
clusters.push(items);
|
|
66
|
+
continue;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const assigned = new Set();
|
|
70
|
+
for (let i = 0; i < items.length; i++) {
|
|
71
|
+
if (assigned.has(i)) continue;
|
|
72
|
+
const cluster = [items[i]];
|
|
73
|
+
assigned.add(i);
|
|
74
|
+
for (let j = i + 1; j < items.length; j++) {
|
|
75
|
+
if (assigned.has(j)) continue;
|
|
76
|
+
if (titleSimilarity(items[i].entry.title, items[j].entry.title) > 0.8) {
|
|
77
|
+
cluster.push(items[j]);
|
|
78
|
+
assigned.add(j);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
clusters.push(cluster);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
return clusters;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// ── Deduplication ───────────────────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Deduplicate a cluster of entries.
|
|
92
|
+
* - Exact content_hash matches: keep highest quality_score
|
|
93
|
+
* - Returns { canonical: [{id, entry}], absorbed: [ids] }
|
|
94
|
+
*/
|
|
95
|
+
function deduplicateGroup(cluster) {
|
|
96
|
+
if (cluster.length <= 1) {
|
|
97
|
+
return { canonical: cluster, absorbed: [] };
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Group by content hash
|
|
101
|
+
const byHash = {};
|
|
102
|
+
for (const item of cluster) {
|
|
103
|
+
const hash = item.entry.contentHash;
|
|
104
|
+
if (!byHash[hash]) byHash[hash] = [];
|
|
105
|
+
byHash[hash].push(item);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
const canonical = [];
|
|
109
|
+
const absorbed = [];
|
|
110
|
+
|
|
111
|
+
for (const items of Object.values(byHash)) {
|
|
112
|
+
if (items.length === 1) {
|
|
113
|
+
canonical.push(items[0]);
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
// Keep the one with highest quality score
|
|
117
|
+
items.sort((a, b) => (b.entry.qualityScore || 0) - (a.entry.qualityScore || 0));
|
|
118
|
+
canonical.push(items[0]);
|
|
119
|
+
for (let i = 1; i < items.length; i++) {
|
|
120
|
+
absorbed.push(items[i].id);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
return { canonical, absorbed };
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// ── Merging ─────────────────────────────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
const MERGE_PROMPTS = {
|
|
130
|
+
daily: `You are merging duplicate decision entries from the same day and repo.
|
|
131
|
+
Remove exact duplicates. Keep all distinct decisions with full reasoning.
|
|
132
|
+
Return a single markdown entry that preserves all unique information.
|
|
133
|
+
Format: Start with the repo and title, then include all distinct decisions with their reasoning.`,
|
|
134
|
+
|
|
135
|
+
weekly: `You are creating a weekly summary for a repo.
|
|
136
|
+
Combine related decisions into a concise per-repo weekly summary.
|
|
137
|
+
Preserve key reasoning and alternatives that were considered.
|
|
138
|
+
Remove redundancy and boilerplate.
|
|
139
|
+
Format: A clean markdown summary organized by topic.`,
|
|
140
|
+
|
|
141
|
+
archive: `You are creating a monthly archive summary.
|
|
142
|
+
Compress multiple entries into a brief summary with key decisions and outcomes only.
|
|
143
|
+
Focus on what was decided and why, not the details of how.
|
|
144
|
+
Format: A compact markdown summary, max 500 words.`,
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Merge 2+ related entries into a single distilled entry via LLM.
|
|
149
|
+
* @param {Array<{id: string, entry: Object}>} entries
|
|
150
|
+
* @param {Object} llmConfig - { provider, model, api_key_env }
|
|
151
|
+
* @param {string} tier - 'daily', 'weekly', or 'archive'
|
|
152
|
+
* @returns {Promise<{content: string, title: string}>}
|
|
153
|
+
*/
|
|
154
|
+
async function mergeEntries(entries, llmConfig, tier) {
|
|
155
|
+
const storePath = contentStore.DEFAULT_STORE_PATH;
|
|
156
|
+
const journalDir = contentStore.DEFAULT_JOURNAL_DIR;
|
|
157
|
+
const signalsDir = contentStore.DEFAULT_SIGNALS_DIR;
|
|
158
|
+
|
|
159
|
+
const parts = entries.map(({ id, entry }) => {
|
|
160
|
+
const content = contentStore.getEntryContent(id, { storePath, journalDir, signalsDir });
|
|
161
|
+
return content || `${entry.date} — ${entry.repo} — ${entry.title}`;
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
const systemPrompt = MERGE_PROMPTS[tier] || MERGE_PROMPTS.daily;
|
|
165
|
+
const userContent = parts.join('\n\n---\n\n');
|
|
166
|
+
|
|
167
|
+
const result = await llm.chat({
|
|
168
|
+
provider: llmConfig.provider || 'anthropic',
|
|
169
|
+
model: llmConfig.model || 'claude-haiku-4-5-20251001',
|
|
170
|
+
apiKeyEnv: llmConfig.api_key_env || 'ANTHROPIC_API_KEY',
|
|
171
|
+
system: systemPrompt,
|
|
172
|
+
messages: [{ role: 'user', content: userContent }],
|
|
173
|
+
max_tokens: 2000,
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
// Extract title from first line or generate one
|
|
177
|
+
const firstEntry = entries[0].entry;
|
|
178
|
+
const title = `[${tier}] ${firstEntry.repo} — ${firstEntry.date}`;
|
|
179
|
+
|
|
180
|
+
return { content: result, title };
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// ── Main Pipeline ───────────────────────────────────────────────────────────
|
|
184
|
+
|
|
185
|
+
/**
 * Run the distillation pipeline.
 *
 * For each requested tier this:
 *   1. queries content-store metadata in the tier's age window,
 *   2. filters to raw, not-yet-distilled entries,
 *   3. groups them into clusters and deduplicates each cluster,
 *   4. (unless dry-run) merges clusters of 2+ canonical entries via the LLM
 *      into a single distilled entry, marking the sources as absorbed,
 *   5. records the run in the distillation_log table when a db is available.
 *
 * @param {Object} options
 * @param {string} [options.tier] - 'daily', 'weekly', 'archive', or 'all'
 * @param {boolean} [options.dryRun] - If true, don't write changes
 * @param {Object} [options.llmConfig] - LLM config for merge operations
 * @param {string} [options.storePath] - Content store path
 * @returns {Promise<Object>} Stats: { grouped, deduped, merged, llmCalls }
 */
async function distillEntries(options = {}) {
  const tierName = options.tier || 'daily';
  const dryRun = options.dryRun || false;
  const storePath = options.storePath || contentStore.DEFAULT_STORE_PATH;

  const tiersToRun = tierName === 'all'
    ? ['daily', 'weekly', 'archive']
    : [tierName];

  const totalStats = { grouped: 0, deduped: 0, merged: 0, llmCalls: 0 };

  for (const tier of tiersToRun) {
    const tierDef = TIERS[tier];
    if (!tierDef) {
      console.log(`Unknown tier: ${tier}`);
      continue;
    }

    // Calculate date range for this tier: entries between minAgeDays and
    // maxAgeDays old are eligible.
    const now = new Date();
    const sinceDate = new Date(now);
    sinceDate.setDate(sinceDate.getDate() - tierDef.maxAgeDays);
    const untilDate = new Date(now);
    untilDate.setDate(untilDate.getDate() - tierDef.minAgeDays);

    const since = sinceDate.toISOString().split('T')[0];
    const until = untilDate.toISOString().split('T')[0];

    // Query entries eligible for this tier
    const entries = contentStore.queryMetadata({ since, until, storePath });

    // Filter: only raw entries that haven't been distilled yet
    const eligible = entries.filter(({ entry }) => {
      return (entry.distillTier === 'raw' || !entry.distillTier)
        && !entry.distilledAt
        && !entry.distilledFrom; // not already a distilled entry
    });

    if (eligible.length === 0) {
      console.log(`  ${tier}: no eligible entries`);
      continue;
    }

    console.log(`  ${tier}: ${eligible.length} eligible entries (${since} to ${until})`);

    // Group and cluster
    const clusters = groupEntries(eligible);
    totalStats.grouped += clusters.length;

    // Per-tier counters. Fix: the distillation_log row previously used the
    // cumulative totalStats.merged / totalStats.llmCalls, so with tier='all'
    // each later tier's row double-counted the earlier tiers' work.
    let tierDeduped = 0;
    let tierMerged = 0;
    let tierLlmCalls = 0;
    const mergeableClusters = [];

    // Fix: load the index once per tier for the dedup pass instead of doing a
    // full loadIndex()/saveIndex() round-trip for every cluster.
    const dedupBackend = dryRun ? null : contentStore.getBackend(storePath);
    const dedupIndex = dedupBackend ? dedupBackend.loadIndex() : null;
    let dedupDirty = false;

    // Deduplicate within each cluster
    for (const cluster of clusters) {
      const { canonical, absorbed } = deduplicateGroup(cluster);
      tierDeduped += absorbed.length;

      if (dedupIndex && absorbed.length > 0) {
        // Mark absorbed entries
        for (const absorbedId of absorbed) {
          if (dedupIndex.entries[absorbedId]) {
            dedupIndex.entries[absorbedId].distilledAt = Date.now();
            dedupIndex.entries[absorbedId].distillTier = tier;
            dedupDirty = true;
          }
        }
      }

      // Only merge if there are 2+ canonical entries in the cluster
      if (canonical.length >= 2) {
        mergeableClusters.push(canonical);
      }
    }

    if (dedupDirty) {
      dedupBackend.saveIndex(dedupIndex);
    }

    totalStats.deduped += tierDeduped;

    if (dryRun) {
      console.log(`  Would dedup: ${tierDeduped} entries`);
      console.log(`  Would merge: ${mergeableClusters.length} clusters (${mergeableClusters.reduce((s, c) => s + c.length, 0)} entries)`);
      continue;
    }

    // Merge clusters via LLM (skipped when no llmConfig was provided)
    if (mergeableClusters.length > 0 && options.llmConfig) {
      for (const cluster of mergeableClusters) {
        try {
          const { content, title } = await mergeEntries(cluster, options.llmConfig, tier);
          tierLlmCalls++;

          // Create distilled entry in the content store
          const firstEntry = cluster[0].entry;
          const absorbedIds = cluster.map(c => c.id);
          const id = contentStore.generateEntryId(firstEntry.date, firstEntry.repo, title);
          const hash = contentStore.contentHash(content);

          const backend = contentStore.getBackend(storePath);
          const index = backend.loadIndex();

          index.entries[id] = {
            date: firstEntry.date,
            repo: firstEntry.repo,
            title,
            source: 'distilled',
            user: '',
            drifted: false,
            contentHash: hash,
            contentLength: content.length,
            tags: firstEntry.tags || [],
            hasEmbedding: false,
            hasReasoning: true,
            hasAlternatives: false,
            qualityScore: 3, // distilled entries are high quality by definition
            distillTier: tier,
            distilledFrom: absorbedIds,
            distilledAt: Date.now(),
          };

          // Mark source entries as absorbed
          for (const item of cluster) {
            if (index.entries[item.id]) {
              index.entries[item.id].distilledAt = Date.now();
              index.entries[item.id].distillTier = tier;
            }
          }

          index.entryCount = Object.keys(index.entries).length;
          backend.saveIndex(index);
          tierMerged += cluster.length;
        } catch (err) {
          // Non-fatal: keep merging the remaining clusters
          console.log(`  Merge failed for cluster: ${err.message}`);
        }
      }
    }

    totalStats.merged += tierMerged;
    totalStats.llmCalls += tierLlmCalls;

    // Log the distillation run with this tier's figures only
    if (!dryRun) {
      try {
        const backend = contentStore.getBackend(storePath);
        if (backend.db) {
          backend.db.prepare(`
            INSERT INTO distillation_log (run_at, tier, entries_input, entries_output, entries_merged, entries_deduped, llm_calls)
            VALUES (?, ?, ?, ?, ?, ?, ?)
          `).run(Date.now(), tier, eligible.length, eligible.length - tierDeduped - tierMerged, tierMerged, tierDeduped, tierLlmCalls);
        }
      } catch { /* non-fatal */ }
    }
  }

  return totalStats;
}
|
|
346
|
+
|
|
347
|
+
// ── Exports ─────────────────────────────────────────────────────────────────
|
|
348
|
+
|
|
349
|
+
module.exports = {
  distillEntries,   // main pipeline entry point (async)
  groupEntries,     // cluster eligible entries
  deduplicateGroup, // pick canonical entries / absorbed duplicates in a cluster
  mergeEntries,     // LLM-merge a cluster into one distilled entry
  titleSimilarity,  // title similarity helper used for clustering
  TIERS,            // tier definitions (age windows per tier)
};
|
package/bin/storage/sqlite-backend.js
CHANGED
|
@@ -40,6 +40,10 @@ CREATE TABLE IF NOT EXISTS decisions (
|
|
|
40
40
|
has_embedding INTEGER DEFAULT 0,
|
|
41
41
|
has_reasoning INTEGER DEFAULT 0,
|
|
42
42
|
has_alternatives INTEGER DEFAULT 0,
|
|
43
|
+
quality_score INTEGER DEFAULT 0,
|
|
44
|
+
distill_tier TEXT DEFAULT 'raw',
|
|
45
|
+
distilled_from TEXT DEFAULT NULL,
|
|
46
|
+
distilled_at INTEGER DEFAULT NULL,
|
|
43
47
|
created_at INTEGER,
|
|
44
48
|
updated_at INTEGER
|
|
45
49
|
);
|
|
@@ -48,6 +52,19 @@ CREATE INDEX IF NOT EXISTS idx_decisions_date ON decisions(date);
|
|
|
48
52
|
CREATE INDEX IF NOT EXISTS idx_decisions_repo ON decisions(repo);
|
|
49
53
|
CREATE INDEX IF NOT EXISTS idx_decisions_source ON decisions(source);
|
|
50
54
|
CREATE INDEX IF NOT EXISTS idx_decisions_user ON decisions(user);
|
|
55
|
+
CREATE INDEX IF NOT EXISTS idx_decisions_quality ON decisions(quality_score);
|
|
56
|
+
CREATE INDEX IF NOT EXISTS idx_decisions_tier ON decisions(distill_tier);
|
|
57
|
+
|
|
58
|
+
CREATE TABLE IF NOT EXISTS distillation_log (
|
|
59
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
60
|
+
run_at INTEGER NOT NULL,
|
|
61
|
+
tier TEXT NOT NULL,
|
|
62
|
+
entries_input INTEGER DEFAULT 0,
|
|
63
|
+
entries_output INTEGER DEFAULT 0,
|
|
64
|
+
entries_merged INTEGER DEFAULT 0,
|
|
65
|
+
entries_deduped INTEGER DEFAULT 0,
|
|
66
|
+
llm_calls INTEGER DEFAULT 0
|
|
67
|
+
);
|
|
51
68
|
|
|
52
69
|
CREATE TABLE IF NOT EXISTS embeddings (
|
|
53
70
|
id TEXT PRIMARY KEY,
|
|
@@ -90,6 +107,10 @@ function entryToRow(id, entry) {
|
|
|
90
107
|
has_embedding: entry.hasEmbedding ? 1 : 0,
|
|
91
108
|
has_reasoning: entry.hasReasoning ? 1 : 0,
|
|
92
109
|
has_alternatives: entry.hasAlternatives ? 1 : 0,
|
|
110
|
+
quality_score: entry.qualityScore || 0,
|
|
111
|
+
distill_tier: entry.distillTier || 'raw',
|
|
112
|
+
distilled_from: entry.distilledFrom ? JSON.stringify(entry.distilledFrom) : null,
|
|
113
|
+
distilled_at: entry.distilledAt || null,
|
|
93
114
|
created_at: entry.createdAt || Date.now(),
|
|
94
115
|
updated_at: Date.now(),
|
|
95
116
|
};
|
|
@@ -109,6 +130,10 @@ function rowToEntry(row) {
|
|
|
109
130
|
hasEmbedding: !!row.has_embedding,
|
|
110
131
|
hasReasoning: !!row.has_reasoning,
|
|
111
132
|
hasAlternatives: !!row.has_alternatives,
|
|
133
|
+
qualityScore: row.quality_score || 0,
|
|
134
|
+
distillTier: row.distill_tier || 'raw',
|
|
135
|
+
distilledFrom: row.distilled_from ? JSON.parse(row.distilled_from) : null,
|
|
136
|
+
distilledAt: row.distilled_at || null,
|
|
112
137
|
};
|
|
113
138
|
}
|
|
114
139
|
|
|
@@ -137,6 +162,21 @@ class SqliteBackend {
|
|
|
137
162
|
if (!existing) {
|
|
138
163
|
this.db.prepare('INSERT INTO metadata (key, value) VALUES (?, ?)').run('schema_version', SCHEMA_VERSION);
|
|
139
164
|
}
|
|
165
|
+
|
|
166
|
+
// Migrate existing databases: add new columns if they don't exist
|
|
167
|
+
const cols = this.db.prepare('PRAGMA table_info(decisions)').all().map(c => c.name);
|
|
168
|
+
if (!cols.includes('quality_score')) {
|
|
169
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN quality_score INTEGER DEFAULT 0');
|
|
170
|
+
}
|
|
171
|
+
if (!cols.includes('distill_tier')) {
|
|
172
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN distill_tier TEXT DEFAULT \'raw\'');
|
|
173
|
+
}
|
|
174
|
+
if (!cols.includes('distilled_from')) {
|
|
175
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_from TEXT DEFAULT NULL');
|
|
176
|
+
}
|
|
177
|
+
if (!cols.includes('distilled_at')) {
|
|
178
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_at INTEGER DEFAULT NULL');
|
|
179
|
+
}
|
|
140
180
|
}
|
|
141
181
|
|
|
142
182
|
close() {
|
|
@@ -173,10 +213,12 @@ class SqliteBackend {
|
|
|
173
213
|
const stmt = this.db.prepare(`
|
|
174
214
|
INSERT INTO decisions (id, date, repo, title, source, user, drifted,
|
|
175
215
|
content_hash, content_length, tags, has_embedding, has_reasoning,
|
|
176
|
-
has_alternatives,
|
|
216
|
+
has_alternatives, quality_score, distill_tier, distilled_from, distilled_at,
|
|
217
|
+
created_at, updated_at)
|
|
177
218
|
VALUES (@id, @date, @repo, @title, @source, @user, @drifted,
|
|
178
219
|
@content_hash, @content_length, @tags, @has_embedding, @has_reasoning,
|
|
179
|
-
@has_alternatives, @
|
|
220
|
+
@has_alternatives, @quality_score, @distill_tier, @distilled_from, @distilled_at,
|
|
221
|
+
@created_at, @updated_at)
|
|
180
222
|
`);
|
|
181
223
|
for (const [id, entry] of Object.entries(entries)) {
|
|
182
224
|
stmt.run(entryToRow(id, entry));
|
package/bin/team-context.js
CHANGED
|
@@ -718,6 +718,14 @@ async function runPull(args) {
|
|
|
718
718
|
writeConnectorsConfig(freshConfig);
|
|
719
719
|
printPullResult(name, result);
|
|
720
720
|
}
|
|
721
|
+
// Auto-index signals into content store after pull
|
|
722
|
+
try {
|
|
723
|
+
console.log('\nIndexing signals...');
|
|
724
|
+
const signalStats = await contentStore.indexSignals({ embeddings: false });
|
|
725
|
+
console.log(` ${signalStats.newEntries} new, ${signalStats.updatedEntries} updated, ${signalStats.skippedEntries} unchanged`);
|
|
726
|
+
} catch (err) {
|
|
727
|
+
console.log(` Signal indexing skipped: ${err.message}`);
|
|
728
|
+
}
|
|
721
729
|
return;
|
|
722
730
|
}
|
|
723
731
|
|
|
@@ -827,6 +835,14 @@ async function runPull(args) {
|
|
|
827
835
|
writeConnectorsConfig(freshConfig);
|
|
828
836
|
|
|
829
837
|
printPullResult(channel, result);
|
|
838
|
+
|
|
839
|
+
// Auto-index signals into content store after pull
|
|
840
|
+
try {
|
|
841
|
+
const signalStats = await contentStore.indexSignals({ embeddings: false });
|
|
842
|
+
console.log(`\nSignals indexed: ${signalStats.newEntries} new, ${signalStats.updatedEntries} updated`);
|
|
843
|
+
} catch (err) {
|
|
844
|
+
console.log(`Signal indexing skipped: ${err.message}`);
|
|
845
|
+
}
|
|
830
846
|
}
|
|
831
847
|
|
|
832
848
|
function runSignals() {
|
|
@@ -928,6 +944,7 @@ async function runDigest(args) {
|
|
|
928
944
|
const personaIdx = args.indexOf('--persona');
|
|
929
945
|
const sinceIdx = args.indexOf('--since');
|
|
930
946
|
const deliver = args.includes('--deliver');
|
|
947
|
+
const preview = args.includes('--preview');
|
|
931
948
|
|
|
932
949
|
// Determine personas
|
|
933
950
|
let personaIds;
|
|
@@ -981,6 +998,32 @@ async function runDigest(args) {
|
|
|
981
998
|
});
|
|
982
999
|
|
|
983
1000
|
console.log('');
|
|
1001
|
+
|
|
1002
|
+
if (preview) {
|
|
1003
|
+
// Preview mode: print digest content and stats to stdout
|
|
1004
|
+
console.log('=== DIGEST PREVIEW ===');
|
|
1005
|
+
console.log('');
|
|
1006
|
+
if (result.inputStats) {
|
|
1007
|
+
const s = result.inputStats;
|
|
1008
|
+
console.log(`Input: ${s.journalEntries || 0} journal, ${s.signalEntries || 0} signal entries`);
|
|
1009
|
+
if (s.budgetStats) {
|
|
1010
|
+
console.log(`Budget: ${s.budgetStats.kept || 0} kept, ${s.budgetStats.dropped || 0} dropped`);
|
|
1011
|
+
}
|
|
1012
|
+
console.log('');
|
|
1013
|
+
}
|
|
1014
|
+
for (const f of result.files) {
|
|
1015
|
+
try {
|
|
1016
|
+
const content = fs.readFileSync(f, 'utf8');
|
|
1017
|
+
const personaId = path.basename(path.dirname(f)) || 'combined';
|
|
1018
|
+
console.log(`--- ${personaId} ---`);
|
|
1019
|
+
console.log(content);
|
|
1020
|
+
console.log('');
|
|
1021
|
+
} catch { /* skip unreadable */ }
|
|
1022
|
+
}
|
|
1023
|
+
console.log('=== END PREVIEW ===');
|
|
1024
|
+
return;
|
|
1025
|
+
}
|
|
1026
|
+
|
|
984
1027
|
console.log('Digests generated:');
|
|
985
1028
|
for (const f of result.files) {
|
|
986
1029
|
console.log(` ${f}`);
|
|
@@ -1172,6 +1215,20 @@ async function runReindex(args) {
|
|
|
1172
1215
|
const signalsOnly = args.includes('--signals-only');
|
|
1173
1216
|
const doExport = args.includes('--export');
|
|
1174
1217
|
const detectShifts = args.includes('--detect-shifts');
|
|
1218
|
+
const force = args.includes('--force');
|
|
1219
|
+
|
|
1220
|
+
if (force) {
|
|
1221
|
+
console.log('Force mode: clearing content store for full reindex...');
|
|
1222
|
+
try {
|
|
1223
|
+
const backend = contentStore.getBackend();
|
|
1224
|
+
const emptyIndex = { version: contentStore.INDEX_VERSION, entries: {}, lastUpdated: Date.now(), entryCount: 0 };
|
|
1225
|
+
backend.saveIndex(emptyIndex);
|
|
1226
|
+
// Clear conversation fingerprint cache so all transcripts are re-extracted
|
|
1227
|
+
backend.saveConversationIndex({});
|
|
1228
|
+
} catch (err) {
|
|
1229
|
+
console.log(` Warning: could not clear store: ${err.message}`);
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1175
1232
|
|
|
1176
1233
|
if (!conversationsOnly && !signalsOnly) {
|
|
1177
1234
|
console.log('=== Journals ===');
|
|
@@ -1192,6 +1249,44 @@ async function runReindex(args) {
|
|
|
1192
1249
|
console.log('=== Signals ===');
|
|
1193
1250
|
await indexSignalsIfAvailable();
|
|
1194
1251
|
}
|
|
1252
|
+
|
|
1253
|
+
// Optional: run distillation after reindex
|
|
1254
|
+
if (args.includes('--distill')) {
|
|
1255
|
+
console.log('');
|
|
1256
|
+
console.log('=== Distillation ===');
|
|
1257
|
+
await runDistill(['--tier', 'daily']);
|
|
1258
|
+
}
|
|
1259
|
+
}
|
|
1260
|
+
|
|
1261
|
+
/**
 * CLI handler for the `distill` command: dedupes, merges, and compacts
 * content-store entries via distill.distillEntries().
 *
 * Flags:
 *   --tier <daily|weekly|archive|all>  tier to distill (default 'daily')
 *   --dry-run                          report what would change, write nothing
 *
 * @param {string[]} args - raw CLI arguments after the command name
 */
async function runDistill(args) {
  const distill = require('./distill');
  const dryRun = args.includes('--dry-run');
  const tierIdx = args.indexOf('--tier');
  const tier = (tierIdx !== -1 && args[tierIdx + 1]) ? args[tierIdx + 1] : 'daily';

  console.log(`Distilling content (tier: ${tier}${dryRun ? ', dry run' : ''})...`);

  // Build LLM config from connectors (merges need an LLM; dry runs don't)
  let llmConfig = null;
  if (!dryRun) {
    const config = readConnectorsConfig();
    const llm = config.digest?.llm;
    if (llm) {
      llmConfig = {
        provider: llm.provider,
        model: llm.intelligence?.model || 'claude-haiku-4-5-20251001',
        api_key_env: llm.api_key_env,
      };
    } else {
      // Fix: without an LLM config distillEntries silently skips the merge
      // phase and the run reports "Merged: 0" with no hint why — say so.
      console.log('  No digest LLM configured; entries will be deduped but clusters will not be merged.');
    }
  }

  const stats = await distill.distillEntries({ tier, dryRun, llmConfig });

  console.log('');
  console.log('Distillation results:');
  console.log(`  Groups: ${stats.grouped}`);
  console.log(`  Deduped: ${stats.deduped}`);
  console.log(`  Merged: ${stats.merged}`);
  console.log(`  LLM calls: ${stats.llmCalls}`);
}
}
|
|
1196
1291
|
|
|
1197
1292
|
/**
|
|
@@ -4736,6 +4831,10 @@ const COMMANDS = {
|
|
|
4736
4831
|
desc: 'Index all signal sources (journals + conversations)',
|
|
4737
4832
|
run: (args) => runReindex(args),
|
|
4738
4833
|
},
|
|
4834
|
+
distill: {
|
|
4835
|
+
desc: 'Distill content store: dedup, merge, and compact entries',
|
|
4836
|
+
run: (args) => runDistill(args),
|
|
4837
|
+
},
|
|
4739
4838
|
'index-journals': {
|
|
4740
4839
|
desc: 'Index journal entries into the content store',
|
|
4741
4840
|
run: (args) => runIndexJournals(args),
|
package/package.json
CHANGED