wayfind 2.0.28 → 2.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -303,7 +303,28 @@ async function pull(config, since) {
303
303
  highlights.push(`${failedCount} CI failure(s)`);
304
304
  }
305
305
 
306
- repoHighlights.push({ repo: repoStr, openPRs, mergedPRs, highlights });
306
+ repoHighlights.push({
307
+ repo: repoStr,
308
+ openPRs,
309
+ mergedPRs,
310
+ highlights,
311
+ topPRs: data.prs.slice(0, 5).map((pr) => ({
312
+ number: pr.number,
313
+ title: pr.title,
314
+ author: pr.user?.login || pr.user?.name || 'unknown',
315
+ state: pr.merged_at ? 'merged' : pr.state,
316
+ })),
317
+ topIssues: data.issues.slice(0, 5).map((iss) => ({
318
+ number: iss.number,
319
+ title: iss.title,
320
+ labels: (iss.labels || []).map((l) => (typeof l === 'string' ? l : l.name)).filter(Boolean),
321
+ state: iss.state,
322
+ })),
323
+ failedRuns: failed.map((r) => ({
324
+ name: r.name || r.workflow?.name || 'unknown',
325
+ branch: r.head_branch || '',
326
+ })),
327
+ });
307
328
  }
308
329
 
309
330
  // Generate rollup summary
@@ -489,6 +510,21 @@ function generateSummaryMarkdown(
489
510
  for (const h of rh.highlights) {
490
511
  lines.push(`- ${h}`);
491
512
  }
513
+ if (rh.topPRs && rh.topPRs.length > 0) {
514
+ const prItems = rh.topPRs.map((pr) => `#${pr.number} "${pr.title}" (${pr.author}, ${pr.state})`);
515
+ lines.push(`**PRs:** ${prItems.join(' | ')}`);
516
+ }
517
+ if (rh.topIssues && rh.topIssues.length > 0) {
518
+ const issueItems = rh.topIssues.map((iss) => {
519
+ const labels = iss.labels && iss.labels.length > 0 ? ` [${iss.labels.join(', ')}]` : '';
520
+ return `#${iss.number} "${iss.title}"${labels} (${iss.state})`;
521
+ });
522
+ lines.push(`**Issues:** ${issueItems.join(' | ')}`);
523
+ }
524
+ if (rh.failedRuns && rh.failedRuns.length > 0) {
525
+ const runItems = rh.failedRuns.map((r) => `${r.name}${r.branch ? ' (' + r.branch + ')' : ''}`);
526
+ lines.push(`**Failed CI:** ${runItems.join(' | ')}`);
527
+ }
492
528
  lines.push('');
493
529
  }
494
530
 
@@ -212,6 +212,22 @@ function generateEntryId(date, repo, title) {
212
212
  return crypto.createHash('sha256').update(input).digest('hex').slice(0, 12);
213
213
  }
214
214
 
215
/**
 * Compute a quality score for an entry (0-3).
 * One point is awarded for each of: reasoning (explains WHY),
 * alternatives (what was rejected), and substantive content (>500 chars).
 * @param {Object} entry - Entry metadata
 * @returns {number} 0-3
 */
function computeQualityScore(entry) {
  const signals = [
    Boolean(entry.hasReasoning),
    Boolean(entry.hasAlternatives),
    (entry.contentLength || 0) > 500,
  ];
  return signals.filter(Boolean).length;
}
230
+
215
231
  /**
216
232
  * Build the text content for embedding from an entry's fields.
217
233
  * @param {Object} entry - Entry with date, repo, title, fields
@@ -329,7 +345,7 @@ async function indexJournals(options = {}) {
329
345
  const content = buildContent({ ...entry, date, author });
330
346
  const hash = contentHash(content);
331
347
 
332
- newEntries[id] = {
348
+ const entryMeta = {
333
349
  date,
334
350
  repo: entry.repo,
335
351
  title: entry.title,
@@ -339,8 +355,12 @@ async function indexJournals(options = {}) {
339
355
  contentLength: content.length,
340
356
  tags: extractTags(entry),
341
357
  hasEmbedding: false,
358
+ hasReasoning: false,
359
+ hasAlternatives: false,
342
360
  _content: content, // temporary, not saved to index
343
361
  };
362
+ entryMeta.qualityScore = computeQualityScore(entryMeta);
363
+ newEntries[id] = entryMeta;
344
364
  }
345
365
  }
346
366
 
@@ -744,39 +764,50 @@ function getEntryContent(entryId, options = {}) {
744
764
  // ── Signal entries ──────────────────────────────────────────────────────
745
765
  if (entry.source === 'signal') {
746
766
  if (!signalsDir) return null;
747
- // entry.repo is like 'signals/github' — extract the channel
748
- const channel = (entry.repo || '').replace(/^signals\//, '');
749
- if (!channel) return null;
750
-
751
- const channelDir = path.join(signalsDir, channel);
752
- if (!fs.existsSync(channelDir)) return null;
753
-
754
- // Find a matching file in the channel directory
755
- // Try date-based filename first, then scan for any file containing the title
756
- const dateCandidates = [
757
- path.join(channelDir, `${entry.date}.md`),
758
- path.join(channelDir, `${entry.date}-summary.md`),
759
- ];
760
- for (const filePath of dateCandidates) {
767
+ const repo = entry.repo || '';
768
+
769
+ // Determine file location based on repo format:
770
+ // - 'signals/channel' (summary files) → signalsDir/channel/
771
+ // - 'owner/repo' (per-repo files) → find the channel dir containing owner/repo/
772
+ let searchDirs = [];
773
+ if (repo.startsWith('signals/')) {
774
+ const channel = repo.replace(/^signals\//, '');
775
+ searchDirs = [path.join(signalsDir, channel)];
776
+ } else {
777
+ // Per-repo entry: search all channel dirs for owner/repo subdirectory
761
778
  try {
762
- return fs.readFileSync(filePath, 'utf8');
763
- } catch {
764
- // Try next candidate
765
- }
779
+ const channels = fs.readdirSync(signalsDir, { withFileTypes: true })
780
+ .filter(d => d.isDirectory()).map(d => d.name);
781
+ for (const ch of channels) {
782
+ const repoDir = path.join(signalsDir, ch, repo);
783
+ if (fs.existsSync(repoDir)) {
784
+ searchDirs.push(repoDir);
785
+ }
786
+ }
787
+ } catch { /* skip */ }
766
788
  }
767
789
 
768
- // Scan channel dir for files matching the date
769
- try {
770
- const files = fs.readdirSync(channelDir).filter(f => f.endsWith('.md') && f.includes(entry.date));
771
- for (const file of files) {
790
+ for (const dir of searchDirs) {
791
+ if (!fs.existsSync(dir)) continue;
792
+ // Try date-based filename first, then summary, then scan
793
+ const dateCandidates = [
794
+ path.join(dir, `${entry.date}.md`),
795
+ path.join(dir, `${entry.date}-summary.md`),
796
+ ];
797
+ for (const filePath of dateCandidates) {
772
798
  try {
773
- return fs.readFileSync(path.join(channelDir, file), 'utf8');
774
- } catch {
775
- continue;
776
- }
799
+ return fs.readFileSync(filePath, 'utf8');
800
+ } catch { /* try next */ }
777
801
  }
778
- } catch {
779
- // Channel dir not readable
802
+ // Scan for files matching the date
803
+ try {
804
+ const files = fs.readdirSync(dir).filter(f => f.endsWith('.md') && f.includes(entry.date));
805
+ for (const file of files) {
806
+ try {
807
+ return fs.readFileSync(path.join(dir, file), 'utf8');
808
+ } catch { continue; }
809
+ }
810
+ } catch { /* dir not readable */ }
780
811
  }
781
812
 
782
813
  return null;
@@ -1346,7 +1377,7 @@ async function indexConversations(options = {}) {
1346
1377
 
1347
1378
  const hash = contentHash(content);
1348
1379
 
1349
- existingIndex.entries[id] = {
1380
+ const convEntry = {
1350
1381
  date,
1351
1382
  repo: transcript.repo,
1352
1383
  title: decision.title,
@@ -1361,6 +1392,8 @@ async function indexConversations(options = {}) {
1361
1392
  hasAlternatives: !!decision.has_alternatives,
1362
1393
  _content: content,
1363
1394
  };
1395
+ convEntry.qualityScore = computeQualityScore(convEntry);
1396
+ existingIndex.entries[id] = convEntry;
1364
1397
 
1365
1398
  if (doEmbeddings) {
1366
1399
  try {
@@ -1653,16 +1686,42 @@ async function indexSignals(options = {}) {
1653
1686
 
1654
1687
  for (const channel of channels) {
1655
1688
  const channelDir = path.join(signalsDir, channel);
1656
- let files;
1689
+
1690
+ // Collect all .md files: channel root + recursive owner/repo subdirectories
1691
+ const signalFiles = [];
1657
1692
  try {
1658
- files = fs.readdirSync(channelDir).filter(f => f.endsWith('.md')).sort();
1693
+ const entries = fs.readdirSync(channelDir, { withFileTypes: true });
1694
+ // Channel-root .md files (summaries like YYYY-MM-DD-summary.md)
1695
+ for (const e of entries) {
1696
+ if (e.isFile() && e.name.endsWith('.md')) {
1697
+ signalFiles.push({ filePath: path.join(channelDir, e.name), file: e.name, repo: 'signals/' + channel });
1698
+ }
1699
+ }
1700
+ // Walk owner/repo subdirectories (e.g., github/acme-corp/web-api/)
1701
+ for (const ownerEntry of entries) {
1702
+ if (!ownerEntry.isDirectory()) continue;
1703
+ const ownerDir = path.join(channelDir, ownerEntry.name);
1704
+ let repoEntries;
1705
+ try { repoEntries = fs.readdirSync(ownerDir, { withFileTypes: true }); } catch { continue; }
1706
+ for (const repoEntry of repoEntries) {
1707
+ if (!repoEntry.isDirectory()) continue;
1708
+ const repoDir = path.join(ownerDir, repoEntry.name);
1709
+ const repoStr = `${ownerEntry.name}/${repoEntry.name}`;
1710
+ let repoFiles;
1711
+ try { repoFiles = fs.readdirSync(repoDir).filter(f => f.endsWith('.md')); } catch { continue; }
1712
+ for (const f of repoFiles) {
1713
+ signalFiles.push({ filePath: path.join(repoDir, f), file: f, repo: repoStr });
1714
+ }
1715
+ }
1716
+ }
1659
1717
  } catch {
1660
1718
  continue;
1661
1719
  }
1662
1720
 
1663
- for (const file of files) {
1721
+ signalFiles.sort((a, b) => a.file.localeCompare(b.file));
1722
+
1723
+ for (const { filePath, file, repo } of signalFiles) {
1664
1724
  stats.fileCount++;
1665
- const filePath = path.join(channelDir, file);
1666
1725
  let content;
1667
1726
  try {
1668
1727
  content = fs.readFileSync(filePath, 'utf8');
@@ -1670,16 +1729,17 @@ async function indexSignals(options = {}) {
1670
1729
  continue;
1671
1730
  }
1672
1731
 
1673
- // Extract date from filename (YYYY-MM-DD.md) or fall back to filename
1674
- const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})\.md$/);
1732
+ // Extract date from filename (YYYY-MM-DD.md or YYYY-MM-DD-summary.md) or fall back to filename
1733
+ const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})/);
1675
1734
  const date = dateMatch ? dateMatch[1] : file.replace(/\.md$/, '');
1676
1735
 
1677
1736
  // Extract title from first # heading, or fall back to filename
1678
1737
  const titleMatch = content.match(/^#\s+(.+)$/m);
1679
1738
  const title = titleMatch ? titleMatch[1].trim() : file.replace(/\.md$/, '');
1680
1739
 
1681
- // Extract tags: channel name + any ## section headings
1740
+ // Extract tags: channel name + repo + any ## section headings
1682
1741
  const tags = [channel];
1742
+ if (repo !== 'signals/' + channel) tags.push(repo);
1683
1743
  const sectionRe = /^##\s+(.+)$/gm;
1684
1744
  let sectionMatch;
1685
1745
  while ((sectionMatch = sectionRe.exec(content)) !== null) {
@@ -1689,7 +1749,6 @@ async function indexSignals(options = {}) {
1689
1749
  }
1690
1750
  }
1691
1751
 
1692
- const repo = 'signals/' + channel;
1693
1752
  const id = generateEntryId(date, repo, file.replace(/\.md$/, ''));
1694
1753
  const hash = contentHash(content);
1695
1754
 
@@ -1975,6 +2034,32 @@ function computeQualityProfile(options = {}) {
1975
2034
 
1976
2035
  // ── Exports ─────────────────────────────────────────────────────────────────
1977
2036
 
2037
/**
 * Deduplicate search results by removing raw entries that have been absorbed
 * into distilled entries. If a distilled entry exists in the results, its
 * source entries (listed in distilledFrom) are removed.
 * @param {Array<{id: string, entry: Object, score?: number}>} results
 * @returns {Array} Deduplicated results
 */
function deduplicateResults(results) {
  if (!results || results.length === 0) return results;

  // Gather every source ID referenced by a distilled entry in the result set.
  const absorbed = new Set();
  for (const result of results) {
    const sources = result.entry && result.entry.distilledFrom;
    if (Array.isArray(sources)) {
      for (const sourceId of sources) {
        absorbed.add(sourceId);
      }
    }
  }

  // Nothing was absorbed — hand back the original array untouched.
  if (absorbed.size === 0) return results;

  // Drop any result whose ID was absorbed into a distilled entry.
  return results.filter((result) => !absorbed.has(result.id));
}
+
1978
2063
  module.exports = {
1979
2064
  // Parsing
1980
2065
  parseJournalFile,
@@ -2005,6 +2090,10 @@ module.exports = {
2005
2090
  isRepoExcluded,
2006
2091
  applyFilters,
2007
2092
 
2093
+ // Quality & dedup
2094
+ computeQualityScore,
2095
+ deduplicateResults,
2096
+
2008
2097
  // Core operations
2009
2098
  indexJournals,
2010
2099
  indexSignals,
package/bin/digest.js CHANGED
@@ -269,13 +269,18 @@ function collectFromStore(sinceDate, options = {}) {
269
269
  });
270
270
 
271
271
  if (entries.length === 0) {
272
- return { journals: '', signals: '', entryCount: 0 };
272
+ return { journals: '', signals: '', entryCount: 0, entryMeta: [] };
273
273
  }
274
274
 
275
275
  const journalParts = [];
276
276
  const signalParts = [];
277
+ const journalMeta = [];
278
+ const signalMeta = [];
277
279
 
278
280
  for (const { id, entry } of entries) {
281
+ // Skip raw entries that have been absorbed into a distilled entry
282
+ if (entry.distilledFrom) continue;
283
+
279
284
  const content = contentStore.getEntryContent(id, { storePath, journalDir, signalsDir });
280
285
  if (!content) continue;
281
286
 
@@ -287,10 +292,21 @@ function collectFromStore(sinceDate, options = {}) {
287
292
  const meta = author ? `${header}\n${author}\n` : `${header}\n`;
288
293
  const formatted = `${meta}\n${content}`;
289
294
 
295
+ const itemMeta = {
296
+ date: entry.date,
297
+ source: entry.source,
298
+ qualityScore: entry.qualityScore || 0,
299
+ hasReasoning: entry.hasReasoning,
300
+ hasAlternatives: entry.hasAlternatives,
301
+ distillTier: entry.distillTier || 'raw',
302
+ };
303
+
290
304
  if (source === 'signal') {
291
305
  signalParts.push(formatted);
306
+ signalMeta.push(itemMeta);
292
307
  } else {
293
308
  journalParts.push(formatted);
309
+ journalMeta.push(itemMeta);
294
310
  }
295
311
  }
296
312
 
@@ -298,6 +314,7 @@ function collectFromStore(sinceDate, options = {}) {
298
314
  journals: journalParts.join('\n\n---\n\n'),
299
315
  signals: signalParts.join('\n\n---\n\n'),
300
316
  entryCount: entries.length,
317
+ entryMeta: { journal: journalMeta, signal: signalMeta },
301
318
  };
302
319
  }
303
320
 
@@ -618,56 +635,102 @@ function buildPrompt(personaId, signalContent, journalContent, dateRange, contex
618
635
  }
619
636
 
620
637
  /**
621
- * Apply token budget constraints to signal and journal content.
622
- * Truncates oldest journal entries first, then signal content.
638
+ * Apply token budget constraints with quality-weighted packing.
639
+ * Higher quality entries are kept preferentially over low-quality ones.
623
640
  * @param {string} signalContent
624
641
  * @param {string} journalContent
625
642
  * @param {number} maxChars
626
- * @returns {{ signals: string, journals: string, truncated: boolean }}
643
+ * @param {Object} [options] - Optional metadata for quality-weighted packing
644
+ * @param {Object} [options.entryMeta] - { journal: [{qualityScore, date, ...}], signal: [...] }
645
+ * @param {Array} [options.scores] - Intelligence scores from Haiku scoring
646
+ * @param {string} [options.personaId] - Current persona for score lookup
647
+ * @returns {{ signals: string, journals: string, truncated: boolean, stats: Object }}
627
648
  */
628
- function applyTokenBudget(signalContent, journalContent, maxChars) {
649
+ function applyTokenBudget(signalContent, journalContent, maxChars, options = {}) {
629
650
  const total = signalContent.length + journalContent.length;
630
651
  if (total <= maxChars) {
631
- return { signals: signalContent, journals: journalContent, truncated: false };
652
+ return { signals: signalContent, journals: journalContent, truncated: false, stats: { dropped: 0 } };
653
+ }
654
+
655
+ const { entryMeta, scores, personaId } = options;
656
+
657
+ // Split into sections
658
+ const signalSections = signalContent ? signalContent.split('\n\n---\n\n') : [];
659
+ const journalSections = journalContent ? journalContent.split('\n\n---\n\n') : [];
660
+ const signalMetaArr = (entryMeta && entryMeta.signal) || [];
661
+ const journalMetaArr = (entryMeta && entryMeta.journal) || [];
662
+
663
+ // Score each section with composite priority
664
+ const todayStr = today();
665
+ const yesterdayStr = (() => { const d = new Date(); d.setDate(d.getDate() - 1); return d.toISOString().split('T')[0]; })();
666
+
667
+ const allSections = [];
668
+ for (let i = 0; i < signalSections.length; i++) {
669
+ const meta = signalMetaArr[i] || {};
670
+ const quality = meta.qualityScore || 0;
671
+ const recency = (meta.date === todayStr || meta.date === yesterdayStr) ? 1 : 0;
672
+ const intel = (scores && scores[i] && personaId) ? (scores[i][personaId] || 0) : 0;
673
+ const distillBonus = (meta.distillTier && meta.distillTier !== 'raw') ? 1 : 0;
674
+ allSections.push({
675
+ text: signalSections[i],
676
+ type: 'signal',
677
+ priority: quality + recency + intel + distillBonus,
678
+ len: signalSections[i].length,
679
+ });
680
+ }
681
+ for (let i = 0; i < journalSections.length; i++) {
682
+ const meta = journalMetaArr[i] || {};
683
+ const quality = meta.qualityScore || 0;
684
+ const recency = (meta.date === todayStr || meta.date === yesterdayStr) ? 1 : 0;
685
+ // Journal score indices start after signal count
686
+ const scoreIdx = signalSections.length + i;
687
+ const intel = (scores && scores[scoreIdx] && personaId) ? (scores[scoreIdx][personaId] || 0) : 0;
688
+ const distillBonus = (meta.distillTier && meta.distillTier !== 'raw') ? 1 : 0;
689
+ allSections.push({
690
+ text: journalSections[i],
691
+ type: 'journal',
692
+ priority: quality + recency + intel + distillBonus,
693
+ len: journalSections[i].length,
694
+ });
632
695
  }
633
696
 
634
- const truncationNote = '\n\n> Note: Input was truncated to fit within token budget. Some older entries may be omitted.\n';
635
- const noteLen = truncationNote.length;
636
- const available = maxChars - noteLen;
697
+ // Sort by priority descending (highest quality first)
698
+ allSections.sort((a, b) => b.priority - a.priority);
637
699
 
638
- let trimmedJournals = journalContent;
639
- let trimmedSignals = signalContent;
640
- let journalsTrimmed = false;
641
- let signalsTrimmed = false;
700
+ // Greedily pack into budget
701
+ const truncationNote = '\n\n> Note: Input was truncated to fit within token budget. Lower-quality entries were dropped.\n';
702
+ const available = maxChars - truncationNote.length;
703
+ const keptSignals = [];
704
+ const keptJournals = [];
705
+ let used = 0;
706
+ let dropped = 0;
642
707
 
643
- // Strategy: drop oldest journal entries first, then trim signals
644
- if (trimmedSignals.length + trimmedJournals.length > available) {
645
- // Try trimming journals first (keep newest entries)
646
- const journalBudget = Math.max(0, available - trimmedSignals.length);
647
- if (journalBudget < trimmedJournals.length) {
648
- trimmedJournals = trimmedJournals.slice(trimmedJournals.length - journalBudget);
649
- journalsTrimmed = true;
708
+ for (const section of allSections) {
709
+ const sectionCost = section.len + 7; // account for '\n\n---\n\n' separator
710
+ if (used + sectionCost <= available) {
711
+ if (section.type === 'signal') {
712
+ keptSignals.push(section.text);
713
+ } else {
714
+ keptJournals.push(section.text);
715
+ }
716
+ used += sectionCost;
717
+ } else {
718
+ dropped++;
650
719
  }
651
720
  }
652
721
 
653
- if (trimmedSignals.length + trimmedJournals.length > available) {
654
- // Still over — trim signal content from the end
655
- const signalBudget = Math.max(0, available - trimmedJournals.length);
656
- trimmedSignals = trimmedSignals.slice(0, signalBudget);
657
- signalsTrimmed = true;
658
- }
659
-
660
- // Append truncation note to whichever content was actually trimmed
661
- if (signalsTrimmed) {
662
- trimmedSignals += truncationNote;
663
- } else if (journalsTrimmed) {
664
- trimmedJournals += truncationNote;
722
+ const truncated = dropped > 0;
723
+ let finalSignals = keptSignals.join('\n\n---\n\n');
724
+ let finalJournals = keptJournals.join('\n\n---\n\n');
725
+ if (truncated) {
726
+ finalJournals += truncationNote;
665
727
  }
666
728
 
667
729
  return {
668
- signals: trimmedSignals,
669
- journals: trimmedJournals,
670
- truncated: true,
730
+ signals: finalSignals,
731
+ journals: finalJournals,
732
+ truncated,
733
+ stats: { dropped, total: allSections.length, kept: allSections.length - dropped },
671
734
  };
672
735
  }
673
736
 
@@ -748,7 +811,11 @@ async function generateDigest(config, personaIds, sinceDate, onProgress) {
748
811
  ({ signals: pSignals, journals: pJournals } =
749
812
  intelligence.filterForPersona(signalContent, journalContent, scores, personaId, threshold, allPersonaIds));
750
813
  }
751
- const budget = applyTokenBudget(pSignals, pJournals, maxInputChars);
814
+ const budget = applyTokenBudget(pSignals, pJournals, maxInputChars, {
815
+ entryMeta: storeResult.entryMeta,
816
+ scores,
817
+ personaId,
818
+ });
752
819
  pSignals = budget.signals;
753
820
  pJournals = budget.journals;
754
821
 
@@ -805,7 +872,21 @@ async function generateDigest(config, personaIds, sinceDate, onProgress) {
805
872
  fs.writeFileSync(combinedFile, combinedContent, 'utf8');
806
873
  files.push(combinedFile);
807
874
 
808
- return { files, personas: personaIds, dateRange, scores };
875
+ // Compute input stats for preview mode
876
+ const entryMeta = storeResult.entryMeta || {};
877
+ const journalMeta = entryMeta.journal || [];
878
+ const signalMeta = entryMeta.signal || [];
879
+ const inputStats = {
880
+ journalEntries: journalMeta.length,
881
+ signalEntries: signalMeta.length,
882
+ qualityDistribution: {
883
+ rich: journalMeta.filter(m => m.qualityScore >= 2).length,
884
+ medium: journalMeta.filter(m => m.qualityScore === 1).length,
885
+ thin: journalMeta.filter(m => m.qualityScore === 0).length,
886
+ },
887
+ };
888
+
889
+ return { files, personas: personaIds, dateRange, scores, inputStats };
809
890
  }
810
891
 
811
892
  /**
package/bin/distill.js ADDED
@@ -0,0 +1,356 @@
1
+ 'use strict';
2
+
3
+ const contentStore = require('./content-store');
4
+ const llm = require('./connectors/llm');
5
+
6
+ // ── Tier definitions ────────────────────────────────────────────────────────
7
+
8
+ const TIERS = {
9
+ daily: { minAgeDays: 3, maxAgeDays: 14 },
10
+ weekly: { minAgeDays: 14, maxAgeDays: 60 },
11
+ archive: { minAgeDays: 60, maxAgeDays: Infinity },
12
+ };
13
+
14
+ // ── Helpers ─────────────────────────────────────────────────────────────────
15
+
16
/**
 * Whole days elapsed from the given date (interpreted as UTC midnight) to now.
 * @param {string} dateStr - Date in YYYY-MM-DD form
 * @returns {number} Floored day count
 */
function daysAgo(dateStr) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const midnightUtc = new Date(`${dateStr}T00:00:00Z`).getTime();
  return Math.floor((Date.now() - midnightUtc) / MS_PER_DAY);
}
21
+
22
/**
 * Today's date in YYYY-MM-DD form (UTC).
 * @returns {string}
 */
function today() {
  // ISO timestamps are YYYY-MM-DDTHH:MM:SS…, so the first 10 chars are the date.
  return new Date().toISOString().slice(0, 10);
}
25
+
26
/**
 * Compute Jaccard similarity between two titles (word-level).
 * Comparison is case-insensitive; words of length <= 2 are ignored.
 * @param {string} a
 * @param {string} b
 * @returns {number} 0-1
 */
function titleSimilarity(a, b) {
  const tokenize = (title) =>
    new Set(
      (title || '')
        .toLowerCase()
        .split(/\s+/)
        .filter((word) => word.length > 2)
    );

  const setA = tokenize(a);
  const setB = tokenize(b);

  // Two empty token sets count as identical; exactly one empty shares nothing.
  if (setA.size === 0 && setB.size === 0) return 1;
  if (setA.size === 0 || setB.size === 0) return 0;

  let shared = 0;
  for (const word of setA) {
    if (setB.has(word)) shared++;
  }
  // Inclusion–exclusion gives the union size without building a merged Set.
  const union = setA.size + setB.size - shared;
  return union === 0 ? 0 : shared / union;
}
44
+
45
+ // ── Grouping ────────────────────────────────────────────────────────────────
46
+
47
/**
 * Group entries by (date, repo), then cluster by title similarity within each group.
 * @param {Array<{id: string, entry: Object}>} entries
 * @returns {Array<Array<{id: string, entry: Object}>>} Clusters of related entries
 */
function groupEntries(entries) {
  // Bucket by date+repo first. Keys contain '|', so a Map preserves
  // the same insertion order a plain object would here.
  const buckets = new Map();
  for (const item of entries) {
    const key = `${item.entry.date}|${item.entry.repo}`;
    const bucket = buckets.get(key);
    if (bucket) {
      bucket.push(item);
    } else {
      buckets.set(key, [item]);
    }
  }

  // Greedy clustering by title similarity within each bucket: each
  // unassigned item seeds a cluster and pulls in later similar items.
  const clusters = [];
  for (const items of buckets.values()) {
    if (items.length === 1) {
      clusters.push(items);
      continue;
    }

    const taken = new Array(items.length).fill(false);
    for (let i = 0; i < items.length; i++) {
      if (taken[i]) continue;
      taken[i] = true;
      const cluster = [items[i]];
      for (let j = i + 1; j < items.length; j++) {
        if (taken[j]) continue;
        if (titleSimilarity(items[i].entry.title, items[j].entry.title) > 0.8) {
          taken[j] = true;
          cluster.push(items[j]);
        }
      }
      clusters.push(cluster);
    }
  }

  return clusters;
}
87
+
88
+ // ── Deduplication ───────────────────────────────────────────────────────────
89
+
90
/**
 * Deduplicate a cluster of entries.
 * - Exact content_hash matches: keep highest quality_score
 * - Returns { canonical: [{id, entry}], absorbed: [ids] }
 */
function deduplicateGroup(cluster) {
  if (cluster.length <= 1) {
    return { canonical: cluster, absorbed: [] };
  }

  // Bucket entries by exact content hash.
  const byHash = {};
  for (const item of cluster) {
    const hash = item.entry.contentHash;
    if (!byHash[hash]) {
      byHash[hash] = [];
    }
    byHash[hash].push(item);
  }

  const canonical = [];
  const absorbed = [];

  for (const items of Object.values(byHash)) {
    if (items.length === 1) {
      canonical.push(items[0]);
      continue;
    }
    // Highest quality score wins; everything else in the bucket is absorbed.
    items.sort((a, b) => (b.entry.qualityScore || 0) - (a.entry.qualityScore || 0));
    const [winner, ...losers] = items;
    canonical.push(winner);
    for (const loser of losers) {
      absorbed.push(loser.id);
    }
  }

  return { canonical, absorbed };
}
126
+
127
+ // ── Merging ─────────────────────────────────────────────────────────────────
128
+
129
+ const MERGE_PROMPTS = {
130
+ daily: `You are merging duplicate decision entries from the same day and repo.
131
+ Remove exact duplicates. Keep all distinct decisions with full reasoning.
132
+ Return a single markdown entry that preserves all unique information.
133
+ Format: Start with the repo and title, then include all distinct decisions with their reasoning.`,
134
+
135
+ weekly: `You are creating a weekly summary for a repo.
136
+ Combine related decisions into a concise per-repo weekly summary.
137
+ Preserve key reasoning and alternatives that were considered.
138
+ Remove redundancy and boilerplate.
139
+ Format: A clean markdown summary organized by topic.`,
140
+
141
+ archive: `You are creating a monthly archive summary.
142
+ Compress multiple entries into a brief summary with key decisions and outcomes only.
143
+ Focus on what was decided and why, not the details of how.
144
+ Format: A compact markdown summary, max 500 words.`,
145
+ };
146
+
147
/**
 * Merge 2+ related entries into a single distilled entry via LLM.
 * @param {Array<{id: string, entry: Object}>} entries
 * @param {Object} llmConfig - { provider, model, api_key_env }
 * @param {string} tier - 'daily', 'weekly', or 'archive'
 * @returns {Promise<{content: string, title: string}>}
 */
async function mergeEntries(entries, llmConfig, tier) {
  const storePath = contentStore.DEFAULT_STORE_PATH;
  const journalDir = contentStore.DEFAULT_JOURNAL_DIR;
  const signalsDir = contentStore.DEFAULT_SIGNALS_DIR;

  // Resolve the full content of each entry, falling back to a one-line
  // stub when the underlying file can no longer be located.
  const parts = entries.map(({ id, entry }) => {
    const body = contentStore.getEntryContent(id, { storePath, journalDir, signalsDir });
    return body || `${entry.date} — ${entry.repo} — ${entry.title}`;
  });

  const result = await llm.chat({
    provider: llmConfig.provider || 'anthropic',
    model: llmConfig.model || 'claude-haiku-4-5-20251001',
    apiKeyEnv: llmConfig.api_key_env || 'ANTHROPIC_API_KEY',
    system: MERGE_PROMPTS[tier] || MERGE_PROMPTS.daily,
    messages: [{ role: 'user', content: parts.join('\n\n---\n\n') }],
    max_tokens: 2000,
  });

  // Synthesize a deterministic title from the first entry's repo and date.
  const { repo, date } = entries[0].entry;
  return { content: result, title: `[${tier}] ${repo} — ${date}` };
}
182
+
183
+ // ── Main Pipeline ───────────────────────────────────────────────────────────
184
+
185
/**
 * Run the distillation pipeline.
 * For each requested tier: query eligible raw entries in the tier's age
 * window, cluster them by (date, repo, title similarity), deduplicate exact
 * content-hash matches, and (outside dry-run) merge multi-entry clusters
 * into distilled entries via the LLM, logging a per-tier row to
 * distillation_log when the backend exposes a SQLite handle.
 * @param {Object} options
 * @param {string} [options.tier] - 'daily', 'weekly', 'archive', or 'all'
 * @param {boolean} [options.dryRun] - If true, don't write changes
 * @param {Object} [options.llmConfig] - LLM config for merge operations
 * @param {string} [options.storePath] - Content store path
 * @returns {Promise<Object>} Stats: { grouped, deduped, merged, llmCalls }
 */
async function distillEntries(options = {}) {
  const tierName = options.tier || 'daily';
  const dryRun = options.dryRun || false;
  const storePath = options.storePath || contentStore.DEFAULT_STORE_PATH;

  const tiersToRun = tierName === 'all'
    ? ['daily', 'weekly', 'archive']
    : [tierName];

  const totalStats = { grouped: 0, deduped: 0, merged: 0, llmCalls: 0 };

  for (const tier of tiersToRun) {
    const tierDef = TIERS[tier];
    if (!tierDef) {
      console.log(`Unknown tier: ${tier}`);
      continue;
    }

    // Calculate date range for this tier.
    // BUG FIX: the archive tier has maxAgeDays = Infinity, and
    // Date#setDate(getDate() - Infinity) produces an Invalid Date whose
    // toISOString() throws a RangeError. Clamp unbounded tiers to the epoch.
    const now = new Date();
    let since;
    if (Number.isFinite(tierDef.maxAgeDays)) {
      const sinceDate = new Date(now);
      sinceDate.setDate(sinceDate.getDate() - tierDef.maxAgeDays);
      since = sinceDate.toISOString().split('T')[0];
    } else {
      since = '1970-01-01'; // unbounded lower edge
    }
    const untilDate = new Date(now);
    untilDate.setDate(untilDate.getDate() - tierDef.minAgeDays);
    const until = untilDate.toISOString().split('T')[0];

    // Query entries eligible for this tier
    const entries = contentStore.queryMetadata({ since, until, storePath });

    // Filter: only raw entries that haven't been distilled yet
    const eligible = entries.filter(({ entry }) => {
      return (entry.distillTier === 'raw' || !entry.distillTier)
        && !entry.distilledAt
        && !entry.distilledFrom; // not already a distilled entry
    });

    if (eligible.length === 0) {
      console.log(` ${tier}: no eligible entries`);
      continue;
    }

    console.log(` ${tier}: ${eligible.length} eligible entries (${since} to ${until})`);

    // Group and cluster
    const clusters = groupEntries(eligible);
    totalStats.grouped += clusters.length;

    // BUG FIX: keep per-tier counters so the distillation_log row for this
    // tier records this tier's work only — previously the cumulative totals
    // leaked across tiers when running tier='all'.
    let tierDeduped = 0;
    let tierMerged = 0;
    let tierLlmCalls = 0;
    const mergeableClusters = [];

    for (const cluster of clusters) {
      const { canonical, absorbed } = deduplicateGroup(cluster);
      tierDeduped += absorbed.length;

      if (!dryRun && absorbed.length > 0) {
        // Mark absorbed entries
        const backend = contentStore.getBackend(storePath);
        const index = backend.loadIndex();
        for (const absorbedId of absorbed) {
          if (index.entries[absorbedId]) {
            index.entries[absorbedId].distilledAt = Date.now();
            index.entries[absorbedId].distillTier = tier;
          }
        }
        backend.saveIndex(index);
      }

      // Only merge if there are 2+ canonical entries in the cluster
      if (canonical.length >= 2) {
        mergeableClusters.push(canonical);
      }
    }

    totalStats.deduped += tierDeduped;

    if (dryRun) {
      console.log(` Would dedup: ${tierDeduped} entries`);
      console.log(` Would merge: ${mergeableClusters.length} clusters (${mergeableClusters.reduce((s, c) => s + c.length, 0)} entries)`);
      continue;
    }

    // Merge clusters via LLM
    if (mergeableClusters.length > 0 && options.llmConfig) {
      for (const cluster of mergeableClusters) {
        try {
          const { content, title } = await mergeEntries(cluster, options.llmConfig, tier);
          tierLlmCalls++;

          // Create distilled entry in the content store
          const firstEntry = cluster[0].entry;
          const absorbedIds = cluster.map(c => c.id);
          const id = contentStore.generateEntryId(firstEntry.date, firstEntry.repo, title);
          const hash = contentStore.contentHash(content);

          const backend = contentStore.getBackend(storePath);
          const index = backend.loadIndex();

          index.entries[id] = {
            date: firstEntry.date,
            repo: firstEntry.repo,
            title,
            source: 'distilled',
            user: '',
            drifted: false,
            contentHash: hash,
            contentLength: content.length,
            tags: firstEntry.tags || [],
            hasEmbedding: false,
            hasReasoning: true,
            hasAlternatives: false,
            qualityScore: 3, // distilled entries are high quality by definition
            distillTier: tier,
            distilledFrom: absorbedIds,
            distilledAt: Date.now(),
          };

          // Mark source entries as absorbed
          for (const item of cluster) {
            if (index.entries[item.id]) {
              index.entries[item.id].distilledAt = Date.now();
              index.entries[item.id].distillTier = tier;
            }
          }

          index.entryCount = Object.keys(index.entries).length;
          backend.saveIndex(index);
          tierMerged += cluster.length;
        } catch (err) {
          console.log(` Merge failed for cluster: ${err.message}`);
        }
      }
    }

    totalStats.merged += tierMerged;
    totalStats.llmCalls += tierLlmCalls;

    // Log the distillation run (per-tier figures only)
    if (!dryRun) {
      try {
        const backend = contentStore.getBackend(storePath);
        if (backend.db) {
          backend.db.prepare(`
            INSERT INTO distillation_log (run_at, tier, entries_input, entries_output, entries_merged, entries_deduped, llm_calls)
            VALUES (?, ?, ?, ?, ?, ?, ?)
          `).run(Date.now(), tier, eligible.length, eligible.length - tierDeduped - tierMerged, tierMerged, tierDeduped, tierLlmCalls);
        }
      } catch { /* non-fatal */ }
    }
  }

  return totalStats;
}
346
+
347
+ // ── Exports ─────────────────────────────────────────────────────────────────
348
+
349
+ module.exports = {
350
+ distillEntries,
351
+ groupEntries,
352
+ deduplicateGroup,
353
+ mergeEntries,
354
+ titleSimilarity,
355
+ TIERS,
356
+ };
package/bin/slack-bot.js CHANGED
@@ -639,7 +639,8 @@ async function searchDecisionTrail(query, config) {
639
639
  }
640
640
  }
641
641
 
642
- return results || [];
642
+ // Deduplicate: prefer distilled entries over their raw sources
643
+ return contentStore.deduplicateResults(results || []);
643
644
  }
644
645
 
645
646
  /**
@@ -40,6 +40,10 @@ CREATE TABLE IF NOT EXISTS decisions (
40
40
  has_embedding INTEGER DEFAULT 0,
41
41
  has_reasoning INTEGER DEFAULT 0,
42
42
  has_alternatives INTEGER DEFAULT 0,
43
+ quality_score INTEGER DEFAULT 0,
44
+ distill_tier TEXT DEFAULT 'raw',
45
+ distilled_from TEXT DEFAULT NULL,
46
+ distilled_at INTEGER DEFAULT NULL,
43
47
  created_at INTEGER,
44
48
  updated_at INTEGER
45
49
  );
@@ -48,6 +52,19 @@ CREATE INDEX IF NOT EXISTS idx_decisions_date ON decisions(date);
48
52
  CREATE INDEX IF NOT EXISTS idx_decisions_repo ON decisions(repo);
49
53
  CREATE INDEX IF NOT EXISTS idx_decisions_source ON decisions(source);
50
54
  CREATE INDEX IF NOT EXISTS idx_decisions_user ON decisions(user);
55
+ CREATE INDEX IF NOT EXISTS idx_decisions_quality ON decisions(quality_score);
56
+ CREATE INDEX IF NOT EXISTS idx_decisions_tier ON decisions(distill_tier);
57
+
58
+ CREATE TABLE IF NOT EXISTS distillation_log (
59
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
60
+ run_at INTEGER NOT NULL,
61
+ tier TEXT NOT NULL,
62
+ entries_input INTEGER DEFAULT 0,
63
+ entries_output INTEGER DEFAULT 0,
64
+ entries_merged INTEGER DEFAULT 0,
65
+ entries_deduped INTEGER DEFAULT 0,
66
+ llm_calls INTEGER DEFAULT 0
67
+ );
51
68
 
52
69
  CREATE TABLE IF NOT EXISTS embeddings (
53
70
  id TEXT PRIMARY KEY,
@@ -90,6 +107,10 @@ function entryToRow(id, entry) {
90
107
  has_embedding: entry.hasEmbedding ? 1 : 0,
91
108
  has_reasoning: entry.hasReasoning ? 1 : 0,
92
109
  has_alternatives: entry.hasAlternatives ? 1 : 0,
110
+ quality_score: entry.qualityScore || 0,
111
+ distill_tier: entry.distillTier || 'raw',
112
+ distilled_from: entry.distilledFrom ? JSON.stringify(entry.distilledFrom) : null,
113
+ distilled_at: entry.distilledAt || null,
93
114
  created_at: entry.createdAt || Date.now(),
94
115
  updated_at: Date.now(),
95
116
  };
@@ -109,6 +130,10 @@ function rowToEntry(row) {
109
130
  hasEmbedding: !!row.has_embedding,
110
131
  hasReasoning: !!row.has_reasoning,
111
132
  hasAlternatives: !!row.has_alternatives,
133
+ qualityScore: row.quality_score || 0,
134
+ distillTier: row.distill_tier || 'raw',
135
+ distilledFrom: row.distilled_from ? JSON.parse(row.distilled_from) : null,
136
+ distilledAt: row.distilled_at || null,
112
137
  };
113
138
  }
114
139
 
@@ -137,6 +162,21 @@ class SqliteBackend {
137
162
  if (!existing) {
138
163
  this.db.prepare('INSERT INTO metadata (key, value) VALUES (?, ?)').run('schema_version', SCHEMA_VERSION);
139
164
  }
165
+
166
+ // Migrate existing databases: add new columns if they don't exist
167
+ const cols = this.db.prepare('PRAGMA table_info(decisions)').all().map(c => c.name);
168
+ if (!cols.includes('quality_score')) {
169
+ this.db.exec('ALTER TABLE decisions ADD COLUMN quality_score INTEGER DEFAULT 0');
170
+ }
171
+ if (!cols.includes('distill_tier')) {
172
+ this.db.exec('ALTER TABLE decisions ADD COLUMN distill_tier TEXT DEFAULT \'raw\'');
173
+ }
174
+ if (!cols.includes('distilled_from')) {
175
+ this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_from TEXT DEFAULT NULL');
176
+ }
177
+ if (!cols.includes('distilled_at')) {
178
+ this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_at INTEGER DEFAULT NULL');
179
+ }
140
180
  }
141
181
 
142
182
  close() {
@@ -173,10 +213,12 @@ class SqliteBackend {
173
213
  const stmt = this.db.prepare(`
174
214
  INSERT INTO decisions (id, date, repo, title, source, user, drifted,
175
215
  content_hash, content_length, tags, has_embedding, has_reasoning,
176
- has_alternatives, created_at, updated_at)
216
+ has_alternatives, quality_score, distill_tier, distilled_from, distilled_at,
217
+ created_at, updated_at)
177
218
  VALUES (@id, @date, @repo, @title, @source, @user, @drifted,
178
219
  @content_hash, @content_length, @tags, @has_embedding, @has_reasoning,
179
- @has_alternatives, @created_at, @updated_at)
220
+ @has_alternatives, @quality_score, @distill_tier, @distilled_from, @distilled_at,
221
+ @created_at, @updated_at)
180
222
  `);
181
223
  for (const [id, entry] of Object.entries(entries)) {
182
224
  stmt.run(entryToRow(id, entry));
@@ -718,6 +718,14 @@ async function runPull(args) {
718
718
  writeConnectorsConfig(freshConfig);
719
719
  printPullResult(name, result);
720
720
  }
721
+ // Auto-index signals into content store after pull
722
+ try {
723
+ console.log('\nIndexing signals...');
724
+ const signalStats = await contentStore.indexSignals({ embeddings: false });
725
+ console.log(` ${signalStats.newEntries} new, ${signalStats.updatedEntries} updated, ${signalStats.skippedEntries} unchanged`);
726
+ } catch (err) {
727
+ console.log(` Signal indexing skipped: ${err.message}`);
728
+ }
721
729
  return;
722
730
  }
723
731
 
@@ -827,6 +835,14 @@ async function runPull(args) {
827
835
  writeConnectorsConfig(freshConfig);
828
836
 
829
837
  printPullResult(channel, result);
838
+
839
+ // Auto-index signals into content store after pull
840
+ try {
841
+ const signalStats = await contentStore.indexSignals({ embeddings: false });
842
+ console.log(`\nSignals indexed: ${signalStats.newEntries} new, ${signalStats.updatedEntries} updated`);
843
+ } catch (err) {
844
+ console.log(`Signal indexing skipped: ${err.message}`);
845
+ }
830
846
  }
831
847
 
832
848
  function runSignals() {
@@ -928,6 +944,7 @@ async function runDigest(args) {
928
944
  const personaIdx = args.indexOf('--persona');
929
945
  const sinceIdx = args.indexOf('--since');
930
946
  const deliver = args.includes('--deliver');
947
+ const preview = args.includes('--preview');
931
948
 
932
949
  // Determine personas
933
950
  let personaIds;
@@ -981,6 +998,32 @@ async function runDigest(args) {
981
998
  });
982
999
 
983
1000
  console.log('');
1001
+
1002
+ if (preview) {
1003
+ // Preview mode: print digest content and stats to stdout
1004
+ console.log('=== DIGEST PREVIEW ===');
1005
+ console.log('');
1006
+ if (result.inputStats) {
1007
+ const s = result.inputStats;
1008
+ console.log(`Input: ${s.journalEntries || 0} journal, ${s.signalEntries || 0} signal entries`);
1009
+ if (s.budgetStats) {
1010
+ console.log(`Budget: ${s.budgetStats.kept || 0} kept, ${s.budgetStats.dropped || 0} dropped`);
1011
+ }
1012
+ console.log('');
1013
+ }
1014
+ for (const f of result.files) {
1015
+ try {
1016
+ const content = fs.readFileSync(f, 'utf8');
1017
+ const personaId = path.basename(path.dirname(f)) || 'combined';
1018
+ console.log(`--- ${personaId} ---`);
1019
+ console.log(content);
1020
+ console.log('');
1021
+ } catch { /* skip unreadable */ }
1022
+ }
1023
+ console.log('=== END PREVIEW ===');
1024
+ return;
1025
+ }
1026
+
984
1027
  console.log('Digests generated:');
985
1028
  for (const f of result.files) {
986
1029
  console.log(` ${f}`);
@@ -1172,6 +1215,20 @@ async function runReindex(args) {
1172
1215
  const signalsOnly = args.includes('--signals-only');
1173
1216
  const doExport = args.includes('--export');
1174
1217
  const detectShifts = args.includes('--detect-shifts');
1218
+ const force = args.includes('--force');
1219
+
1220
+ if (force) {
1221
+ console.log('Force mode: clearing content store for full reindex...');
1222
+ try {
1223
+ const backend = contentStore.getBackend();
1224
+ const emptyIndex = { version: contentStore.INDEX_VERSION, entries: {}, lastUpdated: Date.now(), entryCount: 0 };
1225
+ backend.saveIndex(emptyIndex);
1226
+ // Clear conversation fingerprint cache so all transcripts are re-extracted
1227
+ backend.saveConversationIndex({});
1228
+ } catch (err) {
1229
+ console.log(` Warning: could not clear store: ${err.message}`);
1230
+ }
1231
+ }
1175
1232
 
1176
1233
  if (!conversationsOnly && !signalsOnly) {
1177
1234
  console.log('=== Journals ===');
@@ -1192,6 +1249,44 @@ async function runReindex(args) {
1192
1249
  console.log('=== Signals ===');
1193
1250
  await indexSignalsIfAvailable();
1194
1251
  }
1252
+
1253
+ // Optional: run distillation after reindex
1254
+ if (args.includes('--distill')) {
1255
+ console.log('');
1256
+ console.log('=== Distillation ===');
1257
+ await runDistill(['--tier', 'daily']);
1258
+ }
1259
+ }
1260
+
1261
+ async function runDistill(args) {
1262
+ const distill = require('./distill');
1263
+ const dryRun = args.includes('--dry-run');
1264
+ const tierIdx = args.indexOf('--tier');
1265
+ const tier = (tierIdx !== -1 && args[tierIdx + 1]) ? args[tierIdx + 1] : 'daily';
1266
+
1267
+ console.log(`Distilling content (tier: ${tier}${dryRun ? ', dry run' : ''})...`);
1268
+
1269
+ // Build LLM config from connectors
1270
+ let llmConfig = null;
1271
+ if (!dryRun) {
1272
+ const config = readConnectorsConfig();
1273
+ if (config.digest && config.digest.llm) {
1274
+ llmConfig = {
1275
+ provider: config.digest.llm.provider,
1276
+ model: config.digest.llm.intelligence?.model || 'claude-haiku-4-5-20251001',
1277
+ api_key_env: config.digest.llm.api_key_env,
1278
+ };
1279
+ }
1280
+ }
1281
+
1282
+ const stats = await distill.distillEntries({ tier, dryRun, llmConfig });
1283
+
1284
+ console.log('');
1285
+ console.log('Distillation results:');
1286
+ console.log(` Groups: ${stats.grouped}`);
1287
+ console.log(` Deduped: ${stats.deduped}`);
1288
+ console.log(` Merged: ${stats.merged}`);
1289
+ console.log(` LLM calls: ${stats.llmCalls}`);
1195
1290
  }
1196
1291
 
1197
1292
  /**
@@ -4736,6 +4831,10 @@ const COMMANDS = {
4736
4831
  desc: 'Index all signal sources (journals + conversations)',
4737
4832
  run: (args) => runReindex(args),
4738
4833
  },
4834
+ distill: {
4835
+ desc: 'Distill content store: dedup, merge, and compact entries',
4836
+ run: (args) => runDistill(args),
4837
+ },
4739
4838
  'index-journals': {
4740
4839
  desc: 'Index journal entries into the content store',
4741
4840
  run: (args) => runIndexJournals(args),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wayfind",
3
- "version": "2.0.28",
3
+ "version": "2.0.29",
4
4
  "description": "Team decision trail for AI-assisted development. The connective tissue between product, engineering, and strategy.",
5
5
  "bin": {
6
6
  "wayfind": "./bin/team-context.js"