wayfind 2.0.28 → 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/connectors/github.js +37 -1
- package/bin/content-store.js +127 -38
- package/bin/digest.js +118 -37
- package/bin/distill.js +356 -0
- package/bin/slack-bot.js +2 -1
- package/bin/storage/sqlite-backend.js +44 -2
- package/bin/team-context.js +99 -0
- package/package.json +1 -1
package/bin/connectors/github.js
CHANGED
|
@@ -303,7 +303,28 @@ async function pull(config, since) {
|
|
|
303
303
|
highlights.push(`${failedCount} CI failure(s)`);
|
|
304
304
|
}
|
|
305
305
|
|
|
306
|
-
repoHighlights.push({
|
|
306
|
+
repoHighlights.push({
|
|
307
|
+
repo: repoStr,
|
|
308
|
+
openPRs,
|
|
309
|
+
mergedPRs,
|
|
310
|
+
highlights,
|
|
311
|
+
topPRs: data.prs.slice(0, 5).map((pr) => ({
|
|
312
|
+
number: pr.number,
|
|
313
|
+
title: pr.title,
|
|
314
|
+
author: pr.user?.login || pr.user?.name || 'unknown',
|
|
315
|
+
state: pr.merged_at ? 'merged' : pr.state,
|
|
316
|
+
})),
|
|
317
|
+
topIssues: data.issues.slice(0, 5).map((iss) => ({
|
|
318
|
+
number: iss.number,
|
|
319
|
+
title: iss.title,
|
|
320
|
+
labels: (iss.labels || []).map((l) => (typeof l === 'string' ? l : l.name)).filter(Boolean),
|
|
321
|
+
state: iss.state,
|
|
322
|
+
})),
|
|
323
|
+
failedRuns: failed.map((r) => ({
|
|
324
|
+
name: r.name || r.workflow?.name || 'unknown',
|
|
325
|
+
branch: r.head_branch || '',
|
|
326
|
+
})),
|
|
327
|
+
});
|
|
307
328
|
}
|
|
308
329
|
|
|
309
330
|
// Generate rollup summary
|
|
@@ -489,6 +510,21 @@ function generateSummaryMarkdown(
|
|
|
489
510
|
for (const h of rh.highlights) {
|
|
490
511
|
lines.push(`- ${h}`);
|
|
491
512
|
}
|
|
513
|
+
if (rh.topPRs && rh.topPRs.length > 0) {
|
|
514
|
+
const prItems = rh.topPRs.map((pr) => `#${pr.number} "${pr.title}" (${pr.author}, ${pr.state})`);
|
|
515
|
+
lines.push(`**PRs:** ${prItems.join(' | ')}`);
|
|
516
|
+
}
|
|
517
|
+
if (rh.topIssues && rh.topIssues.length > 0) {
|
|
518
|
+
const issueItems = rh.topIssues.map((iss) => {
|
|
519
|
+
const labels = iss.labels && iss.labels.length > 0 ? ` [${iss.labels.join(', ')}]` : '';
|
|
520
|
+
return `#${iss.number} "${iss.title}"${labels} (${iss.state})`;
|
|
521
|
+
});
|
|
522
|
+
lines.push(`**Issues:** ${issueItems.join(' | ')}`);
|
|
523
|
+
}
|
|
524
|
+
if (rh.failedRuns && rh.failedRuns.length > 0) {
|
|
525
|
+
const runItems = rh.failedRuns.map((r) => `${r.name}${r.branch ? ' (' + r.branch + ')' : ''}`);
|
|
526
|
+
lines.push(`**Failed CI:** ${runItems.join(' | ')}`);
|
|
527
|
+
}
|
|
492
528
|
lines.push('');
|
|
493
529
|
}
|
|
494
530
|
|
package/bin/content-store.js
CHANGED
|
@@ -212,6 +212,22 @@ function generateEntryId(date, repo, title) {
|
|
|
212
212
|
return crypto.createHash('sha256').update(input).digest('hex').slice(0, 12);
|
|
213
213
|
}
|
|
214
214
|
|
|
215
|
+
/**
|
|
216
|
+
* Compute a quality score for an entry (0-3).
|
|
217
|
+
* +1 if has reasoning (explains WHY)
|
|
218
|
+
* +1 if has alternatives (what was rejected)
|
|
219
|
+
* +1 if substantive content (>500 chars)
|
|
220
|
+
* @param {Object} entry - Entry metadata
|
|
221
|
+
* @returns {number} 0-3
|
|
222
|
+
*/
|
|
223
|
+
function computeQualityScore(entry) {
|
|
224
|
+
let score = 0;
|
|
225
|
+
if (entry.hasReasoning) score++;
|
|
226
|
+
if (entry.hasAlternatives) score++;
|
|
227
|
+
if ((entry.contentLength || 0) > 500) score++;
|
|
228
|
+
return score;
|
|
229
|
+
}
|
|
230
|
+
|
|
215
231
|
/**
|
|
216
232
|
* Build the text content for embedding from an entry's fields.
|
|
217
233
|
* @param {Object} entry - Entry with date, repo, title, fields
|
|
@@ -329,7 +345,7 @@ async function indexJournals(options = {}) {
|
|
|
329
345
|
const content = buildContent({ ...entry, date, author });
|
|
330
346
|
const hash = contentHash(content);
|
|
331
347
|
|
|
332
|
-
|
|
348
|
+
const entryMeta = {
|
|
333
349
|
date,
|
|
334
350
|
repo: entry.repo,
|
|
335
351
|
title: entry.title,
|
|
@@ -339,8 +355,12 @@ async function indexJournals(options = {}) {
|
|
|
339
355
|
contentLength: content.length,
|
|
340
356
|
tags: extractTags(entry),
|
|
341
357
|
hasEmbedding: false,
|
|
358
|
+
hasReasoning: false,
|
|
359
|
+
hasAlternatives: false,
|
|
342
360
|
_content: content, // temporary, not saved to index
|
|
343
361
|
};
|
|
362
|
+
entryMeta.qualityScore = computeQualityScore(entryMeta);
|
|
363
|
+
newEntries[id] = entryMeta;
|
|
344
364
|
}
|
|
345
365
|
}
|
|
346
366
|
|
|
@@ -744,39 +764,50 @@ function getEntryContent(entryId, options = {}) {
|
|
|
744
764
|
// ── Signal entries ──────────────────────────────────────────────────────
|
|
745
765
|
if (entry.source === 'signal') {
|
|
746
766
|
if (!signalsDir) return null;
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
path.join(channelDir, `${entry.date}-summary.md`),
|
|
759
|
-
];
|
|
760
|
-
for (const filePath of dateCandidates) {
|
|
767
|
+
const repo = entry.repo || '';
|
|
768
|
+
|
|
769
|
+
// Determine file location based on repo format:
|
|
770
|
+
// - 'signals/channel' (summary files) → signalsDir/channel/
|
|
771
|
+
// - 'owner/repo' (per-repo files) → find the channel dir containing owner/repo/
|
|
772
|
+
let searchDirs = [];
|
|
773
|
+
if (repo.startsWith('signals/')) {
|
|
774
|
+
const channel = repo.replace(/^signals\//, '');
|
|
775
|
+
searchDirs = [path.join(signalsDir, channel)];
|
|
776
|
+
} else {
|
|
777
|
+
// Per-repo entry: search all channel dirs for owner/repo subdirectory
|
|
761
778
|
try {
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
779
|
+
const channels = fs.readdirSync(signalsDir, { withFileTypes: true })
|
|
780
|
+
.filter(d => d.isDirectory()).map(d => d.name);
|
|
781
|
+
for (const ch of channels) {
|
|
782
|
+
const repoDir = path.join(signalsDir, ch, repo);
|
|
783
|
+
if (fs.existsSync(repoDir)) {
|
|
784
|
+
searchDirs.push(repoDir);
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
} catch { /* skip */ }
|
|
766
788
|
}
|
|
767
789
|
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
790
|
+
for (const dir of searchDirs) {
|
|
791
|
+
if (!fs.existsSync(dir)) continue;
|
|
792
|
+
// Try date-based filename first, then summary, then scan
|
|
793
|
+
const dateCandidates = [
|
|
794
|
+
path.join(dir, `${entry.date}.md`),
|
|
795
|
+
path.join(dir, `${entry.date}-summary.md`),
|
|
796
|
+
];
|
|
797
|
+
for (const filePath of dateCandidates) {
|
|
772
798
|
try {
|
|
773
|
-
return fs.readFileSync(
|
|
774
|
-
} catch {
|
|
775
|
-
continue;
|
|
776
|
-
}
|
|
799
|
+
return fs.readFileSync(filePath, 'utf8');
|
|
800
|
+
} catch { /* try next */ }
|
|
777
801
|
}
|
|
778
|
-
|
|
779
|
-
|
|
802
|
+
// Scan for files matching the date
|
|
803
|
+
try {
|
|
804
|
+
const files = fs.readdirSync(dir).filter(f => f.endsWith('.md') && f.includes(entry.date));
|
|
805
|
+
for (const file of files) {
|
|
806
|
+
try {
|
|
807
|
+
return fs.readFileSync(path.join(dir, file), 'utf8');
|
|
808
|
+
} catch { continue; }
|
|
809
|
+
}
|
|
810
|
+
} catch { /* dir not readable */ }
|
|
780
811
|
}
|
|
781
812
|
|
|
782
813
|
return null;
|
|
@@ -1346,7 +1377,7 @@ async function indexConversations(options = {}) {
|
|
|
1346
1377
|
|
|
1347
1378
|
const hash = contentHash(content);
|
|
1348
1379
|
|
|
1349
|
-
|
|
1380
|
+
const convEntry = {
|
|
1350
1381
|
date,
|
|
1351
1382
|
repo: transcript.repo,
|
|
1352
1383
|
title: decision.title,
|
|
@@ -1361,6 +1392,8 @@ async function indexConversations(options = {}) {
|
|
|
1361
1392
|
hasAlternatives: !!decision.has_alternatives,
|
|
1362
1393
|
_content: content,
|
|
1363
1394
|
};
|
|
1395
|
+
convEntry.qualityScore = computeQualityScore(convEntry);
|
|
1396
|
+
existingIndex.entries[id] = convEntry;
|
|
1364
1397
|
|
|
1365
1398
|
if (doEmbeddings) {
|
|
1366
1399
|
try {
|
|
@@ -1653,16 +1686,42 @@ async function indexSignals(options = {}) {
|
|
|
1653
1686
|
|
|
1654
1687
|
for (const channel of channels) {
|
|
1655
1688
|
const channelDir = path.join(signalsDir, channel);
|
|
1656
|
-
|
|
1689
|
+
|
|
1690
|
+
// Collect all .md files: channel root + recursive owner/repo subdirectories
|
|
1691
|
+
const signalFiles = [];
|
|
1657
1692
|
try {
|
|
1658
|
-
|
|
1693
|
+
const entries = fs.readdirSync(channelDir, { withFileTypes: true });
|
|
1694
|
+
// Channel-root .md files (summaries like YYYY-MM-DD-summary.md)
|
|
1695
|
+
for (const e of entries) {
|
|
1696
|
+
if (e.isFile() && e.name.endsWith('.md')) {
|
|
1697
|
+
signalFiles.push({ filePath: path.join(channelDir, e.name), file: e.name, repo: 'signals/' + channel });
|
|
1698
|
+
}
|
|
1699
|
+
}
|
|
1700
|
+
// Walk owner/repo subdirectories (e.g., github/acme-corp/web-api/)
|
|
1701
|
+
for (const ownerEntry of entries) {
|
|
1702
|
+
if (!ownerEntry.isDirectory()) continue;
|
|
1703
|
+
const ownerDir = path.join(channelDir, ownerEntry.name);
|
|
1704
|
+
let repoEntries;
|
|
1705
|
+
try { repoEntries = fs.readdirSync(ownerDir, { withFileTypes: true }); } catch { continue; }
|
|
1706
|
+
for (const repoEntry of repoEntries) {
|
|
1707
|
+
if (!repoEntry.isDirectory()) continue;
|
|
1708
|
+
const repoDir = path.join(ownerDir, repoEntry.name);
|
|
1709
|
+
const repoStr = `${ownerEntry.name}/${repoEntry.name}`;
|
|
1710
|
+
let repoFiles;
|
|
1711
|
+
try { repoFiles = fs.readdirSync(repoDir).filter(f => f.endsWith('.md')); } catch { continue; }
|
|
1712
|
+
for (const f of repoFiles) {
|
|
1713
|
+
signalFiles.push({ filePath: path.join(repoDir, f), file: f, repo: repoStr });
|
|
1714
|
+
}
|
|
1715
|
+
}
|
|
1716
|
+
}
|
|
1659
1717
|
} catch {
|
|
1660
1718
|
continue;
|
|
1661
1719
|
}
|
|
1662
1720
|
|
|
1663
|
-
|
|
1721
|
+
signalFiles.sort((a, b) => a.file.localeCompare(b.file));
|
|
1722
|
+
|
|
1723
|
+
for (const { filePath, file, repo } of signalFiles) {
|
|
1664
1724
|
stats.fileCount++;
|
|
1665
|
-
const filePath = path.join(channelDir, file);
|
|
1666
1725
|
let content;
|
|
1667
1726
|
try {
|
|
1668
1727
|
content = fs.readFileSync(filePath, 'utf8');
|
|
@@ -1670,16 +1729,17 @@ async function indexSignals(options = {}) {
|
|
|
1670
1729
|
continue;
|
|
1671
1730
|
}
|
|
1672
1731
|
|
|
1673
|
-
// Extract date from filename (YYYY-MM-DD.md) or fall back to filename
|
|
1674
|
-
const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})
|
|
1732
|
+
// Extract date from filename (YYYY-MM-DD.md or YYYY-MM-DD-summary.md) or fall back to filename
|
|
1733
|
+
const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})/);
|
|
1675
1734
|
const date = dateMatch ? dateMatch[1] : file.replace(/\.md$/, '');
|
|
1676
1735
|
|
|
1677
1736
|
// Extract title from first # heading, or fall back to filename
|
|
1678
1737
|
const titleMatch = content.match(/^#\s+(.+)$/m);
|
|
1679
1738
|
const title = titleMatch ? titleMatch[1].trim() : file.replace(/\.md$/, '');
|
|
1680
1739
|
|
|
1681
|
-
// Extract tags: channel name + any ## section headings
|
|
1740
|
+
// Extract tags: channel name + repo + any ## section headings
|
|
1682
1741
|
const tags = [channel];
|
|
1742
|
+
if (repo !== 'signals/' + channel) tags.push(repo);
|
|
1683
1743
|
const sectionRe = /^##\s+(.+)$/gm;
|
|
1684
1744
|
let sectionMatch;
|
|
1685
1745
|
while ((sectionMatch = sectionRe.exec(content)) !== null) {
|
|
@@ -1689,7 +1749,6 @@ async function indexSignals(options = {}) {
|
|
|
1689
1749
|
}
|
|
1690
1750
|
}
|
|
1691
1751
|
|
|
1692
|
-
const repo = 'signals/' + channel;
|
|
1693
1752
|
const id = generateEntryId(date, repo, file.replace(/\.md$/, ''));
|
|
1694
1753
|
const hash = contentHash(content);
|
|
1695
1754
|
|
|
@@ -1975,6 +2034,32 @@ function computeQualityProfile(options = {}) {
|
|
|
1975
2034
|
|
|
1976
2035
|
// ── Exports ─────────────────────────────────────────────────────────────────
|
|
1977
2036
|
|
|
2037
|
+
/**
|
|
2038
|
+
* Deduplicate search results by removing raw entries that have been absorbed
|
|
2039
|
+
* into distilled entries. If a distilled entry exists in the results, its
|
|
2040
|
+
* source entries (listed in distilledFrom) are removed.
|
|
2041
|
+
* @param {Array<{id: string, entry: Object, score?: number}>} results
|
|
2042
|
+
* @returns {Array} Deduplicated results
|
|
2043
|
+
*/
|
|
2044
|
+
function deduplicateResults(results) {
|
|
2045
|
+
if (!results || results.length === 0) return results;
|
|
2046
|
+
|
|
2047
|
+
// Collect all IDs that have been absorbed into distilled entries
|
|
2048
|
+
const absorbedIds = new Set();
|
|
2049
|
+
for (const r of results) {
|
|
2050
|
+
if (r.entry && r.entry.distilledFrom && Array.isArray(r.entry.distilledFrom)) {
|
|
2051
|
+
for (const id of r.entry.distilledFrom) {
|
|
2052
|
+
absorbedIds.add(id);
|
|
2053
|
+
}
|
|
2054
|
+
}
|
|
2055
|
+
}
|
|
2056
|
+
|
|
2057
|
+
if (absorbedIds.size === 0) return results;
|
|
2058
|
+
|
|
2059
|
+
// Filter out absorbed entries
|
|
2060
|
+
return results.filter(r => !absorbedIds.has(r.id));
|
|
2061
|
+
}
|
|
2062
|
+
|
|
1978
2063
|
module.exports = {
|
|
1979
2064
|
// Parsing
|
|
1980
2065
|
parseJournalFile,
|
|
@@ -2005,6 +2090,10 @@ module.exports = {
|
|
|
2005
2090
|
isRepoExcluded,
|
|
2006
2091
|
applyFilters,
|
|
2007
2092
|
|
|
2093
|
+
// Quality & dedup
|
|
2094
|
+
computeQualityScore,
|
|
2095
|
+
deduplicateResults,
|
|
2096
|
+
|
|
2008
2097
|
// Core operations
|
|
2009
2098
|
indexJournals,
|
|
2010
2099
|
indexSignals,
|
package/bin/digest.js
CHANGED
|
@@ -269,13 +269,18 @@ function collectFromStore(sinceDate, options = {}) {
|
|
|
269
269
|
});
|
|
270
270
|
|
|
271
271
|
if (entries.length === 0) {
|
|
272
|
-
return { journals: '', signals: '', entryCount: 0 };
|
|
272
|
+
return { journals: '', signals: '', entryCount: 0, entryMeta: [] };
|
|
273
273
|
}
|
|
274
274
|
|
|
275
275
|
const journalParts = [];
|
|
276
276
|
const signalParts = [];
|
|
277
|
+
const journalMeta = [];
|
|
278
|
+
const signalMeta = [];
|
|
277
279
|
|
|
278
280
|
for (const { id, entry } of entries) {
|
|
281
|
+
// Skip raw entries that have been absorbed into a distilled entry
|
|
282
|
+
if (entry.distilledFrom) continue;
|
|
283
|
+
|
|
279
284
|
const content = contentStore.getEntryContent(id, { storePath, journalDir, signalsDir });
|
|
280
285
|
if (!content) continue;
|
|
281
286
|
|
|
@@ -287,10 +292,21 @@ function collectFromStore(sinceDate, options = {}) {
|
|
|
287
292
|
const meta = author ? `${header}\n${author}\n` : `${header}\n`;
|
|
288
293
|
const formatted = `${meta}\n${content}`;
|
|
289
294
|
|
|
295
|
+
const itemMeta = {
|
|
296
|
+
date: entry.date,
|
|
297
|
+
source: entry.source,
|
|
298
|
+
qualityScore: entry.qualityScore || 0,
|
|
299
|
+
hasReasoning: entry.hasReasoning,
|
|
300
|
+
hasAlternatives: entry.hasAlternatives,
|
|
301
|
+
distillTier: entry.distillTier || 'raw',
|
|
302
|
+
};
|
|
303
|
+
|
|
290
304
|
if (source === 'signal') {
|
|
291
305
|
signalParts.push(formatted);
|
|
306
|
+
signalMeta.push(itemMeta);
|
|
292
307
|
} else {
|
|
293
308
|
journalParts.push(formatted);
|
|
309
|
+
journalMeta.push(itemMeta);
|
|
294
310
|
}
|
|
295
311
|
}
|
|
296
312
|
|
|
@@ -298,6 +314,7 @@ function collectFromStore(sinceDate, options = {}) {
|
|
|
298
314
|
journals: journalParts.join('\n\n---\n\n'),
|
|
299
315
|
signals: signalParts.join('\n\n---\n\n'),
|
|
300
316
|
entryCount: entries.length,
|
|
317
|
+
entryMeta: { journal: journalMeta, signal: signalMeta },
|
|
301
318
|
};
|
|
302
319
|
}
|
|
303
320
|
|
|
@@ -618,56 +635,102 @@ function buildPrompt(personaId, signalContent, journalContent, dateRange, contex
|
|
|
618
635
|
}
|
|
619
636
|
|
|
620
637
|
/**
|
|
621
|
-
* Apply token budget constraints
|
|
622
|
-
*
|
|
638
|
+
* Apply token budget constraints with quality-weighted packing.
|
|
639
|
+
* Higher quality entries are kept preferentially over low-quality ones.
|
|
623
640
|
* @param {string} signalContent
|
|
624
641
|
* @param {string} journalContent
|
|
625
642
|
* @param {number} maxChars
|
|
626
|
-
* @
|
|
643
|
+
* @param {Object} [options] - Optional metadata for quality-weighted packing
|
|
644
|
+
* @param {Object} [options.entryMeta] - { journal: [{qualityScore, date, ...}], signal: [...] }
|
|
645
|
+
* @param {Array} [options.scores] - Intelligence scores from Haiku scoring
|
|
646
|
+
* @param {string} [options.personaId] - Current persona for score lookup
|
|
647
|
+
* @returns {{ signals: string, journals: string, truncated: boolean, stats: Object }}
|
|
627
648
|
*/
|
|
628
|
-
function applyTokenBudget(signalContent, journalContent, maxChars) {
|
|
649
|
+
function applyTokenBudget(signalContent, journalContent, maxChars, options = {}) {
|
|
629
650
|
const total = signalContent.length + journalContent.length;
|
|
630
651
|
if (total <= maxChars) {
|
|
631
|
-
return { signals: signalContent, journals: journalContent, truncated: false };
|
|
652
|
+
return { signals: signalContent, journals: journalContent, truncated: false, stats: { dropped: 0 } };
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
const { entryMeta, scores, personaId } = options;
|
|
656
|
+
|
|
657
|
+
// Split into sections
|
|
658
|
+
const signalSections = signalContent ? signalContent.split('\n\n---\n\n') : [];
|
|
659
|
+
const journalSections = journalContent ? journalContent.split('\n\n---\n\n') : [];
|
|
660
|
+
const signalMetaArr = (entryMeta && entryMeta.signal) || [];
|
|
661
|
+
const journalMetaArr = (entryMeta && entryMeta.journal) || [];
|
|
662
|
+
|
|
663
|
+
// Score each section with composite priority
|
|
664
|
+
const todayStr = today();
|
|
665
|
+
const yesterdayStr = (() => { const d = new Date(); d.setDate(d.getDate() - 1); return d.toISOString().split('T')[0]; })();
|
|
666
|
+
|
|
667
|
+
const allSections = [];
|
|
668
|
+
for (let i = 0; i < signalSections.length; i++) {
|
|
669
|
+
const meta = signalMetaArr[i] || {};
|
|
670
|
+
const quality = meta.qualityScore || 0;
|
|
671
|
+
const recency = (meta.date === todayStr || meta.date === yesterdayStr) ? 1 : 0;
|
|
672
|
+
const intel = (scores && scores[i] && personaId) ? (scores[i][personaId] || 0) : 0;
|
|
673
|
+
const distillBonus = (meta.distillTier && meta.distillTier !== 'raw') ? 1 : 0;
|
|
674
|
+
allSections.push({
|
|
675
|
+
text: signalSections[i],
|
|
676
|
+
type: 'signal',
|
|
677
|
+
priority: quality + recency + intel + distillBonus,
|
|
678
|
+
len: signalSections[i].length,
|
|
679
|
+
});
|
|
680
|
+
}
|
|
681
|
+
for (let i = 0; i < journalSections.length; i++) {
|
|
682
|
+
const meta = journalMetaArr[i] || {};
|
|
683
|
+
const quality = meta.qualityScore || 0;
|
|
684
|
+
const recency = (meta.date === todayStr || meta.date === yesterdayStr) ? 1 : 0;
|
|
685
|
+
// Journal score indices start after signal count
|
|
686
|
+
const scoreIdx = signalSections.length + i;
|
|
687
|
+
const intel = (scores && scores[scoreIdx] && personaId) ? (scores[scoreIdx][personaId] || 0) : 0;
|
|
688
|
+
const distillBonus = (meta.distillTier && meta.distillTier !== 'raw') ? 1 : 0;
|
|
689
|
+
allSections.push({
|
|
690
|
+
text: journalSections[i],
|
|
691
|
+
type: 'journal',
|
|
692
|
+
priority: quality + recency + intel + distillBonus,
|
|
693
|
+
len: journalSections[i].length,
|
|
694
|
+
});
|
|
632
695
|
}
|
|
633
696
|
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
const available = maxChars - noteLen;
|
|
697
|
+
// Sort by priority descending (highest quality first)
|
|
698
|
+
allSections.sort((a, b) => b.priority - a.priority);
|
|
637
699
|
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
700
|
+
// Greedily pack into budget
|
|
701
|
+
const truncationNote = '\n\n> Note: Input was truncated to fit within token budget. Lower-quality entries were dropped.\n';
|
|
702
|
+
const available = maxChars - truncationNote.length;
|
|
703
|
+
const keptSignals = [];
|
|
704
|
+
const keptJournals = [];
|
|
705
|
+
let used = 0;
|
|
706
|
+
let dropped = 0;
|
|
642
707
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
708
|
+
for (const section of allSections) {
|
|
709
|
+
const sectionCost = section.len + 7; // account for '\n\n---\n\n' separator
|
|
710
|
+
if (used + sectionCost <= available) {
|
|
711
|
+
if (section.type === 'signal') {
|
|
712
|
+
keptSignals.push(section.text);
|
|
713
|
+
} else {
|
|
714
|
+
keptJournals.push(section.text);
|
|
715
|
+
}
|
|
716
|
+
used += sectionCost;
|
|
717
|
+
} else {
|
|
718
|
+
dropped++;
|
|
650
719
|
}
|
|
651
720
|
}
|
|
652
721
|
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
}
|
|
659
|
-
|
|
660
|
-
// Append truncation note to whichever content was actually trimmed
|
|
661
|
-
if (signalsTrimmed) {
|
|
662
|
-
trimmedSignals += truncationNote;
|
|
663
|
-
} else if (journalsTrimmed) {
|
|
664
|
-
trimmedJournals += truncationNote;
|
|
722
|
+
const truncated = dropped > 0;
|
|
723
|
+
let finalSignals = keptSignals.join('\n\n---\n\n');
|
|
724
|
+
let finalJournals = keptJournals.join('\n\n---\n\n');
|
|
725
|
+
if (truncated) {
|
|
726
|
+
finalJournals += truncationNote;
|
|
665
727
|
}
|
|
666
728
|
|
|
667
729
|
return {
|
|
668
|
-
signals:
|
|
669
|
-
journals:
|
|
670
|
-
truncated
|
|
730
|
+
signals: finalSignals,
|
|
731
|
+
journals: finalJournals,
|
|
732
|
+
truncated,
|
|
733
|
+
stats: { dropped, total: allSections.length, kept: allSections.length - dropped },
|
|
671
734
|
};
|
|
672
735
|
}
|
|
673
736
|
|
|
@@ -748,7 +811,11 @@ async function generateDigest(config, personaIds, sinceDate, onProgress) {
|
|
|
748
811
|
({ signals: pSignals, journals: pJournals } =
|
|
749
812
|
intelligence.filterForPersona(signalContent, journalContent, scores, personaId, threshold, allPersonaIds));
|
|
750
813
|
}
|
|
751
|
-
const budget = applyTokenBudget(pSignals, pJournals, maxInputChars
|
|
814
|
+
const budget = applyTokenBudget(pSignals, pJournals, maxInputChars, {
|
|
815
|
+
entryMeta: storeResult.entryMeta,
|
|
816
|
+
scores,
|
|
817
|
+
personaId,
|
|
818
|
+
});
|
|
752
819
|
pSignals = budget.signals;
|
|
753
820
|
pJournals = budget.journals;
|
|
754
821
|
|
|
@@ -805,7 +872,21 @@ async function generateDigest(config, personaIds, sinceDate, onProgress) {
|
|
|
805
872
|
fs.writeFileSync(combinedFile, combinedContent, 'utf8');
|
|
806
873
|
files.push(combinedFile);
|
|
807
874
|
|
|
808
|
-
|
|
875
|
+
// Compute input stats for preview mode
|
|
876
|
+
const entryMeta = storeResult.entryMeta || {};
|
|
877
|
+
const journalMeta = entryMeta.journal || [];
|
|
878
|
+
const signalMeta = entryMeta.signal || [];
|
|
879
|
+
const inputStats = {
|
|
880
|
+
journalEntries: journalMeta.length,
|
|
881
|
+
signalEntries: signalMeta.length,
|
|
882
|
+
qualityDistribution: {
|
|
883
|
+
rich: journalMeta.filter(m => m.qualityScore >= 2).length,
|
|
884
|
+
medium: journalMeta.filter(m => m.qualityScore === 1).length,
|
|
885
|
+
thin: journalMeta.filter(m => m.qualityScore === 0).length,
|
|
886
|
+
},
|
|
887
|
+
};
|
|
888
|
+
|
|
889
|
+
return { files, personas: personaIds, dateRange, scores, inputStats };
|
|
809
890
|
}
|
|
810
891
|
|
|
811
892
|
/**
|
package/bin/distill.js
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const contentStore = require('./content-store');
|
|
4
|
+
const llm = require('./connectors/llm');
|
|
5
|
+
|
|
6
|
+
// ── Tier definitions ────────────────────────────────────────────────────────
|
|
7
|
+
|
|
8
|
+
const TIERS = {
|
|
9
|
+
daily: { minAgeDays: 3, maxAgeDays: 14 },
|
|
10
|
+
weekly: { minAgeDays: 14, maxAgeDays: 60 },
|
|
11
|
+
archive: { minAgeDays: 60, maxAgeDays: Infinity },
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
function daysAgo(dateStr) {
|
|
17
|
+
const now = new Date();
|
|
18
|
+
const then = new Date(dateStr + 'T00:00:00Z');
|
|
19
|
+
return Math.floor((now - then) / (1000 * 60 * 60 * 24));
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function today() {
|
|
23
|
+
return new Date().toISOString().split('T')[0];
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Compute Jaccard similarity between two titles (word-level).
|
|
28
|
+
* @param {string} a
|
|
29
|
+
* @param {string} b
|
|
30
|
+
* @returns {number} 0-1
|
|
31
|
+
*/
|
|
32
|
+
function titleSimilarity(a, b) {
|
|
33
|
+
const wordsA = new Set((a || '').toLowerCase().split(/\s+/).filter(w => w.length > 2));
|
|
34
|
+
const wordsB = new Set((b || '').toLowerCase().split(/\s+/).filter(w => w.length > 2));
|
|
35
|
+
if (wordsA.size === 0 && wordsB.size === 0) return 1;
|
|
36
|
+
if (wordsA.size === 0 || wordsB.size === 0) return 0;
|
|
37
|
+
let intersection = 0;
|
|
38
|
+
for (const w of wordsA) {
|
|
39
|
+
if (wordsB.has(w)) intersection++;
|
|
40
|
+
}
|
|
41
|
+
const union = new Set([...wordsA, ...wordsB]).size;
|
|
42
|
+
return union === 0 ? 0 : intersection / union;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// ── Grouping ────────────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Group entries by (date, repo), then cluster by title similarity within each group.
|
|
49
|
+
* @param {Array<{id: string, entry: Object}>} entries
|
|
50
|
+
* @returns {Array<Array<{id: string, entry: Object}>>} Clusters of related entries
|
|
51
|
+
*/
|
|
52
|
+
function groupEntries(entries) {
|
|
53
|
+
// Group by date+repo
|
|
54
|
+
const groups = {};
|
|
55
|
+
for (const item of entries) {
|
|
56
|
+
const key = `${item.entry.date}|${item.entry.repo}`;
|
|
57
|
+
if (!groups[key]) groups[key] = [];
|
|
58
|
+
groups[key].push(item);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Within each group, cluster by title similarity
|
|
62
|
+
const clusters = [];
|
|
63
|
+
for (const items of Object.values(groups)) {
|
|
64
|
+
if (items.length === 1) {
|
|
65
|
+
clusters.push(items);
|
|
66
|
+
continue;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const assigned = new Set();
|
|
70
|
+
for (let i = 0; i < items.length; i++) {
|
|
71
|
+
if (assigned.has(i)) continue;
|
|
72
|
+
const cluster = [items[i]];
|
|
73
|
+
assigned.add(i);
|
|
74
|
+
for (let j = i + 1; j < items.length; j++) {
|
|
75
|
+
if (assigned.has(j)) continue;
|
|
76
|
+
if (titleSimilarity(items[i].entry.title, items[j].entry.title) > 0.8) {
|
|
77
|
+
cluster.push(items[j]);
|
|
78
|
+
assigned.add(j);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
clusters.push(cluster);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
return clusters;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// ── Deduplication ───────────────────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Deduplicate a cluster of entries.
|
|
92
|
+
* - Exact content_hash matches: keep highest quality_score
|
|
93
|
+
* - Returns { canonical: [{id, entry}], absorbed: [ids] }
|
|
94
|
+
*/
|
|
95
|
+
function deduplicateGroup(cluster) {
|
|
96
|
+
if (cluster.length <= 1) {
|
|
97
|
+
return { canonical: cluster, absorbed: [] };
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Group by content hash
|
|
101
|
+
const byHash = {};
|
|
102
|
+
for (const item of cluster) {
|
|
103
|
+
const hash = item.entry.contentHash;
|
|
104
|
+
if (!byHash[hash]) byHash[hash] = [];
|
|
105
|
+
byHash[hash].push(item);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
const canonical = [];
|
|
109
|
+
const absorbed = [];
|
|
110
|
+
|
|
111
|
+
for (const items of Object.values(byHash)) {
|
|
112
|
+
if (items.length === 1) {
|
|
113
|
+
canonical.push(items[0]);
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
// Keep the one with highest quality score
|
|
117
|
+
items.sort((a, b) => (b.entry.qualityScore || 0) - (a.entry.qualityScore || 0));
|
|
118
|
+
canonical.push(items[0]);
|
|
119
|
+
for (let i = 1; i < items.length; i++) {
|
|
120
|
+
absorbed.push(items[i].id);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
return { canonical, absorbed };
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// ── Merging ─────────────────────────────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
const MERGE_PROMPTS = {
|
|
130
|
+
daily: `You are merging duplicate decision entries from the same day and repo.
|
|
131
|
+
Remove exact duplicates. Keep all distinct decisions with full reasoning.
|
|
132
|
+
Return a single markdown entry that preserves all unique information.
|
|
133
|
+
Format: Start with the repo and title, then include all distinct decisions with their reasoning.`,
|
|
134
|
+
|
|
135
|
+
weekly: `You are creating a weekly summary for a repo.
|
|
136
|
+
Combine related decisions into a concise per-repo weekly summary.
|
|
137
|
+
Preserve key reasoning and alternatives that were considered.
|
|
138
|
+
Remove redundancy and boilerplate.
|
|
139
|
+
Format: A clean markdown summary organized by topic.`,
|
|
140
|
+
|
|
141
|
+
archive: `You are creating a monthly archive summary.
|
|
142
|
+
Compress multiple entries into a brief summary with key decisions and outcomes only.
|
|
143
|
+
Focus on what was decided and why, not the details of how.
|
|
144
|
+
Format: A compact markdown summary, max 500 words.`,
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Merge 2+ related entries into a single distilled entry via LLM.
|
|
149
|
+
* @param {Array<{id: string, entry: Object}>} entries
|
|
150
|
+
* @param {Object} llmConfig - { provider, model, api_key_env }
|
|
151
|
+
* @param {string} tier - 'daily', 'weekly', or 'archive'
|
|
152
|
+
* @returns {Promise<{content: string, title: string}>}
|
|
153
|
+
*/
|
|
154
|
+
async function mergeEntries(entries, llmConfig, tier) {
|
|
155
|
+
const storePath = contentStore.DEFAULT_STORE_PATH;
|
|
156
|
+
const journalDir = contentStore.DEFAULT_JOURNAL_DIR;
|
|
157
|
+
const signalsDir = contentStore.DEFAULT_SIGNALS_DIR;
|
|
158
|
+
|
|
159
|
+
const parts = entries.map(({ id, entry }) => {
|
|
160
|
+
const content = contentStore.getEntryContent(id, { storePath, journalDir, signalsDir });
|
|
161
|
+
return content || `${entry.date} — ${entry.repo} — ${entry.title}`;
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
const systemPrompt = MERGE_PROMPTS[tier] || MERGE_PROMPTS.daily;
|
|
165
|
+
const userContent = parts.join('\n\n---\n\n');
|
|
166
|
+
|
|
167
|
+
const result = await llm.chat({
|
|
168
|
+
provider: llmConfig.provider || 'anthropic',
|
|
169
|
+
model: llmConfig.model || 'claude-haiku-4-5-20251001',
|
|
170
|
+
apiKeyEnv: llmConfig.api_key_env || 'ANTHROPIC_API_KEY',
|
|
171
|
+
system: systemPrompt,
|
|
172
|
+
messages: [{ role: 'user', content: userContent }],
|
|
173
|
+
max_tokens: 2000,
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
// Extract title from first line or generate one
|
|
177
|
+
const firstEntry = entries[0].entry;
|
|
178
|
+
const title = `[${tier}] ${firstEntry.repo} — ${firstEntry.date}`;
|
|
179
|
+
|
|
180
|
+
return { content: result, title };
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// ── Main Pipeline ───────────────────────────────────────────────────────────
|
|
184
|
+
|
|
185
|
+
/**
 * Run the distillation pipeline.
 *
 * For each requested tier this:
 *   1. queries content-store metadata in the tier's age window,
 *   2. filters to raw, not-yet-distilled entries,
 *   3. groups them into clusters and deduplicates each cluster,
 *   4. (unless dry-run) merges clusters of 2+ canonical entries via the LLM
 *      into a single distilled entry, marking the sources as absorbed,
 *   5. records the run in the distillation_log table when a db is available.
 *
 * @param {Object} options
 * @param {string} [options.tier] - 'daily', 'weekly', 'archive', or 'all'
 * @param {boolean} [options.dryRun] - If true, don't write changes
 * @param {Object} [options.llmConfig] - LLM config for merge operations
 * @param {string} [options.storePath] - Content store path
 * @returns {Promise<Object>} Stats: { grouped, deduped, merged, llmCalls }
 */
async function distillEntries(options = {}) {
  const tierName = options.tier || 'daily';
  const dryRun = options.dryRun || false;
  const storePath = options.storePath || contentStore.DEFAULT_STORE_PATH;

  const tiersToRun = tierName === 'all'
    ? ['daily', 'weekly', 'archive']
    : [tierName];

  const totalStats = { grouped: 0, deduped: 0, merged: 0, llmCalls: 0 };

  for (const tier of tiersToRun) {
    const tierDef = TIERS[tier];
    if (!tierDef) {
      console.log(`Unknown tier: ${tier}`);
      continue;
    }

    // Calculate date range for this tier: entries between minAgeDays and
    // maxAgeDays old are eligible.
    const now = new Date();
    const sinceDate = new Date(now);
    sinceDate.setDate(sinceDate.getDate() - tierDef.maxAgeDays);
    const untilDate = new Date(now);
    untilDate.setDate(untilDate.getDate() - tierDef.minAgeDays);

    const since = sinceDate.toISOString().split('T')[0];
    const until = untilDate.toISOString().split('T')[0];

    // Query entries eligible for this tier
    const entries = contentStore.queryMetadata({ since, until, storePath });

    // Filter: only raw entries that haven't been distilled yet
    const eligible = entries.filter(({ entry }) => {
      return (entry.distillTier === 'raw' || !entry.distillTier)
        && !entry.distilledAt
        && !entry.distilledFrom; // not already a distilled entry
    });

    if (eligible.length === 0) {
      console.log(`  ${tier}: no eligible entries`);
      continue;
    }

    console.log(`  ${tier}: ${eligible.length} eligible entries (${since} to ${until})`);

    // Group and cluster
    const clusters = groupEntries(eligible);
    totalStats.grouped += clusters.length;

    // Per-tier counters. Fix: the distillation_log row previously used the
    // cumulative totalStats.merged / totalStats.llmCalls, so with tier='all'
    // each later tier's row double-counted the earlier tiers' work.
    let tierDeduped = 0;
    let tierMerged = 0;
    let tierLlmCalls = 0;
    const mergeableClusters = [];

    // Fix: load the index once per tier for the dedup pass instead of doing a
    // full loadIndex()/saveIndex() round-trip for every cluster.
    const dedupBackend = dryRun ? null : contentStore.getBackend(storePath);
    const dedupIndex = dedupBackend ? dedupBackend.loadIndex() : null;
    let dedupDirty = false;

    // Deduplicate within each cluster
    for (const cluster of clusters) {
      const { canonical, absorbed } = deduplicateGroup(cluster);
      tierDeduped += absorbed.length;

      if (dedupIndex && absorbed.length > 0) {
        // Mark absorbed entries
        for (const absorbedId of absorbed) {
          if (dedupIndex.entries[absorbedId]) {
            dedupIndex.entries[absorbedId].distilledAt = Date.now();
            dedupIndex.entries[absorbedId].distillTier = tier;
            dedupDirty = true;
          }
        }
      }

      // Only merge if there are 2+ canonical entries in the cluster
      if (canonical.length >= 2) {
        mergeableClusters.push(canonical);
      }
    }

    if (dedupDirty) {
      dedupBackend.saveIndex(dedupIndex);
    }

    totalStats.deduped += tierDeduped;

    if (dryRun) {
      console.log(`  Would dedup: ${tierDeduped} entries`);
      console.log(`  Would merge: ${mergeableClusters.length} clusters (${mergeableClusters.reduce((s, c) => s + c.length, 0)} entries)`);
      continue;
    }

    // Merge clusters via LLM (skipped when no llmConfig was provided)
    if (mergeableClusters.length > 0 && options.llmConfig) {
      for (const cluster of mergeableClusters) {
        try {
          const { content, title } = await mergeEntries(cluster, options.llmConfig, tier);
          tierLlmCalls++;

          // Create distilled entry in the content store
          const firstEntry = cluster[0].entry;
          const absorbedIds = cluster.map(c => c.id);
          const id = contentStore.generateEntryId(firstEntry.date, firstEntry.repo, title);
          const hash = contentStore.contentHash(content);

          const backend = contentStore.getBackend(storePath);
          const index = backend.loadIndex();

          index.entries[id] = {
            date: firstEntry.date,
            repo: firstEntry.repo,
            title,
            source: 'distilled',
            user: '',
            drifted: false,
            contentHash: hash,
            contentLength: content.length,
            tags: firstEntry.tags || [],
            hasEmbedding: false,
            hasReasoning: true,
            hasAlternatives: false,
            qualityScore: 3, // distilled entries are high quality by definition
            distillTier: tier,
            distilledFrom: absorbedIds,
            distilledAt: Date.now(),
          };

          // Mark source entries as absorbed
          for (const item of cluster) {
            if (index.entries[item.id]) {
              index.entries[item.id].distilledAt = Date.now();
              index.entries[item.id].distillTier = tier;
            }
          }

          index.entryCount = Object.keys(index.entries).length;
          backend.saveIndex(index);
          tierMerged += cluster.length;
        } catch (err) {
          // Non-fatal: keep merging the remaining clusters
          console.log(`  Merge failed for cluster: ${err.message}`);
        }
      }
    }

    totalStats.merged += tierMerged;
    totalStats.llmCalls += tierLlmCalls;

    // Log the distillation run with this tier's figures only
    if (!dryRun) {
      try {
        const backend = contentStore.getBackend(storePath);
        if (backend.db) {
          backend.db.prepare(`
            INSERT INTO distillation_log (run_at, tier, entries_input, entries_output, entries_merged, entries_deduped, llm_calls)
            VALUES (?, ?, ?, ?, ?, ?, ?)
          `).run(Date.now(), tier, eligible.length, eligible.length - tierDeduped - tierMerged, tierMerged, tierDeduped, tierLlmCalls);
        }
      } catch { /* non-fatal */ }
    }
  }

  return totalStats;
}
|
|
346
|
+
|
|
347
|
+
// ── Exports ─────────────────────────────────────────────────────────────────
|
|
348
|
+
|
|
349
|
+
module.exports = {
  distillEntries,   // main pipeline entry point (async)
  groupEntries,     // cluster eligible entries
  deduplicateGroup, // pick canonical entries / absorbed duplicates in a cluster
  mergeEntries,     // LLM-merge a cluster into one distilled entry
  titleSimilarity,  // title similarity helper used for clustering
  TIERS,            // tier definitions (age windows per tier)
};
|
package/bin/storage/sqlite-backend.js
CHANGED
|
@@ -40,6 +40,10 @@ CREATE TABLE IF NOT EXISTS decisions (
|
|
|
40
40
|
has_embedding INTEGER DEFAULT 0,
|
|
41
41
|
has_reasoning INTEGER DEFAULT 0,
|
|
42
42
|
has_alternatives INTEGER DEFAULT 0,
|
|
43
|
+
quality_score INTEGER DEFAULT 0,
|
|
44
|
+
distill_tier TEXT DEFAULT 'raw',
|
|
45
|
+
distilled_from TEXT DEFAULT NULL,
|
|
46
|
+
distilled_at INTEGER DEFAULT NULL,
|
|
43
47
|
created_at INTEGER,
|
|
44
48
|
updated_at INTEGER
|
|
45
49
|
);
|
|
@@ -48,6 +52,19 @@ CREATE INDEX IF NOT EXISTS idx_decisions_date ON decisions(date);
|
|
|
48
52
|
CREATE INDEX IF NOT EXISTS idx_decisions_repo ON decisions(repo);
|
|
49
53
|
CREATE INDEX IF NOT EXISTS idx_decisions_source ON decisions(source);
|
|
50
54
|
CREATE INDEX IF NOT EXISTS idx_decisions_user ON decisions(user);
|
|
55
|
+
CREATE INDEX IF NOT EXISTS idx_decisions_quality ON decisions(quality_score);
|
|
56
|
+
CREATE INDEX IF NOT EXISTS idx_decisions_tier ON decisions(distill_tier);
|
|
57
|
+
|
|
58
|
+
CREATE TABLE IF NOT EXISTS distillation_log (
|
|
59
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
60
|
+
run_at INTEGER NOT NULL,
|
|
61
|
+
tier TEXT NOT NULL,
|
|
62
|
+
entries_input INTEGER DEFAULT 0,
|
|
63
|
+
entries_output INTEGER DEFAULT 0,
|
|
64
|
+
entries_merged INTEGER DEFAULT 0,
|
|
65
|
+
entries_deduped INTEGER DEFAULT 0,
|
|
66
|
+
llm_calls INTEGER DEFAULT 0
|
|
67
|
+
);
|
|
51
68
|
|
|
52
69
|
CREATE TABLE IF NOT EXISTS embeddings (
|
|
53
70
|
id TEXT PRIMARY KEY,
|
|
@@ -90,6 +107,10 @@ function entryToRow(id, entry) {
|
|
|
90
107
|
has_embedding: entry.hasEmbedding ? 1 : 0,
|
|
91
108
|
has_reasoning: entry.hasReasoning ? 1 : 0,
|
|
92
109
|
has_alternatives: entry.hasAlternatives ? 1 : 0,
|
|
110
|
+
quality_score: entry.qualityScore || 0,
|
|
111
|
+
distill_tier: entry.distillTier || 'raw',
|
|
112
|
+
distilled_from: entry.distilledFrom ? JSON.stringify(entry.distilledFrom) : null,
|
|
113
|
+
distilled_at: entry.distilledAt || null,
|
|
93
114
|
created_at: entry.createdAt || Date.now(),
|
|
94
115
|
updated_at: Date.now(),
|
|
95
116
|
};
|
|
@@ -109,6 +130,10 @@ function rowToEntry(row) {
|
|
|
109
130
|
hasEmbedding: !!row.has_embedding,
|
|
110
131
|
hasReasoning: !!row.has_reasoning,
|
|
111
132
|
hasAlternatives: !!row.has_alternatives,
|
|
133
|
+
qualityScore: row.quality_score || 0,
|
|
134
|
+
distillTier: row.distill_tier || 'raw',
|
|
135
|
+
distilledFrom: row.distilled_from ? JSON.parse(row.distilled_from) : null,
|
|
136
|
+
distilledAt: row.distilled_at || null,
|
|
112
137
|
};
|
|
113
138
|
}
|
|
114
139
|
|
|
@@ -137,6 +162,21 @@ class SqliteBackend {
|
|
|
137
162
|
if (!existing) {
|
|
138
163
|
this.db.prepare('INSERT INTO metadata (key, value) VALUES (?, ?)').run('schema_version', SCHEMA_VERSION);
|
|
139
164
|
}
|
|
165
|
+
|
|
166
|
+
// Migrate existing databases: add new columns if they don't exist
|
|
167
|
+
const cols = this.db.prepare('PRAGMA table_info(decisions)').all().map(c => c.name);
|
|
168
|
+
if (!cols.includes('quality_score')) {
|
|
169
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN quality_score INTEGER DEFAULT 0');
|
|
170
|
+
}
|
|
171
|
+
if (!cols.includes('distill_tier')) {
|
|
172
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN distill_tier TEXT DEFAULT \'raw\'');
|
|
173
|
+
}
|
|
174
|
+
if (!cols.includes('distilled_from')) {
|
|
175
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_from TEXT DEFAULT NULL');
|
|
176
|
+
}
|
|
177
|
+
if (!cols.includes('distilled_at')) {
|
|
178
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_at INTEGER DEFAULT NULL');
|
|
179
|
+
}
|
|
140
180
|
}
|
|
141
181
|
|
|
142
182
|
close() {
|
|
@@ -173,10 +213,12 @@ class SqliteBackend {
|
|
|
173
213
|
const stmt = this.db.prepare(`
|
|
174
214
|
INSERT INTO decisions (id, date, repo, title, source, user, drifted,
|
|
175
215
|
content_hash, content_length, tags, has_embedding, has_reasoning,
|
|
176
|
-
has_alternatives,
|
|
216
|
+
has_alternatives, quality_score, distill_tier, distilled_from, distilled_at,
|
|
217
|
+
created_at, updated_at)
|
|
177
218
|
VALUES (@id, @date, @repo, @title, @source, @user, @drifted,
|
|
178
219
|
@content_hash, @content_length, @tags, @has_embedding, @has_reasoning,
|
|
179
|
-
@has_alternatives, @
|
|
220
|
+
@has_alternatives, @quality_score, @distill_tier, @distilled_from, @distilled_at,
|
|
221
|
+
@created_at, @updated_at)
|
|
180
222
|
`);
|
|
181
223
|
for (const [id, entry] of Object.entries(entries)) {
|
|
182
224
|
stmt.run(entryToRow(id, entry));
|
package/bin/team-context.js
CHANGED
|
@@ -718,6 +718,14 @@ async function runPull(args) {
|
|
|
718
718
|
writeConnectorsConfig(freshConfig);
|
|
719
719
|
printPullResult(name, result);
|
|
720
720
|
}
|
|
721
|
+
// Auto-index signals into content store after pull
|
|
722
|
+
try {
|
|
723
|
+
console.log('\nIndexing signals...');
|
|
724
|
+
const signalStats = await contentStore.indexSignals({ embeddings: false });
|
|
725
|
+
console.log(` ${signalStats.newEntries} new, ${signalStats.updatedEntries} updated, ${signalStats.skippedEntries} unchanged`);
|
|
726
|
+
} catch (err) {
|
|
727
|
+
console.log(` Signal indexing skipped: ${err.message}`);
|
|
728
|
+
}
|
|
721
729
|
return;
|
|
722
730
|
}
|
|
723
731
|
|
|
@@ -827,6 +835,14 @@ async function runPull(args) {
|
|
|
827
835
|
writeConnectorsConfig(freshConfig);
|
|
828
836
|
|
|
829
837
|
printPullResult(channel, result);
|
|
838
|
+
|
|
839
|
+
// Auto-index signals into content store after pull
|
|
840
|
+
try {
|
|
841
|
+
const signalStats = await contentStore.indexSignals({ embeddings: false });
|
|
842
|
+
console.log(`\nSignals indexed: ${signalStats.newEntries} new, ${signalStats.updatedEntries} updated`);
|
|
843
|
+
} catch (err) {
|
|
844
|
+
console.log(`Signal indexing skipped: ${err.message}`);
|
|
845
|
+
}
|
|
830
846
|
}
|
|
831
847
|
|
|
832
848
|
function runSignals() {
|
|
@@ -928,6 +944,7 @@ async function runDigest(args) {
|
|
|
928
944
|
const personaIdx = args.indexOf('--persona');
|
|
929
945
|
const sinceIdx = args.indexOf('--since');
|
|
930
946
|
const deliver = args.includes('--deliver');
|
|
947
|
+
const preview = args.includes('--preview');
|
|
931
948
|
|
|
932
949
|
// Determine personas
|
|
933
950
|
let personaIds;
|
|
@@ -981,6 +998,32 @@ async function runDigest(args) {
|
|
|
981
998
|
});
|
|
982
999
|
|
|
983
1000
|
console.log('');
|
|
1001
|
+
|
|
1002
|
+
if (preview) {
|
|
1003
|
+
// Preview mode: print digest content and stats to stdout
|
|
1004
|
+
console.log('=== DIGEST PREVIEW ===');
|
|
1005
|
+
console.log('');
|
|
1006
|
+
if (result.inputStats) {
|
|
1007
|
+
const s = result.inputStats;
|
|
1008
|
+
console.log(`Input: ${s.journalEntries || 0} journal, ${s.signalEntries || 0} signal entries`);
|
|
1009
|
+
if (s.budgetStats) {
|
|
1010
|
+
console.log(`Budget: ${s.budgetStats.kept || 0} kept, ${s.budgetStats.dropped || 0} dropped`);
|
|
1011
|
+
}
|
|
1012
|
+
console.log('');
|
|
1013
|
+
}
|
|
1014
|
+
for (const f of result.files) {
|
|
1015
|
+
try {
|
|
1016
|
+
const content = fs.readFileSync(f, 'utf8');
|
|
1017
|
+
const personaId = path.basename(path.dirname(f)) || 'combined';
|
|
1018
|
+
console.log(`--- ${personaId} ---`);
|
|
1019
|
+
console.log(content);
|
|
1020
|
+
console.log('');
|
|
1021
|
+
} catch { /* skip unreadable */ }
|
|
1022
|
+
}
|
|
1023
|
+
console.log('=== END PREVIEW ===');
|
|
1024
|
+
return;
|
|
1025
|
+
}
|
|
1026
|
+
|
|
984
1027
|
console.log('Digests generated:');
|
|
985
1028
|
for (const f of result.files) {
|
|
986
1029
|
console.log(` ${f}`);
|
|
@@ -1172,6 +1215,20 @@ async function runReindex(args) {
|
|
|
1172
1215
|
const signalsOnly = args.includes('--signals-only');
|
|
1173
1216
|
const doExport = args.includes('--export');
|
|
1174
1217
|
const detectShifts = args.includes('--detect-shifts');
|
|
1218
|
+
const force = args.includes('--force');
|
|
1219
|
+
|
|
1220
|
+
if (force) {
|
|
1221
|
+
console.log('Force mode: clearing content store for full reindex...');
|
|
1222
|
+
try {
|
|
1223
|
+
const backend = contentStore.getBackend();
|
|
1224
|
+
const emptyIndex = { version: contentStore.INDEX_VERSION, entries: {}, lastUpdated: Date.now(), entryCount: 0 };
|
|
1225
|
+
backend.saveIndex(emptyIndex);
|
|
1226
|
+
// Clear conversation fingerprint cache so all transcripts are re-extracted
|
|
1227
|
+
backend.saveConversationIndex({});
|
|
1228
|
+
} catch (err) {
|
|
1229
|
+
console.log(` Warning: could not clear store: ${err.message}`);
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1175
1232
|
|
|
1176
1233
|
if (!conversationsOnly && !signalsOnly) {
|
|
1177
1234
|
console.log('=== Journals ===');
|
|
@@ -1192,6 +1249,44 @@ async function runReindex(args) {
|
|
|
1192
1249
|
console.log('=== Signals ===');
|
|
1193
1250
|
await indexSignalsIfAvailable();
|
|
1194
1251
|
}
|
|
1252
|
+
|
|
1253
|
+
// Optional: run distillation after reindex
|
|
1254
|
+
if (args.includes('--distill')) {
|
|
1255
|
+
console.log('');
|
|
1256
|
+
console.log('=== Distillation ===');
|
|
1257
|
+
await runDistill(['--tier', 'daily']);
|
|
1258
|
+
}
|
|
1259
|
+
}
|
|
1260
|
+
|
|
1261
|
+
/**
 * CLI handler for the `distill` command: dedupes, merges, and compacts
 * content-store entries via distill.distillEntries().
 *
 * Flags:
 *   --tier <daily|weekly|archive|all>  tier to distill (default 'daily')
 *   --dry-run                          report what would change, write nothing
 *
 * @param {string[]} args - raw CLI arguments after the command name
 */
async function runDistill(args) {
  const distill = require('./distill');
  const dryRun = args.includes('--dry-run');
  const tierIdx = args.indexOf('--tier');
  const tier = (tierIdx !== -1 && args[tierIdx + 1]) ? args[tierIdx + 1] : 'daily';

  console.log(`Distilling content (tier: ${tier}${dryRun ? ', dry run' : ''})...`);

  // Build LLM config from connectors (merges need an LLM; dry runs don't)
  let llmConfig = null;
  if (!dryRun) {
    const config = readConnectorsConfig();
    const llm = config.digest?.llm;
    if (llm) {
      llmConfig = {
        provider: llm.provider,
        model: llm.intelligence?.model || 'claude-haiku-4-5-20251001',
        api_key_env: llm.api_key_env,
      };
    } else {
      // Fix: without an LLM config distillEntries silently skips the merge
      // phase and the run reports "Merged: 0" with no hint why — say so.
      console.log('  No digest LLM configured; entries will be deduped but clusters will not be merged.');
    }
  }

  const stats = await distill.distillEntries({ tier, dryRun, llmConfig });

  console.log('');
  console.log('Distillation results:');
  console.log(`  Groups: ${stats.grouped}`);
  console.log(`  Deduped: ${stats.deduped}`);
  console.log(`  Merged: ${stats.merged}`);
  console.log(`  LLM calls: ${stats.llmCalls}`);
}
}
|
|
1196
1291
|
|
|
1197
1292
|
/**
|
|
@@ -4736,6 +4831,10 @@ const COMMANDS = {
|
|
|
4736
4831
|
desc: 'Index all signal sources (journals + conversations)',
|
|
4737
4832
|
run: (args) => runReindex(args),
|
|
4738
4833
|
},
|
|
4834
|
+
distill: {
|
|
4835
|
+
desc: 'Distill content store: dedup, merge, and compact entries',
|
|
4836
|
+
run: (args) => runDistill(args),
|
|
4837
|
+
},
|
|
4739
4838
|
'index-journals': {
|
|
4740
4839
|
desc: 'Index journal entries into the content store',
|
|
4741
4840
|
run: (args) => runIndexJournals(args),
|
package/package.json
CHANGED