context-vault 3.17.0 → 3.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +157 -0
- package/dist/register-tools.d.ts.map +1 -1
- package/dist/register-tools.js +0 -2
- package/dist/register-tools.js.map +1 -1
- package/dist/server.js +78 -1
- package/dist/server.js.map +1 -1
- package/dist/tools/recall.d.ts +1 -1
- package/dist/tools/recall.d.ts.map +1 -1
- package/dist/tools/recall.js +50 -100
- package/dist/tools/recall.js.map +1 -1
- package/node_modules/@context-vault/core/dist/assemble.d.ts +22 -0
- package/node_modules/@context-vault/core/dist/assemble.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/assemble.js +143 -0
- package/node_modules/@context-vault/core/dist/assemble.js.map +1 -0
- package/node_modules/@context-vault/core/dist/capture.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/capture.js +10 -5
- package/node_modules/@context-vault/core/dist/capture.js.map +1 -1
- package/node_modules/@context-vault/core/dist/consolidation.d.ts +40 -0
- package/node_modules/@context-vault/core/dist/consolidation.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/consolidation.js +229 -0
- package/node_modules/@context-vault/core/dist/consolidation.js.map +1 -0
- package/node_modules/@context-vault/core/dist/db.d.ts +25 -1
- package/node_modules/@context-vault/core/dist/db.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/db.js +92 -4
- package/node_modules/@context-vault/core/dist/db.js.map +1 -1
- package/node_modules/@context-vault/core/dist/frontmatter.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/frontmatter.js +26 -3
- package/node_modules/@context-vault/core/dist/frontmatter.js.map +1 -1
- package/node_modules/@context-vault/core/dist/index.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/index.js +225 -184
- package/node_modules/@context-vault/core/dist/index.js.map +1 -1
- package/node_modules/@context-vault/core/dist/main.d.ts +2 -0
- package/node_modules/@context-vault/core/dist/main.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/main.js +2 -0
- package/node_modules/@context-vault/core/dist/main.js.map +1 -1
- package/node_modules/@context-vault/core/dist/search.d.ts +5 -0
- package/node_modules/@context-vault/core/dist/search.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/search.js +97 -5
- package/node_modules/@context-vault/core/dist/search.js.map +1 -1
- package/node_modules/@context-vault/core/dist/summarize.d.ts +5 -0
- package/node_modules/@context-vault/core/dist/summarize.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/summarize.js +146 -0
- package/node_modules/@context-vault/core/dist/summarize.js.map +1 -0
- package/node_modules/@context-vault/core/dist/types.d.ts +2 -0
- package/node_modules/@context-vault/core/dist/types.d.ts.map +1 -1
- package/node_modules/@context-vault/core/package.json +5 -1
- package/node_modules/@context-vault/core/src/assemble.ts +187 -0
- package/node_modules/@context-vault/core/src/capture.ts +10 -5
- package/node_modules/@context-vault/core/src/consolidation.ts +356 -0
- package/node_modules/@context-vault/core/src/db.ts +95 -4
- package/node_modules/@context-vault/core/src/frontmatter.ts +25 -4
- package/node_modules/@context-vault/core/src/index.ts +127 -88
- package/node_modules/@context-vault/core/src/main.ts +4 -0
- package/node_modules/@context-vault/core/src/search.ts +102 -5
- package/node_modules/@context-vault/core/src/summarize.ts +157 -0
- package/node_modules/@context-vault/core/src/types.ts +2 -0
- package/package.json +2 -2
- package/scripts/validate-epipe-shutdown.mjs +183 -0
- package/scripts/validate-sqlite-busy-retry.mjs +243 -0
- package/src/register-tools.ts +0 -2
- package/src/server.ts +76 -1
- package/src/tools/recall.ts +51 -110
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import { hybridSearch } from './search.js';
|
|
4
|
+
import { VaultConfig } from './types.js';
|
|
5
|
+
|
|
6
|
+
export interface AssembleOptions {
|
|
7
|
+
role: 'worker' | 'pm' | 'ceo' | 'steer';
|
|
8
|
+
task: string;
|
|
9
|
+
budget: number;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export interface IncludedEntryMeta {
|
|
13
|
+
title: string;
|
|
14
|
+
status: 'full' | 'condensed';
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export interface AssembleResult {
|
|
18
|
+
markdown: string;
|
|
19
|
+
metadata: {
|
|
20
|
+
tokens_used: number;
|
|
21
|
+
budget: number;
|
|
22
|
+
entries_included: number;
|
|
23
|
+
role: string;
|
|
24
|
+
included_entries: IncludedEntryMeta[];
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
const TOKEN_TO_CHAR_RATIO = 4; // approximate conversion
|
|
29
|
+
|
|
30
|
+
const CONDENSED_SUFFIX = '\n... [truncated for context budget]';
|
|
31
|
+
const CONDENSED_BODY_CHARS = 400;
|
|
32
|
+
|
|
33
|
+
type RoleProfile = { rules: string[]; skills: string[]; memory: string[] };
|
|
34
|
+
|
|
35
|
+
const DEFAULT_ROLE_PROFILES: Record<string, RoleProfile> = {
|
|
36
|
+
worker: {
|
|
37
|
+
rules: ['coding', 'patterns', 'standards', 'error-handling', 'debugging'],
|
|
38
|
+
skills: ['git', 'compile', 'test', 'local'],
|
|
39
|
+
memory: ['feedback', 'user', 'project', 'reference']
|
|
40
|
+
},
|
|
41
|
+
pm: {
|
|
42
|
+
rules: ['orchestration', 'planning', 'spec-writing', 'review', 'debugging-escalation'],
|
|
43
|
+
skills: ['dispatch', 'review', 'triage', 'report'],
|
|
44
|
+
memory: ['project', 'team', 'feedback']
|
|
45
|
+
},
|
|
46
|
+
ceo: {
|
|
47
|
+
rules: ['strategy', 'epistemic-honesty', 'decision-making', 'business-logic'],
|
|
48
|
+
skills: ['triage', 'delegate', 'report'],
|
|
49
|
+
memory: ['north-star', 'project', 'user']
|
|
50
|
+
},
|
|
51
|
+
steer: {
|
|
52
|
+
rules: ['steer', 'strategy', 'feedback-loops', 'analysis'],
|
|
53
|
+
skills: ['triage', 'report', 'analyze'],
|
|
54
|
+
memory: ['north-star', 'feedback']
|
|
55
|
+
}
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
function loadRoleProfiles(dataDir: string): Record<string, RoleProfile> {
|
|
59
|
+
const rolesPath = join(dataDir, 'roles.json');
|
|
60
|
+
if (existsSync(rolesPath)) {
|
|
61
|
+
try {
|
|
62
|
+
const raw = readFileSync(rolesPath, 'utf-8');
|
|
63
|
+
const parsed = JSON.parse(raw);
|
|
64
|
+
if (parsed && typeof parsed === 'object') {
|
|
65
|
+
return parsed as Record<string, RoleProfile>;
|
|
66
|
+
}
|
|
67
|
+
} catch {
|
|
68
|
+
// fall through to defaults
|
|
69
|
+
}
|
|
70
|
+
} else {
|
|
71
|
+
// Seed roles.json with defaults so the user can edit it later
|
|
72
|
+
try {
|
|
73
|
+
writeFileSync(rolesPath, JSON.stringify(DEFAULT_ROLE_PROFILES, null, 2) + '\n');
|
|
74
|
+
} catch {
|
|
75
|
+
// non-fatal: writable failure just means we use in-memory defaults
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
return DEFAULT_ROLE_PROFILES;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
 * Build a role-scoped markdown context document that fits a token budget.
 *
 * Layout (U-curve): role rules/skills are prepended at the top, followed by
 * the header, the active task, and vault entries retrieved for the task.
 * Token accounting is approximate: characters / TOKEN_TO_CHAR_RATIO.
 *
 * @param db      open node:sqlite database handle
 * @param config  vault configuration; dataDir locates roles.json
 * @param options role, free-text task, and token budget
 * @returns assembled markdown plus metadata about what was included
 */
export async function assembleContext(
  db: import('node:sqlite').DatabaseSync,
  config: VaultConfig,
  options: AssembleOptions
): Promise<AssembleResult> {
  const { role, task, budget } = options;

  // 1. Load role profiles (dynamic — falls back to defaults)
  const ROLE_PROFILES = loadRoleProfiles(config.dataDir);
  const profile = ROLE_PROFILES[role] || ROLE_PROFILES.worker || DEFAULT_ROLE_PROFILES.worker;
  const charsBudget = budget * TOKEN_TO_CHAR_RATIO;

  // Allocate slots
  const rulesChars = Math.floor(charsBudget * 0.2); // 20% for rules
  const skillsChars = Math.floor(charsBudget * 0.1); // 10% for skills
  const taskChars = Math.floor(charsBudget * 0.2); // 20% for task
  const vaultChars = Math.floor(charsBudget * 0.3); // 30% for dynamically retrieved vault context
  // reserve 20%

  let markdown = `# Assembled Context: ${role.toUpperCase()}\n\n`;
  let totalCharsUsed = markdown.length;

  // 2. Add Task Spec
  // NOTE(review): the task is appended verbatim even if it exceeds taskChars —
  // only vault entries below are gated by the budget.
  const taskSection = `## Active Task\n\n${task}\n\n`;
  markdown += taskSection;
  totalCharsUsed += taskSection.length;

  // 3. Dynamic Vault Retrieval based on task
  let entriesIncluded = 0;
  const includedEntries: IncludedEntryMeta[] = [];
  // Combined ceiling for everything except the 20% reserve. Rules/skills are
  // only appended after the loop, so vault entries can eat into their share.
  const totalBudgetChars = vaultChars + taskChars + rulesChars + skillsChars;

  if (vaultChars > 0 && task.trim().length > 0) {
    try {
      // Over-retrieve candidates
      // NOTE(review): stmts/insertVec/etc. are no-op stubs — assumes
      // hybridSearch only reads db/config (and calls embed) on this path;
      // confirm against hybridSearch's implementation.
      const searchResults = await hybridSearch(
        {
          db,
          config,
          stmts: {} as any,
          insertVec: () => {},
          deleteVec: () => {},
          embed: async () => new Float32Array(384),
          insertCtxVec: () => {},
          deleteCtxVec: () => {}
        },
        task,
        { limit: 20 }
      );

      if (searchResults && searchResults.length > 0) {
        markdown += `## Retrieved Context\n\n`;
        const retrievedHeaderLen = `## Retrieved Context\n\n`.length;
        totalCharsUsed += retrievedHeaderLen;

        for (const item of searchResults) {
          // Results may be raw entries or { entry, ... } wrapper objects.
          const entry = 'entry' in item ? (item as any).entry : (item as any);
          const fullBody = entry.body || '';
          const titleText = entry.title ? `### ${entry.title}\n` : '';
          const entryTitle = entry.title || '(untitled)';

          const fullEntryMarkdown = `${titleText}${fullBody}\n\n`;

          if (totalCharsUsed + fullEntryMarkdown.length <= totalBudgetChars) {
            // Full entry fits
            markdown += fullEntryMarkdown;
            totalCharsUsed += fullEntryMarkdown.length;
            entriesIncluded++;
            includedEntries.push({ title: entryTitle, status: 'full' });
          } else {
            // Try condensed version
            const condensedBody = fullBody.slice(0, CONDENSED_BODY_CHARS) + CONDENSED_SUFFIX;
            const condensedEntryMarkdown = `${titleText}${condensedBody}\n\n`;

            if (totalCharsUsed + condensedEntryMarkdown.length <= totalBudgetChars) {
              markdown += condensedEntryMarkdown;
              totalCharsUsed += condensedEntryMarkdown.length;
              entriesIncluded++;
              includedEntries.push({ title: entryTitle, status: 'condensed' });
            } else {
              // Neither fits — stop adding vault entries
              break;
            }
          }
        }
      }
    } catch (e) {
      // Retrieval is best-effort: assembly still returns rules + task on failure.
      console.error("Vault retrieval error during assembly:", e);
    }
  }

  // 4. Construct System Prompts/Rules/Skills
  // NOTE(review): the rules section is prepended without a budget check, so
  // the final document can exceed `budget` slightly — confirm acceptable.
  const rulesSection = `## Role Directives\n\n### Rules\n${profile.rules.join(', ')}\n\n### Skills\n${profile.skills.join(', ')}\n\n`;
  markdown = rulesSection + markdown; // U-Curve: Rules at the top
  totalCharsUsed += rulesSection.length;

  return {
    markdown,
    metadata: {
      tokens_used: Math.ceil(totalCharsUsed / TOKEN_TO_CHAR_RATIO),
      budget,
      entries_included: entriesIncluded,
      role,
      included_entries: includedEntries
    }
  };
}
|
|
@@ -6,6 +6,7 @@ import { parseFrontmatter, formatFrontmatter } from './frontmatter.js';
|
|
|
6
6
|
import { formatBody } from './formatters.js';
|
|
7
7
|
import type { BaseCtx, CaptureInput, CaptureResult, IndexEntryInput } from './types.js';
|
|
8
8
|
import { indexEntry } from './index.js';
|
|
9
|
+
import { withBusyRetry } from './db.js';
|
|
9
10
|
|
|
10
11
|
function safeFolderPath(vaultDir: string, kind: string, folder?: string | null): string {
|
|
11
12
|
const base = resolve(vaultDir, kindToPath(kind));
|
|
@@ -303,14 +304,18 @@ export async function captureAndIndex(
|
|
|
303
304
|
try {
|
|
304
305
|
await indexEntry(ctx, entry, precomputedEmbedding);
|
|
305
306
|
if (entry.supersedes?.length && ctx.stmts.updateSupersededBy) {
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
307
|
+
await withBusyRetry(() => {
|
|
308
|
+
for (const supersededId of entry.supersedes!) {
|
|
309
|
+
if (typeof supersededId === 'string' && supersededId.trim()) {
|
|
310
|
+
ctx.stmts.updateSupersededBy.run(entry.id, supersededId.trim());
|
|
311
|
+
}
|
|
309
312
|
}
|
|
310
|
-
}
|
|
313
|
+
});
|
|
311
314
|
}
|
|
312
315
|
if (entry.related_to?.length && ctx.stmts.updateRelatedTo) {
|
|
313
|
-
|
|
316
|
+
await withBusyRetry(() => {
|
|
317
|
+
ctx.stmts.updateRelatedTo.run(JSON.stringify(entry.related_to), entry.id);
|
|
318
|
+
});
|
|
314
319
|
}
|
|
315
320
|
return entry;
|
|
316
321
|
} catch (err) {
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
import type { BaseCtx } from './types.js';
|
|
2
|
+
import { dotProduct } from './search.js';
|
|
3
|
+
|
|
4
|
+
/** A cluster of near-duplicate vault entries centred on one canonical entry. */
export interface DuplicateGroup {
  canonical_id: string;      // entry chosen to survive (highest recall_count in the group)
  duplicate_ids: string[];   // entries proposed for merging into the canonical one
  similarity: number;        // average dot-product similarity of the seed vs. its neighbors
  importance_signal: number; // group size — more duplicates = more important
  canonical_title: string;
  sample_titles: string[];   // up to 5 duplicate titles for display
}

/** Outcome of mergeEntries; also returned (without writes) for dry runs. */
export interface MergeResult {
  canonical_id: string;
  merged_count: number;      // number of duplicates folded in
  new_recall_count: number;  // recall_count summed across the whole group
  removed_ids: string[];     // duplicates marked superseded (or that would be)
}

/** Retention score for a single entry; callers receive most-decayed first. */
export interface DecayScore {
  id: string;
  title: string;
  kind: string;
  score: number;             // 0–100; higher = more worth keeping
  days_since_recall: number;
  recall_count: number;
  recommendation: 'keep' | 'compact' | 'archive';
}

// Shape of rows this module reads from the `vault` table.
interface VaultRow {
  id: string;
  title: string | null;
  kind: string;
  recall_count: number;
  recall_sessions: number;
  last_recalled_at: string | null;
  updated_at: string | null;
  created_at: string;
  rowid: number;
}

// Row from the vault_vec table; `embedding` is a raw float32 blob.
interface VecRow {
  rowid: number;
  embedding: Buffer;
}

// Base retention value per entry kind; unknown kinds default to 5
// (see computeDecayScores).
const KIND_WEIGHTS: Record<string, number> = {
  insight: 10,
  decision: 8,
  pattern: 7,
  reference: 5,
  event: 2,
};
|
|
54
|
+
|
|
55
|
+
/**
 * Detect clusters of semantically near-duplicate vault entries.
 *
 * Loads all live, indexed entries (optionally filtered by kind), fetches
 * their embeddings from vault_vec, and greedily groups entries whose
 * pairwise similarity meets `threshold`. Within each group the entry with
 * the highest recall_count becomes the canonical id. Read-only: no writes.
 *
 * NOTE(review): declared async but contains no awaits; `opts.dryRun` is
 * accepted but unused here — detection itself never mutates the database.
 */
export async function findDuplicates(
  ctx: BaseCtx,
  opts?: {
    threshold?: number; // minimum similarity to count as a duplicate (default 0.85)
    limit?: number;     // maximum number of groups returned (default 50)
    kind?: string;      // restrict the scan to one entry kind
    dryRun?: boolean;
  }
): Promise<DuplicateGroup[]> {
  const threshold = opts?.threshold ?? 0.85;
  const limit = opts?.limit ?? 50;
  const kind = opts?.kind ?? null;

  // Only live entries: indexed, not superseded, not expired.
  const whereParts = [
    'indexed = 1',
    'superseded_by IS NULL',
    "(expires_at IS NULL OR expires_at > datetime('now'))",
  ];
  const params: (string | number)[] = [];
  if (kind) {
    whereParts.push('kind = ?');
    params.push(kind);
  }

  const entries = ctx.db
    .prepare(
      `SELECT rowid, id, title, kind, recall_count, recall_sessions, last_recalled_at, updated_at, created_at
       FROM vault WHERE ${whereParts.join(' AND ')} ORDER BY recall_count DESC`
    )
    .all(...params) as unknown as VaultRow[];

  if (entries.length === 0) return [];

  const entryMap = new Map<string, VaultRow>();
  const rowidToId = new Map<number, string>();
  for (const e of entries) {
    entryMap.set(e.id, e);
    rowidToId.set(e.rowid, e.id);
  }

  // Load embeddings for all eligible entries from vault_vec.
  const embeddingMap = new Map<string, Float32Array>();
  const rowids = entries.map((e) => e.rowid);
  if (rowids.length > 0) {
    // Batch the IN (...) list to stay under SQLite's bound-parameter limit.
    const batchSize = 500;
    for (let i = 0; i < rowids.length; i += batchSize) {
      const batch = rowids.slice(i, i + batchSize);
      const placeholders = batch.map(() => '?').join(',');
      const vecRows = ctx.db
        .prepare(`SELECT rowid, embedding FROM vault_vec WHERE rowid IN (${placeholders})`)
        .all(...batch) as unknown as VecRow[];
      for (const row of vecRows) {
        const id = rowidToId.get(Number(row.rowid));
        if (id && row.embedding) {
          // Reinterpret the raw blob as float32s without copying
          // (4 bytes per element, hence byteLength / 4).
          embeddingMap.set(
            id,
            new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4)
          );
        }
      }
    }
  }

  if (embeddingMap.size === 0) return [];

  // Find duplicate groups using pairwise similarity
  const merged = new Set<string>();
  const groups: DuplicateGroup[] = [];

  // For each entry with an embedding, find neighbors by brute-force pairwise
  // comparison (O(n^2) scan, not an index-assisted KNN query).
  const entriesWithVec = entries.filter((e) => embeddingMap.has(e.id));

  for (const entry of entriesWithVec) {
    if (merged.has(entry.id)) continue; // already claimed by an earlier group

    const entryVec = embeddingMap.get(entry.id)!;
    const neighbors: Array<{ id: string; sim: number }> = [];

    // Compare against all other entries with embeddings
    for (const other of entriesWithVec) {
      if (other.id === entry.id || merged.has(other.id)) continue;
      const otherVec = embeddingMap.get(other.id);
      if (!otherVec) continue;
      const sim = dotProduct(entryVec, otherVec);
      if (sim >= threshold) {
        neighbors.push({ id: other.id, sim });
      }
    }

    if (neighbors.length === 0) continue;

    // Build the group: canonical is the one with highest recall_count
    const allIds = [entry.id, ...neighbors.map((n) => n.id)];
    let canonicalId = entry.id;
    let maxRecall = entry.recall_count;
    for (const nId of neighbors.map((n) => n.id)) {
      const nEntry = entryMap.get(nId);
      if (nEntry && nEntry.recall_count > maxRecall) {
        maxRecall = nEntry.recall_count;
        canonicalId = nId;
      }
    }

    const duplicateIds = allIds.filter((id) => id !== canonicalId);
    // Note: avgSim averages seed-vs-neighbor similarities only, not all pairs.
    const avgSim =
      neighbors.reduce((sum, n) => sum + n.sim, 0) / neighbors.length;

    const canonical = entryMap.get(canonicalId)!;
    const sampleTitles = duplicateIds
      .map((id) => entryMap.get(id)?.title)
      .filter((t): t is string => t != null)
      .slice(0, 5);

    groups.push({
      canonical_id: canonicalId,
      duplicate_ids: duplicateIds,
      similarity: Math.round(avgSim * 1000) / 1000,
      importance_signal: allIds.length,
      canonical_title: canonical.title ?? '(untitled)',
      sample_titles: sampleTitles,
    });

    for (const id of allIds) merged.add(id);

    if (groups.length >= limit) break;
  }

  // Sort by importance (more duplicates = more important)
  groups.sort((a, b) => b.importance_signal - a.importance_signal);

  return groups.slice(0, limit);
}
|
|
187
|
+
|
|
188
|
+
export function mergeEntries(
|
|
189
|
+
ctx: BaseCtx,
|
|
190
|
+
opts: {
|
|
191
|
+
canonical_id: string;
|
|
192
|
+
duplicate_ids: string[];
|
|
193
|
+
dryRun?: boolean;
|
|
194
|
+
}
|
|
195
|
+
): MergeResult {
|
|
196
|
+
const dryRun = opts.dryRun ?? true;
|
|
197
|
+
const { canonical_id, duplicate_ids } = opts;
|
|
198
|
+
|
|
199
|
+
if (duplicate_ids.length === 0) {
|
|
200
|
+
const canonical = ctx.db
|
|
201
|
+
.prepare('SELECT recall_count FROM vault WHERE id = ?')
|
|
202
|
+
.get(canonical_id) as { recall_count: number } | undefined;
|
|
203
|
+
return {
|
|
204
|
+
canonical_id,
|
|
205
|
+
merged_count: 0,
|
|
206
|
+
new_recall_count: canonical?.recall_count ?? 0,
|
|
207
|
+
removed_ids: [],
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Gather recall stats from all entries
|
|
212
|
+
const allIds = [canonical_id, ...duplicate_ids];
|
|
213
|
+
const placeholders = allIds.map(() => '?').join(',');
|
|
214
|
+
const rows = ctx.db
|
|
215
|
+
.prepare(
|
|
216
|
+
`SELECT id, recall_count, recall_sessions FROM vault WHERE id IN (${placeholders})`
|
|
217
|
+
)
|
|
218
|
+
.all(...allIds) as unknown as Array<{
|
|
219
|
+
id: string;
|
|
220
|
+
recall_count: number;
|
|
221
|
+
recall_sessions: number;
|
|
222
|
+
}>;
|
|
223
|
+
|
|
224
|
+
let totalRecallCount = 0;
|
|
225
|
+
let totalRecallSessions = 0;
|
|
226
|
+
for (const row of rows) {
|
|
227
|
+
totalRecallCount += row.recall_count ?? 0;
|
|
228
|
+
totalRecallSessions += row.recall_sessions ?? 0;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
if (dryRun) {
|
|
232
|
+
return {
|
|
233
|
+
canonical_id,
|
|
234
|
+
merged_count: duplicate_ids.length,
|
|
235
|
+
new_recall_count: totalRecallCount,
|
|
236
|
+
removed_ids: duplicate_ids,
|
|
237
|
+
};
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
const now = new Date().toISOString();
|
|
241
|
+
const dateStr = now.slice(0, 10);
|
|
242
|
+
|
|
243
|
+
// Update canonical: sum recall counts, append consolidation note
|
|
244
|
+
ctx.db
|
|
245
|
+
.prepare(
|
|
246
|
+
`UPDATE vault SET
|
|
247
|
+
recall_count = ?,
|
|
248
|
+
recall_sessions = ?,
|
|
249
|
+
updated_at = ?,
|
|
250
|
+
body = body || ?
|
|
251
|
+
WHERE id = ?`
|
|
252
|
+
)
|
|
253
|
+
.run(
|
|
254
|
+
totalRecallCount,
|
|
255
|
+
totalRecallSessions,
|
|
256
|
+
now,
|
|
257
|
+
`\n\n[Consolidated from ${duplicate_ids.length + 1} entries on ${dateStr}]`,
|
|
258
|
+
canonical_id
|
|
259
|
+
);
|
|
260
|
+
|
|
261
|
+
// Mark duplicates as superseded
|
|
262
|
+
for (const dupId of duplicate_ids) {
|
|
263
|
+
ctx.db
|
|
264
|
+
.prepare('UPDATE vault SET superseded_by = ?, updated_at = ? WHERE id = ?')
|
|
265
|
+
.run(canonical_id, now, dupId);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
return {
|
|
269
|
+
canonical_id,
|
|
270
|
+
merged_count: duplicate_ids.length,
|
|
271
|
+
new_recall_count: totalRecallCount,
|
|
272
|
+
removed_ids: duplicate_ids,
|
|
273
|
+
};
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
export function computeDecayScores(
|
|
277
|
+
ctx: BaseCtx,
|
|
278
|
+
opts?: {
|
|
279
|
+
limit?: number;
|
|
280
|
+
minAgeDays?: number;
|
|
281
|
+
}
|
|
282
|
+
): DecayScore[] {
|
|
283
|
+
const limit = opts?.limit ?? 100;
|
|
284
|
+
const minAgeDays = opts?.minAgeDays ?? 0;
|
|
285
|
+
|
|
286
|
+
const now = Date.now();
|
|
287
|
+
const cutoffDate = minAgeDays > 0
|
|
288
|
+
? new Date(now - minAgeDays * 86400000).toISOString()
|
|
289
|
+
: null;
|
|
290
|
+
|
|
291
|
+
const whereParts = [
|
|
292
|
+
'indexed = 1',
|
|
293
|
+
'superseded_by IS NULL',
|
|
294
|
+
"(expires_at IS NULL OR expires_at > datetime('now'))",
|
|
295
|
+
];
|
|
296
|
+
const params: string[] = [];
|
|
297
|
+
if (cutoffDate) {
|
|
298
|
+
whereParts.push('created_at <= ?');
|
|
299
|
+
params.push(cutoffDate);
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
const rows = ctx.db
|
|
303
|
+
.prepare(
|
|
304
|
+
`SELECT id, title, kind, recall_count, last_recalled_at, updated_at, created_at
|
|
305
|
+
FROM vault WHERE ${whereParts.join(' AND ')}
|
|
306
|
+
ORDER BY updated_at ASC`
|
|
307
|
+
)
|
|
308
|
+
.all(...params) as unknown as Array<{
|
|
309
|
+
id: string;
|
|
310
|
+
title: string | null;
|
|
311
|
+
kind: string;
|
|
312
|
+
recall_count: number;
|
|
313
|
+
last_recalled_at: string | null;
|
|
314
|
+
updated_at: string | null;
|
|
315
|
+
created_at: string;
|
|
316
|
+
}>;
|
|
317
|
+
|
|
318
|
+
const scores: DecayScore[] = [];
|
|
319
|
+
|
|
320
|
+
for (const row of rows) {
|
|
321
|
+
const lastActivity = row.last_recalled_at ?? row.updated_at ?? row.created_at;
|
|
322
|
+
const daysSinceRecall = (now - new Date(lastActivity).getTime()) / 86400000;
|
|
323
|
+
|
|
324
|
+
const baseValue = KIND_WEIGHTS[row.kind] ?? 5;
|
|
325
|
+
const timeDecay = Math.exp(-0.02 * daysSinceRecall);
|
|
326
|
+
const recallBoost = 1 + Math.log((row.recall_count ?? 0) + 1);
|
|
327
|
+
|
|
328
|
+
const rawScore = baseValue * timeDecay * recallBoost;
|
|
329
|
+
// Normalize to 0-100 scale (max possible: ~10 * 1.0 * ~5 = 50 for heavily recalled insight)
|
|
330
|
+
const score = Math.max(0, Math.min(100, Math.round(rawScore * 10)));
|
|
331
|
+
|
|
332
|
+
let recommendation: 'keep' | 'compact' | 'archive';
|
|
333
|
+
if (score >= 30) {
|
|
334
|
+
recommendation = 'keep';
|
|
335
|
+
} else if (score >= 10) {
|
|
336
|
+
recommendation = 'compact';
|
|
337
|
+
} else {
|
|
338
|
+
recommendation = 'archive';
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
scores.push({
|
|
342
|
+
id: row.id,
|
|
343
|
+
title: row.title ?? '(untitled)',
|
|
344
|
+
kind: row.kind,
|
|
345
|
+
score,
|
|
346
|
+
days_since_recall: Math.round(daysSinceRecall),
|
|
347
|
+
recall_count: row.recall_count ?? 0,
|
|
348
|
+
recommendation,
|
|
349
|
+
});
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
// Sort by score ascending (most decayed first)
|
|
353
|
+
scores.sort((a, b) => a.score - b.score);
|
|
354
|
+
|
|
355
|
+
return scores.slice(0, limit);
|
|
356
|
+
}
|
|
@@ -123,6 +123,82 @@ function runTransaction(db: DatabaseSync, fn: () => void): void {
|
|
|
123
123
|
}
|
|
124
124
|
}
|
|
125
125
|
|
|
126
|
+
export function isBusyError(err: unknown): boolean {
|
|
127
|
+
if (!err || typeof err !== 'object') return false;
|
|
128
|
+
const e = err as { code?: unknown; message?: unknown; errcode?: unknown };
|
|
129
|
+
if (e.code === 'SQLITE_BUSY' || e.errcode === 5) return true;
|
|
130
|
+
if (typeof e.message === 'string' && /SQLITE_BUSY|database is locked|database table is locked/i.test(e.message)) {
|
|
131
|
+
return true;
|
|
132
|
+
}
|
|
133
|
+
return false;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
const BUSY_RETRY_DELAYS_MS = [50, 150, 500];
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* Execute `fn` and retry on transient SQLITE_BUSY errors.
|
|
140
|
+
*
|
|
141
|
+
* WAL + busy_timeout=5000ms already handles most single-statement contention.
|
|
142
|
+
* This helper guards transactions / multi-statement paths where a single
|
|
143
|
+
* statement can still exceed the busy grace under N-parallel-daemon load,
|
|
144
|
+
* surfacing SQLITE_BUSY to callers.
|
|
145
|
+
*
|
|
146
|
+
* Delays follow the pattern 50ms / 150ms / 500ms with small jitter.
|
|
147
|
+
* On final failure, the original SQLITE_BUSY error is propagated as-is.
|
|
148
|
+
*/
|
|
149
|
+
export async function withBusyRetry<T>(
|
|
150
|
+
fn: () => T | Promise<T>,
|
|
151
|
+
opts: { attempts?: number; baseDelayMs?: number } = {}
|
|
152
|
+
): Promise<T> {
|
|
153
|
+
const attempts = Math.max(1, opts.attempts ?? 3);
|
|
154
|
+
const baseDelayMs = opts.baseDelayMs ?? 50;
|
|
155
|
+
let lastErr: unknown;
|
|
156
|
+
for (let i = 0; i < attempts; i++) {
|
|
157
|
+
try {
|
|
158
|
+
return await fn();
|
|
159
|
+
} catch (err) {
|
|
160
|
+
lastErr = err;
|
|
161
|
+
if (!isBusyError(err) || i === attempts - 1) throw err;
|
|
162
|
+
const preset = BUSY_RETRY_DELAYS_MS[i];
|
|
163
|
+
const base = preset !== undefined
|
|
164
|
+
? (preset * baseDelayMs) / BUSY_RETRY_DELAYS_MS[0]
|
|
165
|
+
: baseDelayMs * Math.pow(3, i);
|
|
166
|
+
const jitter = Math.random() * (base * 0.2);
|
|
167
|
+
await new Promise((resolve) => setTimeout(resolve, base + jitter));
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
throw lastErr;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Synchronous variant. Uses Atomics.wait for backoff so it can run inside
|
|
175
|
+
* non-async hot paths (watcher, CLI init, migration).
|
|
176
|
+
*/
|
|
177
|
+
export function withBusyRetrySync<T>(
|
|
178
|
+
fn: () => T,
|
|
179
|
+
opts: { attempts?: number; baseDelayMs?: number } = {}
|
|
180
|
+
): T {
|
|
181
|
+
const attempts = Math.max(1, opts.attempts ?? 3);
|
|
182
|
+
const baseDelayMs = opts.baseDelayMs ?? 50;
|
|
183
|
+
let lastErr: unknown;
|
|
184
|
+
for (let i = 0; i < attempts; i++) {
|
|
185
|
+
try {
|
|
186
|
+
return fn();
|
|
187
|
+
} catch (err) {
|
|
188
|
+
lastErr = err;
|
|
189
|
+
if (!isBusyError(err) || i === attempts - 1) throw err;
|
|
190
|
+
const preset = BUSY_RETRY_DELAYS_MS[i];
|
|
191
|
+
const base = preset !== undefined
|
|
192
|
+
? (preset * baseDelayMs) / BUSY_RETRY_DELAYS_MS[0]
|
|
193
|
+
: baseDelayMs * Math.pow(3, i);
|
|
194
|
+
const jitter = Math.random() * (base * 0.2);
|
|
195
|
+
const waitMs = Math.max(1, Math.round(base + jitter));
|
|
196
|
+
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, waitMs);
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
throw lastErr;
|
|
200
|
+
}
|
|
201
|
+
|
|
126
202
|
export const SCHEMA_DDL = `
|
|
127
203
|
CREATE TABLE IF NOT EXISTS vault (
|
|
128
204
|
id TEXT PRIMARY KEY,
|
|
@@ -148,7 +224,9 @@ export const SCHEMA_DDL = `
|
|
|
148
224
|
recall_count INTEGER DEFAULT 0,
|
|
149
225
|
recall_sessions INTEGER DEFAULT 0,
|
|
150
226
|
last_recalled_at TEXT,
|
|
151
|
-
heat_tier TEXT CHECK(heat_tier IN ('hot', 'warm', 'cold'))
|
|
227
|
+
heat_tier TEXT CHECK(heat_tier IN ('hot', 'warm', 'cold')),
|
|
228
|
+
summary_condensed TEXT,
|
|
229
|
+
summary_keypoint TEXT
|
|
152
230
|
);
|
|
153
231
|
|
|
154
232
|
CREATE INDEX IF NOT EXISTS idx_vault_kind ON vault(kind);
|
|
@@ -205,7 +283,7 @@ export const SCHEMA_DDL = `
|
|
|
205
283
|
CREATE INDEX IF NOT EXISTS idx_access_log_goal ON access_log(session_goal) WHERE session_goal IS NOT NULL;
|
|
206
284
|
`;
|
|
207
285
|
|
|
208
|
-
const CURRENT_VERSION =
|
|
286
|
+
const CURRENT_VERSION = 20;
|
|
209
287
|
|
|
210
288
|
export async function initDatabase(dbPath: string): Promise<DatabaseSync> {
|
|
211
289
|
const sqliteVec = await loadSqliteVec();
|
|
@@ -308,10 +386,23 @@ export async function initDatabase(dbPath: string): Promise<DatabaseSync> {
|
|
|
308
386
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_access_log_goal ON access_log(session_goal) WHERE session_goal IS NOT NULL`);
|
|
309
387
|
db.exec(`ALTER TABLE vault ADD COLUMN heat_tier TEXT CHECK(heat_tier IN ('hot', 'warm', 'cold'))`);
|
|
310
388
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_vault_heat_tier ON vault(heat_tier) WHERE heat_tier IS NOT NULL`);
|
|
311
|
-
db.exec(`PRAGMA user_version =
|
|
389
|
+
db.exec(`PRAGMA user_version = 19`);
|
|
312
390
|
} catch (e) {
|
|
313
391
|
console.error(`[context-vault] v18->v19 migration failed: ${(e as Error).message}`);
|
|
392
|
+
return db;
|
|
314
393
|
}
|
|
394
|
+
// Fall through to v19->v20 migration
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
// v19 -> v20: add precomputed summary tier columns
|
|
398
|
+
if (version >= 15 && version <= 19) {
|
|
399
|
+
try {
|
|
400
|
+
db.exec('ALTER TABLE vault ADD COLUMN summary_condensed TEXT');
|
|
401
|
+
} catch {}
|
|
402
|
+
try {
|
|
403
|
+
db.exec('ALTER TABLE vault ADD COLUMN summary_keypoint TEXT');
|
|
404
|
+
} catch {}
|
|
405
|
+
db.exec(`PRAGMA user_version = ${CURRENT_VERSION}`);
|
|
315
406
|
return db;
|
|
316
407
|
}
|
|
317
408
|
|
|
@@ -356,7 +447,7 @@ export async function initDatabase(dbPath: string): Promise<DatabaseSync> {
|
|
|
356
447
|
return freshDb;
|
|
357
448
|
}
|
|
358
449
|
|
|
359
|
-
if (version <
|
|
450
|
+
if (version < 20) {
|
|
360
451
|
db.exec(SCHEMA_DDL);
|
|
361
452
|
db.exec(`PRAGMA user_version = ${CURRENT_VERSION}`);
|
|
362
453
|
}
|