r2mcp 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/LICENSE +21 -0
- package/README.md +532 -0
- package/dist/breadcrumbs.d.ts +123 -0
- package/dist/breadcrumbs.js +135 -0
- package/dist/cli/classify-edges.d.ts +2 -0
- package/dist/cli/classify-edges.js +130 -0
- package/dist/cli/compile-wiki.d.ts +2 -0
- package/dist/cli/compile-wiki.js +173 -0
- package/dist/cli/dump-edges-json.d.ts +2 -0
- package/dist/cli/dump-edges-json.js +21 -0
- package/dist/cli/extract-entities.d.ts +17 -0
- package/dist/cli/extract-entities.js +166 -0
- package/dist/cli/lint-memory.d.ts +16 -0
- package/dist/cli/lint-memory.js +94 -0
- package/dist/cli/migrate.d.ts +17 -0
- package/dist/cli/migrate.js +146 -0
- package/dist/cli/setup-helpers.d.ts +7 -0
- package/dist/cli/setup-helpers.js +72 -0
- package/dist/cli/setup.d.ts +15 -0
- package/dist/cli/setup.js +95 -0
- package/dist/compiler/clustering.d.ts +29 -0
- package/dist/compiler/clustering.js +66 -0
- package/dist/compiler/frontmatter.d.ts +35 -0
- package/dist/compiler/frontmatter.js +168 -0
- package/dist/compiler/manifest.d.ts +32 -0
- package/dist/compiler/manifest.js +82 -0
- package/dist/compiler/prompts.d.ts +17 -0
- package/dist/compiler/prompts.js +82 -0
- package/dist/compiler/run.d.ts +52 -0
- package/dist/compiler/run.js +186 -0
- package/dist/compiler/tier.d.ts +10 -0
- package/dist/compiler/tier.js +85 -0
- package/dist/compiler/topic.d.ts +16 -0
- package/dist/compiler/topic.js +105 -0
- package/dist/compiler/types.d.ts +101 -0
- package/dist/compiler/types.js +4 -0
- package/dist/db.d.ts +10 -0
- package/dist/db.js +46 -0
- package/dist/edges/candidate-pairs.d.ts +24 -0
- package/dist/edges/candidate-pairs.js +35 -0
- package/dist/edges/classifier.d.ts +45 -0
- package/dist/edges/classifier.js +172 -0
- package/dist/edges/signals.d.ts +13 -0
- package/dist/edges/signals.js +45 -0
- package/dist/edges/stage1-haiku.d.ts +21 -0
- package/dist/edges/stage1-haiku.js +33 -0
- package/dist/edges/stage2-opus.d.ts +41 -0
- package/dist/edges/stage2-opus.js +101 -0
- package/dist/edges/state.d.ts +44 -0
- package/dist/edges/state.js +79 -0
- package/dist/edges/types.d.ts +20 -0
- package/dist/edges/types.js +1 -0
- package/dist/embeddings.d.ts +13 -0
- package/dist/embeddings.js +54 -0
- package/dist/entities/db.d.ts +49 -0
- package/dist/entities/db.js +109 -0
- package/dist/entities/extractor.d.ts +14 -0
- package/dist/entities/extractor.js +154 -0
- package/dist/entities/normalize.d.ts +5 -0
- package/dist/entities/normalize.js +7 -0
- package/dist/entities/prompt.d.ts +19 -0
- package/dist/entities/prompt.js +100 -0
- package/dist/entities/state.d.ts +44 -0
- package/dist/entities/state.js +99 -0
- package/dist/entities/types.d.ts +62 -0
- package/dist/entities/types.js +6 -0
- package/dist/env.d.ts +13 -0
- package/dist/env.js +32 -0
- package/dist/fingerprint.d.ts +2 -0
- package/dist/fingerprint.js +12 -0
- package/dist/graph-rebuild.d.ts +6 -0
- package/dist/graph-rebuild.js +20 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +403 -0
- package/dist/instrumentation.d.ts +10 -0
- package/dist/instrumentation.js +37 -0
- package/dist/lint/checks/contradictions.d.ts +30 -0
- package/dist/lint/checks/contradictions.js +52 -0
- package/dist/lint/checks/drift.d.ts +5 -0
- package/dist/lint/checks/drift.js +34 -0
- package/dist/lint/checks/orphans.d.ts +5 -0
- package/dist/lint/checks/orphans.js +25 -0
- package/dist/lint/checks/stale.d.ts +6 -0
- package/dist/lint/checks/stale.js +29 -0
- package/dist/lint/checks/superseded-unflagged.d.ts +5 -0
- package/dist/lint/checks/superseded-unflagged.js +47 -0
- package/dist/lint/run.d.ts +11 -0
- package/dist/lint/run.js +95 -0
- package/dist/lint/types.d.ts +60 -0
- package/dist/lint/types.js +13 -0
- package/dist/mcp-response.d.ts +7 -0
- package/dist/mcp-response.js +13 -0
- package/dist/providers/anthropic.d.ts +13 -0
- package/dist/providers/anthropic.js +56 -0
- package/dist/providers/claude-code.d.ts +35 -0
- package/dist/providers/claude-code.js +175 -0
- package/dist/providers/errors.d.ts +12 -0
- package/dist/providers/errors.js +19 -0
- package/dist/providers/index.d.ts +30 -0
- package/dist/providers/index.js +71 -0
- package/dist/providers/openrouter.d.ts +19 -0
- package/dist/providers/openrouter.js +76 -0
- package/dist/providers/semaphore.d.ts +19 -0
- package/dist/providers/semaphore.js +51 -0
- package/dist/providers/types.d.ts +27 -0
- package/dist/providers/types.js +7 -0
- package/dist/schema.sql +116 -0
- package/dist/server-instructions.d.ts +9 -0
- package/dist/server-instructions.js +20 -0
- package/dist/telemetry.d.ts +39 -0
- package/dist/telemetry.js +130 -0
- package/dist/tools/classify.d.ts +44 -0
- package/dist/tools/classify.js +121 -0
- package/dist/tools/compile.d.ts +31 -0
- package/dist/tools/compile.js +132 -0
- package/dist/tools/dump-edges-sidecar.d.ts +37 -0
- package/dist/tools/dump-edges-sidecar.js +80 -0
- package/dist/tools/extract-entities.d.ts +53 -0
- package/dist/tools/extract-entities.js +169 -0
- package/dist/tools/lint.d.ts +10 -0
- package/dist/tools/lint.js +13 -0
- package/dist/tools/meditate.d.ts +25 -0
- package/dist/tools/meditate.js +128 -0
- package/dist/tools/recall.d.ts +66 -0
- package/dist/tools/recall.js +409 -0
- package/dist/tools/reject.d.ts +10 -0
- package/dist/tools/reject.js +24 -0
- package/dist/tools/remember.d.ts +26 -0
- package/dist/tools/remember.js +140 -0
- package/dist/tools/search.d.ts +30 -0
- package/dist/tools/search.js +69 -0
- package/dist/tools/spawn-cli.d.ts +14 -0
- package/dist/tools/spawn-cli.js +41 -0
- package/dist/tools/stats.d.ts +31 -0
- package/dist/tools/stats.js +88 -0
- package/package.json +86 -0
- package/skills/remember/SKILL.md +357 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compile orchestrator — composes tier and topic synthesis, writes files,
|
|
3
|
+
* cleans stale outputs, and produces the run summary.
|
|
4
|
+
*
|
|
5
|
+
* `runCompile()` is dependency-injected so tests can run it without a DB:
|
|
6
|
+
* pass in the memory list and a mocked LLMProvider, get back the summary
|
|
7
|
+
* plus an in-memory file map for assertions.
|
|
8
|
+
*/
|
|
9
|
+
import { existsSync } from 'node:fs';
|
|
10
|
+
import { mkdir, rm, writeFile } from 'node:fs/promises';
|
|
11
|
+
import { dirname, join, relative, resolve } from 'node:path';
|
|
12
|
+
import { compileTier } from './tier.js';
|
|
13
|
+
import { compileTopic } from './topic.js';
|
|
14
|
+
import { topicToSlug } from './clustering.js';
|
|
15
|
+
import { emitFrontmatter } from './frontmatter.js';
|
|
16
|
+
import { computeStaleFiles, manifestPath, readManifest, writeManifest, } from './manifest.js';
|
|
17
|
+
const TIERS = ['preferences', 'project-context', 'conversations'];
|
|
18
|
+
/**
 * Default filesystem adapter used by `runCompile` when `deps.fs` is not
 * supplied. Every operation except `exists` returns a promise.
 */
const realFs = {
    /** Create directory `p` (and any missing parents). */
    ensureDir: async (p) => {
        await mkdir(p, { recursive: true });
    },
    /** Write string `c` to path `p` as UTF-8. */
    writeFile: (p, c) => writeFile(p, c, 'utf-8'),
    /** Remove `p` when it exists; a missing path is a no-op. */
    deleteFile: async (p) => {
        if (existsSync(p)) {
            await rm(p, { force: true });
        }
    },
    readManifest: readManifest,
    writeManifest: writeManifest,
    exists: existsSync,
};
|
|
26
|
+
/**
 * Compile the requested tier page(s) or the requested topic page, then
 * update the manifest, delete stale outputs and return the run summary.
 *
 * @param opts Run options. Exactly one of `tier`, `all`, `topic` must be
 *   set (enforced by `validateOptions`). Also read: `runId`, `startedAt`,
 *   `compiledDir`, `maxCostUsd`, `sourceGitSha`, `dryRun`.
 * @param deps Injected dependencies: `loadMemories()` and `provider` are
 *   required; `fs` defaults to `realFs` and `stdout` to `process.stdout`.
 * @returns The run summary. File paths in it are relative to
 *   `opts.compiledDir`.
 * @throws Error when the mode options are invalid.
 */
export async function runCompile(opts, deps) {
    const fs = deps.fs ?? realFs;
    const stdout = deps.stdout ?? ((s) => process.stdout.write(s));
    validateOptions(opts);
    const memories = await deps.loadMemories();
    // Shared across every compile call so the cost cap applies to the whole run.
    const costMeter = { totalCostUsd: 0, hitCap: false };
    const filesWritten = [];
    const filesDeleted = [];
    const newManifest = {
        run_id: opts.runId,
        generated_at: opts.startedAt,
        tiers: [],
        topics: [],
    };
    const tiersInScope = [];
    const topicsInScope = [];
    const pageCtx = { opts, deps, fs, stdout, filesWritten };
    // ---- Tier compile ----
    const tiersToBuild = opts.all ? [...TIERS] : opts.tier ? [opts.tier] : [];
    for (const tier of tiersToBuild) {
        // A previous tier exhausted the budget: leave the remaining tiers untouched.
        if (costMeter.hitCap)
            break;
        tiersInScope.push(tier);
        const tierMemories = memories.filter((m) => m.tier === tier && m.type !== 'archived');
        const result = await compileTier({
            tier,
            memories: tierMemories,
            provider: deps.provider,
            runId: opts.runId,
            maxCostUsd: opts.maxCostUsd,
            costMeter,
        });
        const relPath = `${tier}.md`;
        await writeCompiledPage(pageCtx, {
            result,
            relPath,
            scopeField: { tier },
            // Tier pages live directly in the compiled directory.
            ensureDirPath: opts.compiledDir,
        });
        newManifest.tiers.push({ tier, path: relPath, source_memory_ids: result.source_memory_ids });
    }
    // ---- Topic compile ----
    if (opts.topic) {
        topicsInScope.push(opts.topic);
        // Slug is computed before any LLM call is issued.
        const slug = topicToSlug(opts.topic);
        const result = await compileTopic({
            topic: opts.topic,
            memories: memories.filter((m) => m.type !== 'archived'),
            provider: deps.provider,
            runId: opts.runId,
            maxCostUsd: opts.maxCostUsd,
            costMeter,
        });
        const relPath = join('topics', `${slug}.md`);
        await writeCompiledPage(pageCtx, {
            result,
            relPath,
            scopeField: { topic: opts.topic },
        });
        newManifest.topics.push({
            topic: opts.topic,
            path: relPath,
            source_memory_ids: result.source_memory_ids,
        });
    }
    // ---- Manifest + stale cleanup ----
    let manifestAbs = manifestPath(opts.compiledDir);
    if (!opts.dryRun && filesWritten.length > 0) {
        // Merge with prior manifest entries that we DIDN'T touch this run, so a
        // tier-only invocation doesn't drop unrelated topic entries.
        const prev = await fs.readManifest(opts.compiledDir);
        const merged = mergeManifest(prev, newManifest, {
            tiersTouched: tiersInScope,
            topicsTouched: topicsInScope,
        });
        const stale = computeStaleFiles(prev, merged, {
            tiers: tiersInScope,
            topics: topicsInScope,
            allTopics: false,
        });
        for (const rel of stale) {
            const abs = resolve(opts.compiledDir, rel);
            await fs.deleteFile(abs);
            filesDeleted.push(abs);
        }
        manifestAbs = await fs.writeManifest(opts.compiledDir, merged);
    }
    return {
        run_id: opts.runId,
        started_at: opts.startedAt,
        ended_at: new Date().toISOString(),
        files_written: filesWritten.map((p) => relative(opts.compiledDir, p)),
        files_deleted: filesDeleted.map((p) => relative(opts.compiledDir, p)),
        total_cost_usd: costMeter.totalCostUsd,
        hit_cost_cap: costMeter.hitCap,
        provider: deps.provider.name,
        source_git_sha: opts.sourceGitSha,
        manifest_path: manifestAbs,
        dry_run: !!opts.dryRun,
    };
}
/**
 * Prepend frontmatter to one compiled section and either preview it on
 * stdout (dry run) or write it under `opts.compiledDir`, recording the
 * absolute path in `ctx.filesWritten`.
 *
 * @param ctx Per-run context: `opts`, `deps`, `fs`, `stdout`, `filesWritten`.
 * @param page `result` (section output), `relPath` (path relative to the
 *   compiled dir), `scopeField` (`{ tier }` or `{ topic }`, appended last to
 *   the frontmatter) and optional `ensureDirPath` (directory to create;
 *   defaults to the directory of the resolved output path).
 */
async function writeCompiledPage(ctx, page) {
    const { opts, deps, fs, stdout, filesWritten } = ctx;
    const { result, relPath, scopeField, ensureDirPath } = page;
    const fm = {
        generated_at: opts.startedAt,
        compile_run_id: opts.runId,
        source_count: result.source_memory_ids.length,
        source_memory_ids: result.source_memory_ids,
        provider: deps.provider.name,
        source_git_sha: opts.sourceGitSha,
        ...scopeField,
    };
    const fileContent = emitFrontmatter(fm) + '\n' + result.body;
    const absPath = resolve(opts.compiledDir, relPath);
    if (opts.dryRun) {
        stdout(`\n--- DRY RUN: ${relPath} ---\n`);
        stdout(fileContent);
        return;
    }
    await fs.ensureDir(ensureDirPath ?? dirname(absPath));
    await fs.writeFile(absPath, fileContent);
    filesWritten.push(absPath);
}
|
|
155
|
+
/**
 * Reject option sets that do not select exactly one compile mode.
 *
 * @param opts Options carrying the `tier`, `all` and `topic` selectors;
 *   a selector counts as chosen when it is truthy.
 * @throws Error when zero or more than one selector is chosen.
 */
function validateOptions(opts) {
    let selected = 0;
    for (const mode of [opts.tier, opts.all, opts.topic]) {
        if (mode) {
            selected += 1;
        }
    }
    if (selected === 1) {
        return;
    }
    throw new Error('compile() requires exactly one of: tier, all, topic');
}
|
|
161
|
+
/**
|
|
162
|
+
* Combine a fresh per-tier/per-topic manifest with the prior run's manifest,
|
|
163
|
+
* keeping prior entries that weren't in this run's scope. Per-tier or per-
|
|
164
|
+
* topic compiles don't wipe untouched entries (B.R8 nuance).
|
|
165
|
+
*/
|
|
166
|
+
function mergeManifest(prev, next, scope) {
|
|
167
|
+
if (!prev)
|
|
168
|
+
return next;
|
|
169
|
+
const merged = {
|
|
170
|
+
run_id: next.run_id,
|
|
171
|
+
generated_at: next.generated_at,
|
|
172
|
+
tiers: [...next.tiers],
|
|
173
|
+
topics: [...next.topics],
|
|
174
|
+
};
|
|
175
|
+
// Keep prior tier entries we didn't touch
|
|
176
|
+
for (const t of prev.tiers) {
|
|
177
|
+
if (!scope.tiersTouched.includes(t.tier))
|
|
178
|
+
merged.tiers.push(t);
|
|
179
|
+
}
|
|
180
|
+
// Keep prior topic entries we didn't touch
|
|
181
|
+
for (const t of prev.topics) {
|
|
182
|
+
if (!scope.topicsTouched.includes(t.topic))
|
|
183
|
+
merged.topics.push(t);
|
|
184
|
+
}
|
|
185
|
+
return merged;
|
|
186
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tier-level wiki compile (preferences / project-context / conversations).
|
|
3
|
+
*
|
|
4
|
+
* Compiler-controlled structure (headers, citations) → LLM-controlled prose.
|
|
5
|
+
* This split keeps headers and source_memory_ids deterministic across runs
|
|
6
|
+
* (B.R5 / B.AC3) while allowing prose-level variance under the 5% Levenshtein
|
|
7
|
+
* threshold.
|
|
8
|
+
*/
|
|
9
|
+
import type { CompileSectionResult, CompileTierInput } from './types.js';
|
|
10
|
+
/**
 * Compile one tier page: one `###` section per topic cluster, each followed
 * by a `Sources:` citation line. Resolves to the markdown body, the sorted
 * source memory ids, the emitted headers, the cost spent, and whether the
 * cost cap cut the run short.
 */
export declare function compileTier(input: CompileTierInput): Promise<CompileSectionResult>;
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tier-level wiki compile (preferences / project-context / conversations).
|
|
3
|
+
*
|
|
4
|
+
* Compiler-controlled structure (headers, citations) → LLM-controlled prose.
|
|
5
|
+
* This split keeps headers and source_memory_ids deterministic across runs
|
|
6
|
+
* (B.R5 / B.AC3) while allowing prose-level variance under the 5% Levenshtein
|
|
7
|
+
* threshold.
|
|
8
|
+
*/
|
|
9
|
+
import { clusterByTopic, topicTitle } from './clustering.js';
|
|
10
|
+
import { tierSystemPrompt, tierClusterUserPrompt } from './prompts.js';
|
|
11
|
+
import { withLLMCallSpan } from '../telemetry.js';
|
|
12
|
+
/** Title of the `##` heading that opens each tier page, keyed by tier name. */
const TIER_HEADERS = {
    'preferences': 'Preferences and Decisions',
    'project-context': 'Project Context',
    'conversations': 'Relationship and Continuity',
};
|
|
17
|
+
const MAX_TOKENS_PER_CLUSTER = 400;
|
|
18
|
+
/**
 * Build the markdown body for one tier page.
 *
 * The tier heading, per-cluster `###` headings and `Sources:` lines are
 * produced by this function; only the paragraph under each cluster heading
 * comes from the LLM provider.
 *
 * @param input Tier name, its memories, the provider, the cost cap and the
 *   run-wide `costMeter` (mutated: actual call costs are added and `hitCap`
 *   is set when the cap stops the loop).
 * @returns Body, sorted source memory ids, headers, cost spent here, and
 *   `partial` (true when the cap stopped synthesis before every cluster ran).
 */
export async function compileTier(input) {
    const { tier, memories, provider, maxCostUsd, costMeter } = input;
    const clusters = clusterByTopic(memories);
    const title = `## ${TIER_HEADERS[tier]}`;
    const headers = [title];
    const lines = [title, ''];
    if (clusters.length === 0) {
        lines.push('_No memories in this tier yet._');
        return {
            body: lines.join('\n') + '\n',
            source_memory_ids: [],
            headers,
            cost_usd: 0,
            partial: false,
        };
    }
    const citedIds = new Set();
    let spentHere = 0;
    let stoppedByCap = false;
    for (const cluster of clusters) {
        // Stop when the cap is already reached, or when a worst-case call
        // would push the running total over it.
        const capReached = costMeter.totalCostUsd >= maxCostUsd
            || costMeter.totalCostUsd + estimatedCallCost() > maxCostUsd;
        if (capReached) {
            stoppedByCap = true;
            costMeter.hitCap = true;
            break;
        }
        const heading = `### ${topicTitle(cluster.topic)}`;
        headers.push(heading);
        // claw-1ejd: wrap LLM call for cross-process trace inheritance.
        const spanAttrs = { provider: provider.name, model: 'haiku' };
        const result = await withLLMCallSpan('memory.compile_wiki.call', spanAttrs, () => provider.complete({
            model: 'haiku',
            system: tierSystemPrompt(tier),
            prompt: tierClusterUserPrompt(cluster.topic, cluster.memories),
            max_tokens: MAX_TOKENS_PER_CLUSTER,
        }));
        costMeter.totalCostUsd += result.cost_usd;
        spentHere += result.cost_usd;
        const prose = result.response.trim();
        const paragraph = prose === '' ? '_(synthesis returned empty paragraph)_' : prose;
        const cites = cluster.memories.map((m) => `<m:${m.id}>`).join(' ');
        lines.push('', heading, '', paragraph, '', `Sources: ${cites}`);
        cluster.memories.forEach((m) => citedIds.add(m.id));
    }
    return {
        body: lines.join('\n') + '\n',
        source_memory_ids: [...citedIds].sort(),
        headers,
        cost_usd: spentHere,
        partial: stoppedByCap,
    };
}
|
|
78
|
+
/**
 * Worst-case cost, in USD, reserved for one synthesis call before it is
 * issued. The figure sits above Anthropic's haiku list price for an
 * 800-token input + 400-token output (0.0008*0.80 + 0.0004*4.00 ≈ $0.0022).
 * It only gates the cap check; reported costs use the provider's actuals.
 *
 * @returns The per-call reservation in USD.
 */
function estimatedCallCost() {
    const worstCasePerCallUsd = 0.003;
    return worstCasePerCallUsd;
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-topic wiki page compile.
|
|
3
|
+
*
|
|
4
|
+
* Output shape (B.AC4):
|
|
5
|
+
* ## Summary
|
|
6
|
+
* ## Key Decisions
|
|
7
|
+
* ## Open Questions
|
|
8
|
+
* ## Timeline
|
|
9
|
+
*
|
|
10
|
+
* Summary / Key Decisions / Open Questions are LLM-synthesized. Timeline is
|
|
11
|
+
* deterministic — built from `created_at` ordering — so it adds zero LLM cost
|
|
12
|
+
* and gives the page a stable scaffold even when the LLM declines to produce
|
|
13
|
+
* Open Questions content.
|
|
14
|
+
*/
|
|
15
|
+
import type { CompileSectionResult, CompileTopicInput } from './types.js';
|
|
16
|
+
/**
 * Compile one topic page from the memories matching `input.topic`. Resolves
 * to the markdown body, the sorted source memory ids, the emitted headers,
 * the cost spent, and whether the cost cap cut the run short.
 */
export declare function compileTopic(input: CompileTopicInput): Promise<CompileSectionResult>;
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-topic wiki page compile.
|
|
3
|
+
*
|
|
4
|
+
* Output shape (B.AC4):
|
|
5
|
+
* ## Summary
|
|
6
|
+
* ## Key Decisions
|
|
7
|
+
* ## Open Questions
|
|
8
|
+
* ## Timeline
|
|
9
|
+
*
|
|
10
|
+
* Summary / Key Decisions / Open Questions are LLM-synthesized. Timeline is
|
|
11
|
+
* deterministic — built from `created_at` ordering — so it adds zero LLM cost
|
|
12
|
+
* and gives the page a stable scaffold even when the LLM declines to produce
|
|
13
|
+
* Open Questions content.
|
|
14
|
+
*/
|
|
15
|
+
import { memoriesForTopic } from './clustering.js';
|
|
16
|
+
import { topicSectionUserPrompt, topicSystemPrompt } from './prompts.js';
|
|
17
|
+
import { withLLMCallSpan } from '../telemetry.js';
|
|
18
|
+
/** LLM-synthesized sections of a topic page, in output order (Timeline is appended separately). */
const SECTION_NAMES = ['Summary', 'Key Decisions', 'Open Questions'];
|
|
19
|
+
const MAX_TOKENS_PER_SECTION = 500;
|
|
20
|
+
/**
 * Build the markdown body for one topic page.
 *
 * Each name in `SECTION_NAMES` gets one LLM call; the `## Timeline` section
 * is generated without an LLM call and is skipped when the cost cap stopped
 * the section loop.
 *
 * @param input Topic name, candidate memories, the provider, the cost cap
 *   and the run-wide `costMeter` (mutated: actual call costs are added and
 *   `hitCap` is set when the cap stops the loop).
 * @returns Body, sorted source memory ids, headers, cost spent here, and
 *   `partial` (true when the cap stopped synthesis early).
 */
export async function compileTopic(input) {
    const { topic, memories, provider, maxCostUsd, costMeter } = input;
    // Worst-case reservation for one call, checked before the call is issued.
    const ESTIMATED_CALL_COST_USD = 0.003;
    const relevant = memoriesForTopic(memories, topic);
    const headers = [];
    const lines = [];
    const sourceIds = new Set();
    let partial = false;
    let sectionCost = 0;
    if (relevant.length === 0) {
        headers.push('## Summary');
        lines.push('## Summary', '', `_No memories tagged with topic '${topic}'._`, '');
        return {
            body: lines.join('\n') + '\n',
            source_memory_ids: [],
            headers,
            cost_usd: 0,
            partial: false,
        };
    }
    for (const section of SECTION_NAMES) {
        if (costMeter.totalCostUsd >= maxCostUsd) {
            partial = true;
            costMeter.hitCap = true;
            break;
        }
        if (costMeter.totalCostUsd + ESTIMATED_CALL_COST_USD > maxCostUsd) {
            partial = true;
            costMeter.hitCap = true;
            break;
        }
        const heading = `## ${section}`;
        headers.push(heading);
        // claw-1ejd: wrap LLM call for cross-process trace inheritance.
        const result = await withLLMCallSpan('memory.compile_wiki.call', { provider: provider.name, model: 'haiku' }, () => provider.complete({
            model: 'haiku',
            system: topicSystemPrompt(),
            prompt: topicSectionUserPrompt(topic, section, relevant),
            max_tokens: MAX_TOKENS_PER_SECTION,
        }));
        costMeter.totalCostUsd += result.cost_usd;
        sectionCost += result.cost_usd;
        lines.push(heading, '');
        const prose = result.response.trim();
        // The literal reply 'NONE' and an empty reply both render the placeholder.
        if (prose === 'NONE' || prose === '') {
            lines.push('_(no relevant content)_');
        }
        else {
            lines.push(prose);
        }
        lines.push('');
        const citeIds = sectionMemoryIds(section, relevant);
        if (citeIds.length > 0) {
            const cites = citeIds.map((id) => `<m:${id}>`).join(' ');
            lines.push(`Sources: ${cites}`);
            lines.push('');
            for (const id of citeIds)
                sourceIds.add(id);
        }
    }
    // Timeline is deterministic — no LLM call.
    if (!partial) {
        headers.push('## Timeline');
        lines.push('## Timeline', '');
        for (const m of relevant) {
            // First 10 characters of created_at are used as the date label.
            const date = (m.created_at || '').slice(0, 10) || 'unknown';
            // Collapse newlines and whitespace runs so each memory stays on a
            // single bullet line; raw multi-line content would otherwise
            // inject extra lines (or bullets) into the list.
            const flat = m.content.replace(/\s+/g, ' ').trim();
            const excerpt = flat.length > 120 ? flat.slice(0, 117) + '...' : flat;
            lines.push(`- ${date} — <m:${m.id}> ${excerpt}`);
            sourceIds.add(m.id);
        }
        lines.push('');
    }
    return {
        body: lines.join('\n') + '\n',
        source_memory_ids: [...sourceIds].sort(),
        headers,
        cost_usd: sectionCost,
        partial,
    };
}
|
|
99
|
+
function sectionMemoryIds(section, memories) {
|
|
100
|
+
// For now, every section cites the same set of source memories (the entire
|
|
101
|
+
// topic-tagged set). A future refinement can attribute differently per
|
|
102
|
+
// section, but the contract is "stable across runs" — and "all memories
|
|
103
|
+
// for the topic" is trivially stable.
|
|
104
|
+
return [...memories.map((m) => m.id)].sort();
|
|
105
|
+
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared types for the wiki compile step (SPEC-044 Section B).
|
|
3
|
+
*/
|
|
4
|
+
import type { LLMProvider } from '../providers/types.js';
|
|
5
|
+
/** Memory tier names; each tier compiles to its own `<tier>.md` page. */
export type Tier = 'preferences' | 'project-context' | 'conversations';
|
|
6
|
+
/**
 * A memory row as the compiler sees it — only the fields synthesis needs.
 */
export interface MemoryForCompile {
    /** Identifier emitted in `<m:ID>` citations and in `source_memory_ids`. */
    id: string;
    /** Tier the memory belongs to; tier compiles select rows by this field. */
    tier: Tier;
    /** Memory type; the orchestrator drops rows whose type is `'archived'`. */
    type: string;
    /** Memory text; the topic timeline shows an excerpt of it. */
    content: string;
    /** Topic tags attached to the memory. */
    topics: string[];
    /** People tags attached to the memory. */
    people: string[];
    /** Creation timestamp; the topic timeline uses its first 10 characters as the date. */
    created_at: string;
    /** Optional inbound supersedes/contradicts edges for prose framing (B.R9). */
    edges?: EdgeForCompile[];
}
|
|
18
|
+
/** A classified relation between two memories, as handed to the compiler. */
export interface EdgeForCompile {
    /** Id of the memory the edge starts from. */
    from_memory_id: string;
    /** Id of the memory the edge points to. */
    to_memory_id: string;
    /** Kind of relation the edge expresses. */
    relation:
        | 'supports'
        | 'contradicts'
        | 'supersedes'
        | 'evolved_into'
        | 'depends_on'
        | 'related_to';
    /** Free-text justification recorded with the edge. */
    rationale: string;
    /** Confidence score recorded with the edge (scale not specified here). */
    confidence: number;
}
|
|
25
|
+
/** Frontmatter fields prepended to every compiled page. */
export interface CompileFrontmatter {
    /** Start timestamp of the compile run that produced the page. */
    generated_at: string;
    /** Id of the compile run that produced the page. */
    compile_run_id: string;
    /** Number of entries in `source_memory_ids`. */
    source_count: number;
    /** Ids of the memories the page was built from. */
    source_memory_ids: string[];
    /** Name of the LLM provider used for synthesis. */
    provider: string;
    /** Git SHA supplied in the run options, or `null`. */
    source_git_sha: string | null;
    /** Set on tier pages. */
    tier?: Tier;
    /** Set on topic pages. */
    topic?: string;
}
|
|
35
|
+
/** Arguments accepted by `compileTier`. */
export interface CompileTierInput {
    /** Tier whose page is being compiled. */
    tier: Tier;
    /** Memories to synthesize for this tier. */
    memories: MemoryForCompile[];
    /** LLM provider used for the synthesis calls. */
    provider: LLMProvider;
    /** Id of the enclosing compile run. */
    runId: string;
    /** If exceeded mid-run, the compile exits gracefully with hit_cost_cap=true. */
    maxCostUsd: number;
    /** Counter shared with the orchestrator so cap is enforced across tiers. */
    costMeter: {
        /** Running total of actual call costs, in USD. */
        totalCostUsd: number;
        /** Set once the cap has stopped a synthesis loop. */
        hitCap: boolean;
    };
}
|
|
48
|
+
/** Arguments accepted by `compileTopic`. */
export interface CompileTopicInput {
    /** Topic whose page is being compiled. */
    topic: string;
    /** Candidate memories; the compiler selects those matching `topic`. */
    memories: MemoryForCompile[];
    /** LLM provider used for the synthesis calls. */
    provider: LLMProvider;
    /** Id of the enclosing compile run. */
    runId: string;
    /** Cost cap in USD; synthesis stops once it is reached. */
    maxCostUsd: number;
    /** Run-wide cost counter shared with the orchestrator. */
    costMeter: {
        /** Running total of actual call costs, in USD. */
        totalCostUsd: number;
        /** Set once the cap has stopped a synthesis loop. */
        hitCap: boolean;
    };
}
|
|
59
|
+
/** Output of one tier or topic compile, before frontmatter is added. */
export interface CompileSectionResult {
    /** Full markdown body (frontmatter prepended by caller). */
    body: string;
    /** Memory IDs that contributed to this section (for frontmatter, B.R3). */
    source_memory_ids: string[];
    /** Set of `## H2` and `### H3` headers (B.R5/B.AC3 stability check). */
    headers: string[];
    /** Cost spent producing this section. */
    cost_usd: number;
    /** True when the run hit `maxCostUsd` mid-synthesis. */
    partial: boolean;
}
|
|
71
|
+
/** Summary returned by `runCompile` for one compile run. */
export interface CompileSummary {
    /** Id of the run. */
    run_id: string;
    /** Start timestamp supplied in the run options. */
    started_at: string;
    /** ISO timestamp taken when the summary was built. */
    ended_at: string;
    /** Files written this run, relative to the compiled directory. */
    files_written: string[];
    /** Stale files removed this run, relative to the compiled directory. */
    files_deleted: string[];
    /** Total actual LLM cost of the run, in USD. */
    total_cost_usd: number;
    /** True when the cost cap stopped synthesis. */
    hit_cost_cap: boolean;
    /** Name of the LLM provider used. */
    provider: string;
    /** Git SHA supplied in the run options, or `null`. */
    source_git_sha: string | null;
    /** Path to the manifest written this run. */
    manifest_path: string;
    /** True for dry-run; preview emitted to stdout. */
    dry_run: boolean;
}
|
|
86
|
+
/** Record of the compiled pages and the memories each one was built from. */
export interface CompileManifest {
    /** Id of the run that last wrote the manifest. */
    run_id: string;
    /** Start timestamp of that run. */
    generated_at: string;
    /** Per-tier files written this run, with their `source_memory_ids`. */
    tiers: {
        tier: Tier;
        /** Page path relative to the compiled directory. */
        path: string;
        source_memory_ids: string[];
    }[];
    /** Per-topic files written this run. */
    topics: {
        topic: string;
        /** Page path relative to the compiled directory. */
        path: string;
        source_memory_ids: string[];
    }[];
}
|
package/dist/db.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import pg from 'pg';
|
|
2
|
+
/**
|
|
3
|
+
* claw-8cjf.2: refuse to guess a database. The old localhost fallback sent
|
|
4
|
+
* writes to the wrong database on misconfiguration and helped nobody — the
|
|
5
|
+
* Docker default is a different URL anyway (r2mcp:r2mcp@localhost, not bare).
|
|
6
|
+
*/
|
|
7
|
+
export declare const MISSING_DATABASE_URL_MESSAGE: string;
|
|
8
|
+
/**
 * Return the shared connection pool, creating it on first use from
 * `R2MCP_DATABASE_URL`.
 * @throws Error carrying `MISSING_DATABASE_URL_MESSAGE` when the variable is unset or empty.
 */
export declare function getPool(): pg.Pool;
|
|
9
|
+
/**
 * Register pgvector types on a client from the shared pool, then execute the
 * bundled `schema.sql` against the pool.
 */
export declare function initDb(): Promise<void>;
|
|
10
|
+
/** End the shared pool if one was created and forget it, so a later `getPool()` builds a new one. */
export declare function closeDb(): Promise<void>;
|
package/dist/db.js
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import pg from 'pg';
|
|
2
|
+
import pgvector from 'pgvector/pg';
|
|
3
|
+
import { readFileSync } from 'node:fs';
|
|
4
|
+
import { fileURLToPath } from 'node:url';
|
|
5
|
+
import { dirname, join } from 'node:path';
|
|
6
|
+
// Directory containing this module; used to locate the bundled schema.sql.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
// Shared pg.Pool, created on first getPool() call and reset to null by closeDb().
let pool = null;
|
|
8
|
+
/**
 * claw-8cjf.2: refuse to guess a database. The old localhost fallback sent
 * writes to the wrong database on misconfiguration and helped nobody — the
 * Docker default is a different URL anyway (r2mcp:r2mcp@localhost, not bare).
 *
 * Error text thrown by `getPool()` when `R2MCP_DATABASE_URL` is missing.
 */
export const MISSING_DATABASE_URL_MESSAGE = [
    'R2MCP_DATABASE_URL is not set. r2mcp refuses to guess a database. Set it via:',
    ' • .mcp.json: add R2MCP_DATABASE_URL to the server\'s "env" block (recommended), or',
    ' • .env in the project root: R2MCP_DATABASE_URL=postgresql://...',
    'For local Docker (docker compose up -d, then npm run setup) use:',
    ' postgresql://r2mcp:r2mcp@localhost:5432/r2mcp',
].join('\n');
|
|
18
|
+
/**
 * Return the shared pool, creating it on first call from the
 * `R2MCP_DATABASE_URL` environment variable.
 *
 * @returns The cached `pg.Pool`.
 * @throws Error with `MISSING_DATABASE_URL_MESSAGE` when the variable is
 *   unset or empty and no pool exists yet.
 */
export function getPool() {
    if (pool) {
        return pool;
    }
    const connectionString = process.env.R2MCP_DATABASE_URL;
    if (!connectionString) {
        throw new Error(MISSING_DATABASE_URL_MESSAGE);
    }
    pool = new pg.Pool({ connectionString });
    return pool;
}
|
|
28
|
+
/**
 * Prepare the database: register pgvector types, then run `schema.sql`
 * (read from the directory of this module) against the shared pool.
 */
export async function initDb() {
    const activePool = getPool();
    // pgvector.registerTypes requires a client (not pool) for setTypeParser
    const client = await activePool.connect();
    try {
        await pgvector.registerTypes(client);
    }
    finally {
        // Always hand the client back, even when registration fails.
        client.release();
    }
    const schemaFile = join(__dirname, 'schema.sql');
    const schemaSql = readFileSync(schemaFile, 'utf-8');
    await activePool.query(schemaSql);
}
|
|
41
|
+
/**
 * End the shared pool, if one exists, and clear the cached reference.
 *
 * The reference is cleared BEFORE awaiting `end()`: a rejected `end()` can
 * no longer leave a dead pool cached for later `getPool()` callers, and
 * overlapping `closeDb()` calls do not end the same pool twice.
 *
 * @returns A promise that settles when the pool has finished ending; it
 *   rejects if `pool.end()` rejects.
 */
export async function closeDb() {
    if (!pool) {
        return;
    }
    const closing = pool;
    pool = null;
    await closing.end();
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type pg from 'pg';
|
|
2
|
+
/** One unordered pair of memories eligible for edge classification. */
export interface CandidatePair {
    /** Id of the first memory (the smaller id of the pair). */
    from_id: string;
    /** Id of the second memory (the larger id of the pair). */
    to_id: string;
    /** Topics present on both memories. */
    shared_topics: string[];
    /** People present on both memories. */
    shared_people: string[];
}
|
|
8
|
+
/** Options accepted by `findCandidatePairs`. */
export interface CandidateOptions {
    /**
     * When set (and non-zero), keep only pairs in which at least one memory
     * was created within this many days.
     */
    sinceDays?: number;
}
|
|
11
|
+
/**
 * Find candidate memory pairs eligible for edge classification (spec R5).
 *
 * A pair is eligible when the two memories share at least 2 topics, or at
 * least 1 person. Each unordered pair is returned once (deduplicated by id
 * order) and a memory is never paired with itself.
 *
 * If `opts.sinceDays` is set, at least one memory in the pair must have been
 * created within that window — used by --since to limit incremental runs.
 *
 * @param pool Connection pool the query runs on.
 * @param opts Optional recency window.
 * @returns The eligible pairs together with their shared topics and people.
 */
export declare function findCandidatePairs(pool: pg.Pool, opts: CandidateOptions): Promise<CandidatePair[]>;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Find candidate memory pairs eligible for edge classification (spec R5).
|
|
3
|
+
*
|
|
4
|
+
* Eligibility:
|
|
5
|
+
* - share >=2 topics, OR
|
|
6
|
+
* - share >=1 person
|
|
7
|
+
*
|
|
8
|
+
* Returns each unordered pair once (using id order to deduplicate).
|
|
9
|
+
* Excludes self-pairs.
|
|
10
|
+
*
|
|
11
|
+
* If `sinceDays` is set, at least one memory in the pair must have been
|
|
12
|
+
* created within that window — used by --since to limit incremental runs.
|
|
13
|
+
*/
|
|
14
|
+
export async function findCandidatePairs(pool, opts) {
|
|
15
|
+
const sinceClause = opts.sinceDays
|
|
16
|
+
? `AND (m1.created_at >= NOW() - ($1 || ' days')::interval
|
|
17
|
+
OR m2.created_at >= NOW() - ($1 || ' days')::interval)`
|
|
18
|
+
: '';
|
|
19
|
+
const params = opts.sinceDays ? [String(opts.sinceDays)] : [];
|
|
20
|
+
const sql = `
|
|
21
|
+
SELECT m1.id AS from_id, m2.id AS to_id,
|
|
22
|
+
ARRAY(SELECT unnest(m1.topics) INTERSECT SELECT unnest(m2.topics)) AS shared_topics,
|
|
23
|
+
ARRAY(SELECT unnest(m1.people) INTERSECT SELECT unnest(m2.people)) AS shared_people
|
|
24
|
+
FROM memories m1
|
|
25
|
+
JOIN memories m2
|
|
26
|
+
ON m1.id < m2.id
|
|
27
|
+
AND (
|
|
28
|
+
cardinality(ARRAY(SELECT unnest(m1.topics) INTERSECT SELECT unnest(m2.topics))) >= 2
|
|
29
|
+
OR cardinality(ARRAY(SELECT unnest(m1.people) INTERSECT SELECT unnest(m2.people))) >= 1
|
|
30
|
+
)
|
|
31
|
+
WHERE TRUE ${sinceClause}
|
|
32
|
+
`;
|
|
33
|
+
const res = await pool.query(sql, params);
|
|
34
|
+
return res.rows;
|
|
35
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import type { CandidatePair } from './candidate-pairs.js';
|
|
2
|
+
import type { Stage1Result } from './stage1-haiku.js';
|
|
3
|
+
import type { Stage2Result, MemoryForClassify } from './stage2-opus.js';
|
|
4
|
+
import type { StateStore, RunSummary, RunSummaryWriter } from './state.js';
|
|
5
|
+
import type { EdgeRelation } from './types.js';
|
|
6
|
+
/**
 * Dependencies injected into `runClassifier`. The member notes below are
 * read off the signatures only — NOTE(review): confirm against classifier.js.
 */
export interface ClassifierDeps {
    /** Produces the candidate pairs, optionally limited to a recency window. */
    findCandidatePairs: (opts: { sinceDays?: number }) => Promise<CandidatePair[]>;
    /** Loads one memory by id; resolves to `null` when none is returned. */
    fetchMemoryById: (id: string) => Promise<MemoryForClassify | null>;
    /** First-stage check on a pair, given each memory's id and content. */
    stage1Filter: (pair: {
        from: { id: string; content: string };
        to: { id: string; content: string };
    }) => Promise<Stage1Result>;
    /** Second-stage classification of a pair, given the full memory records. */
    stage2Classify: (pair: {
        from: MemoryForClassify;
        to: MemoryForClassify;
    }) => Promise<Stage2Result>;
    /** Stores one edge; resolves to a string (presumably the new edge id — verify). */
    insertEdge: (
        fromId: string,
        toId: string,
        relation: EdgeRelation,
        confidence: number,
        rationale: string,
        classifierVersion: string,
    ) => Promise<string>;
    /** State store used by the run. */
    state: StateStore;
    /** Writer for the run summary. */
    summaryWriter: RunSummaryWriter;
    /** Version string passed to `insertEdge` as `classifierVersion` (presumably — verify). */
    classifierVersion: string;
    /** Optional cost estimator for a list of pairs. */
    estimateCost?: (pairs: CandidatePair[]) => Promise<number>;
    /**
     * Per-provider concurrency cap. Defaults to 1 (sequential — preserves
     * the SPEC-043 behavior). Set to LLMProvider.concurrencyLimit at the call
     * site to enable concurrent dispatch (D.R6, D.AC8).
     */
    concurrencyLimit?: number;
    /** Reports the active provider name into the run summary (D.R3). */
    providerName?: string;
}
|
|
39
|
+
/** Per-run options accepted by `runClassifier`. */
export interface RunOptions {
    /** Identifier of this classifier run. */
    runId: string;
    /** Cost cap for the run, in USD. */
    maxCostUsd: number;
    /** Dry-run flag. */
    dryRun: boolean;
    /** Optional recency window, in days, for candidate pairs. */
    sinceDays?: number;
}
|
|
45
|
+
/**
 * Run the edge classifier with the given run options and injected
 * dependencies; resolves to the run summary.
 */
export declare function runClassifier(opts: RunOptions, deps: ClassifierDeps): Promise<RunSummary>;
|