@zuzuucodes/cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +90 -0
  3. package/bin/zuzuu.mjs +133 -0
  4. package/experiments/experiment-1-trace-capture/adapters/claude-code.mjs +220 -0
  5. package/experiments/experiment-1-trace-capture/adapters/codex.mjs +201 -0
  6. package/experiments/experiment-1-trace-capture/adapters/gemini-cli.mjs +113 -0
  7. package/experiments/experiment-1-trace-capture/adapters/host-adapter.mjs +43 -0
  8. package/experiments/experiment-1-trace-capture/adapters/opencode.mjs +205 -0
  9. package/experiments/experiment-1-trace-capture/adapters/pi.mjs +218 -0
  10. package/experiments/experiment-1-trace-capture/adapters/registry.mjs +20 -0
  11. package/experiments/experiment-1-trace-capture/adapters/signals.mjs +44 -0
  12. package/experiments/experiment-1-trace-capture/core/event.mjs +58 -0
  13. package/experiments/experiment-1-trace-capture/core/ids.mjs +32 -0
  14. package/experiments/experiment-1-trace-capture/core/otlp.mjs +54 -0
  15. package/experiments/experiment-1-trace-capture/core/render.mjs +63 -0
  16. package/experiments/experiment-1-trace-capture/core/spans.mjs +43 -0
  17. package/package.json +56 -0
  18. package/zuzuu/actions/adapter.mjs +130 -0
  19. package/zuzuu/actions/convert.mjs +27 -0
  20. package/zuzuu/actions/dispatch.mjs +87 -0
  21. package/zuzuu/actions/inbox.mjs +56 -0
  22. package/zuzuu/actions/manifest.mjs +72 -0
  23. package/zuzuu/actions/marker.mjs +4 -0
  24. package/zuzuu/actions/runner.mjs +37 -0
  25. package/zuzuu/actions/schema.mjs +73 -0
  26. package/zuzuu/actions/trail.mjs +22 -0
  27. package/zuzuu/capture-core.mjs +49 -0
  28. package/zuzuu/commands/act-author.mjs +72 -0
  29. package/zuzuu/commands/act.mjs +101 -0
  30. package/zuzuu/commands/capture.mjs +32 -0
  31. package/zuzuu/commands/code.mjs +84 -0
  32. package/zuzuu/commands/digest.mjs +23 -0
  33. package/zuzuu/commands/distill.mjs +46 -0
  34. package/zuzuu/commands/doctor.mjs +197 -0
  35. package/zuzuu/commands/enable.mjs +195 -0
  36. package/zuzuu/commands/eval.mjs +101 -0
  37. package/zuzuu/commands/explain.mjs +119 -0
  38. package/zuzuu/commands/generation.mjs +107 -0
  39. package/zuzuu/commands/hook.mjs +209 -0
  40. package/zuzuu/commands/inbox.mjs +73 -0
  41. package/zuzuu/commands/init.mjs +89 -0
  42. package/zuzuu/commands/knowledge.mjs +152 -0
  43. package/zuzuu/commands/migrate.mjs +125 -0
  44. package/zuzuu/commands/review.mjs +299 -0
  45. package/zuzuu/commands/status.mjs +82 -0
  46. package/zuzuu/commands/trace.mjs +19 -0
  47. package/zuzuu/digest.mjs +149 -0
  48. package/zuzuu/eval/rank.mjs +31 -0
  49. package/zuzuu/eval/score.mjs +85 -0
  50. package/zuzuu/eval/signals.mjs +57 -0
  51. package/zuzuu/faculty/contract.mjs +19 -0
  52. package/zuzuu/faculty/gate.mjs +65 -0
  53. package/zuzuu/faculty/generation.mjs +392 -0
  54. package/zuzuu/faculty/proposal.mjs +166 -0
  55. package/zuzuu/faculty/provenance.mjs +35 -0
  56. package/zuzuu/faculty/registry.mjs +33 -0
  57. package/zuzuu/faculty/trail.mjs +27 -0
  58. package/zuzuu/guardrails/adapter.mjs +134 -0
  59. package/zuzuu/guardrails.mjs +89 -0
  60. package/zuzuu/inject.mjs +46 -0
  61. package/zuzuu/instructions/adapter.mjs +93 -0
  62. package/zuzuu/knowledge/adapter.mjs +99 -0
  63. package/zuzuu/knowledge/distill.mjs +237 -0
  64. package/zuzuu/knowledge/embed.mjs +52 -0
  65. package/zuzuu/knowledge/er.mjs +98 -0
  66. package/zuzuu/knowledge/inbox.mjs +43 -0
  67. package/zuzuu/knowledge/index.mjs +194 -0
  68. package/zuzuu/knowledge/items.mjs +154 -0
  69. package/zuzuu/knowledge/proposals.mjs +196 -0
  70. package/zuzuu/knowledge/registry.mjs +115 -0
  71. package/zuzuu/live/install.mjs +76 -0
  72. package/zuzuu/live/live-store.mjs +78 -0
  73. package/zuzuu/live/probe.mjs +55 -0
  74. package/zuzuu/live/reconcile.mjs +33 -0
  75. package/zuzuu/memory/adapter.mjs +121 -0
  76. package/zuzuu/miners/actions.mjs +118 -0
  77. package/zuzuu/miners/guardrails.mjs +174 -0
  78. package/zuzuu/miners/instructions.mjs +152 -0
  79. package/zuzuu/miners/knowledge.mjs +22 -0
  80. package/zuzuu/miners/memory.mjs +27 -0
  81. package/zuzuu/miners/registry.mjs +31 -0
  82. package/zuzuu/scaffold.mjs +213 -0
  83. package/zuzuu/session.mjs +72 -0
  84. package/zuzuu/store.mjs +104 -0
@@ -0,0 +1,99 @@
1
+ // zuzuu/knowledge/adapter.mjs
2
+ // The Knowledge faculty adapter (WS2-T2). Wraps the EXISTING Knowledge pipeline
3
+ // (proposals/ER/registry/items/index) behind the faculty-spine adapter contract
4
+ // — { name, ingest, validate, apply, render } — without changing any behaviour.
5
+ //
6
+ // ingest — run ER on a candidate, mirroring createProposal's analysis step
7
+ // validate — registry-based validation of an item
8
+ // apply — IS the extracted approve apply body (applyKnowledgeProposal)
9
+ // render — the human card the `zuzuu review` gate shows for a knowledge proposal
10
+ //
11
+ // Registers itself on import.
12
+
13
+ import { resolve as erResolve } from './er.mjs';
14
+ import { loadRegistry, validateItem } from './registry.mjs';
15
+ import { allItems, slugify } from './items.mjs';
16
+ import { applyKnowledgeProposal } from './proposals.mjs';
17
+ import * as registry from '../faculty/registry.mjs';
18
+
19
+ const name = 'knowledge';
20
+
21
/**
 * Normalise a raw candidate and run entity resolution against the items
 * already stored under `agentDir`.
 * @param {string} agentDir
 * @param {{candidate:object, source?:string, evidence?:object}} raw
 * @returns {{payload:object, analysis:{er:object}, dedupeKey:string}}
 */
function ingest(agentDir, raw) {
  const existing = allItems(agentDir).items;
  // Work on a shallow copy so the caller's candidate object is not touched.
  const payload = { ...raw.candidate };
  payload.id ||= slugify(payload.body);
  const analysis = { er: erResolve(payload, existing) };
  return { payload, analysis, dedupeKey: payload.id };
}
34
+
35
/**
 * Check a payload against the Knowledge registry loaded from `agentDir`.
 * Unregistered attribute keys / relation types are reported as warnings.
 * @returns {{ok:boolean, errors:string[], warnings:string[]}}
 */
function validate(agentDir, payload) {
  const verdict = validateItem(loadRegistry(agentDir), payload);
  const attributeWarnings = verdict.unknownKeys.attributes.map((key) => `unregistered attribute '${key}'`);
  const relationWarnings = verdict.unknownKeys.relations.map((type) => `unregistered relation type '${type}'`);
  return {
    ok: verdict.ok,
    errors: verdict.errors,
    warnings: attributeWarnings.concat(relationWarnings),
  };
}
48
+
49
/**
 * Apply an approved proposal by handing it to applyKnowledgeProposal.
 * @returns {{ok:boolean, action:string, itemIds:string[], warnings:string[]}}
 */
function apply(agentDir, proposal) {
  // applyKnowledgeProposal reads `candidate` / `er`; spine-shaped records carry
  // `payload` / `analysis.er` instead, so fill the legacy fields from those when
  // the record does not already have them.
  const candidate = proposal.candidate ?? proposal.payload;
  const er = proposal.er ?? proposal.analysis?.er;
  const result = applyKnowledgeProposal(agentDir, { ...proposal, candidate, er });

  const itemIds = result.item ? [result.item] : [];
  const warnings = result.warnings ?? [];
  return { ok: result.ok, action: result.action, itemIds, warnings };
}
69
+
70
/**
 * Render a proposal for the human gate. `card` is the multi-line summary
 * (id, type, attributes/relations, ER verdict); `line` is the one-line list form.
 *
 * Accepts both record shapes that `apply` accepts: the legacy one
 * (`candidate` / `er`) and the spine one (`payload` / `analysis.er`).
 * @param {object} proposal
 * @returns {{line:string, card:string}}
 */
function render(proposal) {
  if (proposal.kind === 'registry') {
    // `registry` is a plural ('attributes', 'relations'); drop the trailing 's'.
    const what = `register ${String(proposal.registry).slice(0, -1)} '${proposal.key}'`;
    return {
      line: `${proposal.id} [${proposal.kind}] ${what}`,
      card: `${what} (seen ${proposal.evidence?.occurrences}× in candidates)`,
    };
  }
  // Legacy fields win; fall back to the spine fields so spine-shaped records
  // do not render as "undefined: undefined".
  const c = proposal.candidate ?? proposal.payload ?? {};
  const er = proposal.er ?? proposal.analysis?.er ?? {};
  // Single-line body preview; a missing body renders as empty text, not "undefined".
  const preview = (limit) => String(c.body ?? '').slice(0, limit).replace(/\n/g, ' ');

  const lines = [];
  lines.push(`${c.id ?? ''} ── ${c.type}: ${preview(100)}`);
  for (const [k, v] of Object.entries(c.attributes ?? {})) lines.push(` · ${k} = ${v}`);
  for (const r of c.relations ?? []) lines.push(` → ${r.type} ${r.target}`);
  lines.push(` er: ${er.verdict}${er.match ? ` → ${er.match}` : ''} (${(er.confidence ?? 0).toFixed(2)} · ${er.reason ?? ''})`);
  return {
    line: `${proposal.id} [${er.verdict ?? proposal.kind}] ${c.type}: ${preview(60)}`,
    card: lines.join('\n'),
  };
}
96
+
97
+ export const adapter = { name, ingest, validate, apply, render };
98
+
99
+ registry.register(adapter);
@@ -0,0 +1,237 @@
1
+ // `zuzuu distill` — source A: mechanical miners over real sessions.
2
+ //
3
+ // Reads HOST transcripts directly (not our OTLP traces — those carry byte
4
+ // sizes only, by privacy design; mining is an internal on-machine read and
5
+ // only the distilled FACT + provenance becomes knowledge). Claude Code first —
6
+ // the richest log. Deterministic, zero-LLM: the cheap unambiguous signals.
7
+ //
8
+ // Miners (v1):
9
+ // commands — normalized Bash commands recurring ≥3× across ≥2 sessions
10
+ // → `command` candidates ("a project command")
11
+ // hot-files — files Read/Edit/Written ≥5× → `entity` candidates
12
+ // failures — tools failing ≥3× → `fact` candidates (worth knowing!)
13
+
14
+ import { readFileSync } from 'node:fs';
15
+ import * as registry from '../../experiments/experiment-1-trace-capture/adapters/registry.mjs';
16
+ import { slugify } from './items.mjs';
17
+ import { createProposal, fileRegistryProposals } from './proposals.mjs';
18
+
19
// Canonical form of a shell command: trimmed, whitespace runs collapsed to one
// space, capped at 200 characters.
const norm = (cmd) => String(cmd).trim().replace(/\s+/g, ' ').slice(0, 200);

// Superset (WS5-T1) constants.
const SEQ_SEP = ' && '; // joins adjacent Bash commands into a 2-gram label
const CORRECTION_LEXICON = ["no, don't", "don't ", 'actually use', 'always ', 'never ', 'stop ', 'instead'];
const DESTRUCTIVE_SHAPES = [
  // recursive rm: `-R` is the same flag as `-r`, so match either case (-rf, -Rf, -fR, …)
  /\brm\s+-[a-zA-Z]*[rR]/,
  /git\s+push\s+.*--force/,
  // short form of --force, as a standalone flag anywhere after `push`
  /git\s+push\s+(?:.*\s)?-f\b/,
  /DROP\s+TABLE/i,
  /chmod\s+-R/,
];

/** True when the text contains any phrase from CORRECTION_LEXICON (case-insensitive). */
const isCorrection = (text) => {
  const t = String(text).toLowerCase();
  return CORRECTION_LEXICON.some((p) => t.includes(p));
};

/** True when the command matches any of the DESTRUCTIVE_SHAPES patterns. */
const isDestructive = (cmd) => DESTRUCTIVE_SHAPES.some((re) => re.test(cmd));
31
+
32
/**
 * Plain text of a user message. A string is returned as-is; for a block array
 * the `text` of every `type: 'text'` block is joined with single spaces.
 * Anything else yields the empty string.
 */
function userText(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';
  const pieces = [];
  for (const block of content) {
    const isTextBlock = block && block.type === 'text' && typeof block.text === 'string';
    if (isTextBlock) pieces.push(block.text);
  }
  return pieces.join(' ');
}
43
/** True when the content is a block array holding at least one `tool_result` block (a tool echo, not a real user turn). */
const isToolResult = (content) => {
  if (!Array.isArray(content)) return false;
  return content.some((block) => Boolean(block) && block.type === 'tool_result');
};
45
+
46
/**
 * Extract raw mining signals from one JSONL transcript file.
 * SUPERSET (WS5-T1): the original `commands/files/failures` keys are unchanged;
 * `sequences/correctionTurns/destructiveFailures` are added for later faculties.
 *
 * Lines that are empty, are not valid JSON, or do not parse to an object are
 * skipped, as are null entries inside a message's content array.
 * @param {string} file path of the transcript to read
 * @returns {{sessionId:string, commands:Array, files:string[], failures:string[],
 *   sequences:string[], correctionTurns:Array, destructiveFailures:Array}}
 */
export function mineTranscript(file) {
  const out = { commands: [], files: [], failures: [], sequences: [], correctionTurns: [], destructiveFailures: [] };
  let sessionId = '';
  const results = new Map(); // tool_use_id -> is_error
  const uses = []; // {id, name, input}
  const bashOrder = []; // normalized Bash commands in transcript order
  const userTurns = []; // {text, afterToolAction}
  let sawToolAction = false;
  for (const line of readFileSync(file, 'utf8').split('\n')) {
    if (!line) continue;
    let e;
    try {
      e = JSON.parse(line);
    } catch {
      continue;
    }
    // `null` is valid JSON but not an event; reading a property of it would throw.
    if (e === null || typeof e !== 'object') continue;
    // the first session id seen wins
    if (e.sessionId) sessionId ||= e.sessionId;
    const content = e.message?.content;
    // real user turn (text), not a tool_result echo → candidate correction turn
    if (e.type === 'user' && content != null && !isToolResult(content)) {
      const text = userText(content).trim();
      if (text) userTurns.push({ text, afterToolAction: sawToolAction });
    }
    if (!Array.isArray(content)) continue;
    for (const b of content) {
      if (!b) continue; // tolerate null/empty entries in the block array
      if (b.type === 'tool_use') {
        // tool input may arrive as a JSON string or as an already-parsed object
        const input = typeof b.input === 'string' ? safeParse(b.input) : (b.input ?? {});
        uses.push({ id: b.id, name: b.name, input });
        sawToolAction = true;
        if (b.name === 'Bash' && input?.command) bashOrder.push(norm(input.command));
      } else if (b.type === 'tool_result') {
        results.set(b.tool_use_id, !!b.is_error);
      }
    }
  }
  // Results are joined to their tool_use only after the whole file is read,
  // because a result appears later in the transcript than the call it answers.
  for (const u of uses) {
    const failed = results.get(u.id) === true;
    if (u.name === 'Bash' && u.input?.command) out.commands.push({ cmd: norm(u.input.command), failed });
    const fp = u.input?.file_path || u.input?.path;
    if (fp && ['Read', 'Write', 'Edit', 'NotebookEdit'].includes(u.name)) out.files.push(String(fp));
    if (failed) out.failures.push(u.name);
    if (failed && u.name === 'Bash' && u.input?.command) {
      const cmd = norm(u.input.command);
      if (isDestructive(cmd)) out.destructiveFailures.push({ cmd, tool: u.name });
    }
  }
  // 2-gram Bash sequences (adjacent within the session)
  for (let i = 0; i + 1 < bashOrder.length; i++) out.sequences.push(bashOrder[i] + SEQ_SEP + bashOrder[i + 1]);
  // corrective user turns that follow an assistant tool action
  for (const t of userTurns) if (t.afterToolAction && isCorrection(t.text)) out.correctionTurns.push({ text: t.text.slice(0, 500) });
  return { sessionId, ...out };
}
103
+
104
/** JSON.parse that yields an empty object instead of throwing on malformed input. */
function safeParse(s) {
  let parsed = {};
  try {
    parsed = JSON.parse(s);
  } catch {
    // malformed input: keep the empty-object fallback
  }
  return parsed;
}
111
+
112
/**
 * Aggregate signals across sessions → candidates.
 * Pure (hermetically testable): takes mined per-session signals, returns candidates.
 *
 * A session that lacks one of the signal arrays (`commands`, `files`,
 * `failures`) contributes nothing for that signal instead of throwing.
 * @param {Array<{sessionId:string, commands?:Array, files?:string[], failures?:string[]}>} sessions
 * @param {{minCmdCount?:number, minCmdSessions?:number, minFileTouches?:number, minFailures?:number}} [thresholds]
 * @returns {Array<{candidate:object, evidence:object}>}
 */
export function aggregate(sessions, { minCmdCount = 3, minCmdSessions = 2, minFileTouches = 5, minFailures = 3 } = {}) {
  const candidates = [];
  // commands
  const cmdStats = new Map(); // cmd -> {count, sessions:Set, failures}
  for (const s of sessions) {
    for (const { cmd, failed } of s.commands ?? []) {
      const st = cmdStats.get(cmd) ?? { count: 0, sessions: new Set(), failures: 0 };
      st.count++;
      st.sessions.add(s.sessionId);
      if (failed) st.failures++;
      cmdStats.set(cmd, st);
    }
  }
  for (const [cmd, st] of cmdStats) {
    if (st.count >= minCmdCount && st.sessions.size >= minCmdSessions) {
      candidates.push({
        candidate: {
          id: 'command-' + slugify(cmd, 40),
          type: 'command',
          body: `Recurring project command: \`${cmd}\` (used ${st.count}× across ${st.sessions.size} sessions${st.failures ? `, failed ${st.failures}×` : ''}).`,
          attributes: { command: cmd },
          relations: [],
          // at most five sessions are recorded as provenance
          provenance: [...st.sessions].slice(0, 5).map((id) => ({ session: id, ref: 'distill:commands' })),
        },
        evidence: { occurrences: st.count, sessions: st.sessions.size, failures: st.failures },
      });
    }
  }
  // hot files
  const fileStats = new Map(); // path -> {count, sessions:Set}
  for (const s of sessions) {
    for (const f of s.files ?? []) {
      const st = fileStats.get(f) ?? { count: 0, sessions: new Set() };
      st.count++;
      st.sessions.add(s.sessionId);
      fileStats.set(f, st);
    }
  }
  for (const [path, st] of fileStats) {
    if (st.count >= minFileTouches) {
      // the id is built from the last two path segments only
      const base = path.split('/').slice(-2).join('/');
      candidates.push({
        candidate: {
          id: 'file-' + slugify(base, 40),
          type: 'entity',
          body: `Hot file in this project: \`${path}\` (touched ${st.count}× across ${st.sessions.size} sessions).`,
          attributes: { path },
          relations: [],
          provenance: [...st.sessions].slice(0, 5).map((id) => ({ session: id, ref: 'distill:hot-files' })),
        },
        evidence: { occurrences: st.count, sessions: st.sessions.size },
      });
    }
  }
  // failing tools
  const failStats = new Map(); // tool name -> total failures across all sessions
  for (const s of sessions) for (const t of s.failures ?? []) failStats.set(t, (failStats.get(t) ?? 0) + 1);
  for (const [tool, n] of failStats) {
    if (n >= minFailures) {
      candidates.push({
        candidate: {
          id: 'failing-tool-' + slugify(tool, 30),
          type: 'fact',
          body: `Tool \`${tool}\` fails frequently in this project (${n} failures observed) — worth investigating why.`,
          attributes: {},
          relations: [],
          provenance: sessions
            .filter((s) => (s.failures ?? []).includes(tool))
            .slice(0, 5)
            .map((s) => ({ session: s.sessionId, ref: 'distill:failures' })),
        },
        evidence: { occurrences: n },
      });
    }
  }
  return candidates;
}
190
+
191
/**
 * Mine one {host, ref} pair into the per-session signal superset (tagged with a
 * host-prefixed sessionId so cross-host provenance is legible). Tolerant:
 * returns null when the host has no adapter, the adapter has no `mineSignals`
 * function, or anything throws.
 * @param {{host:string, ref:*, sessionId?:string}} pair
 * @returns {object|null}
 */
export function mineHostSession({ host, ref, sessionId }) {
  try {
    const adapter = registry.byName(host);
    if (!adapter || typeof adapter.mineSignals !== 'function') return null;
    const sig = adapter.mineSignals(ref);
    const sid = sessionId || (typeof ref === 'string' ? ref : ref?.sessionId) || host;
    // Spread the mined signals FIRST: they may carry their own `sessionId`
    // (mineTranscript's result does), which must not overwrite the
    // host-prefixed id or the host tag.
    return { ...sig, sessionId: `${host}:${sid}`, host };
  } catch {
    return null;
  }
}
206
+
207
/**
 * Full distill run: mine every {host, ref} pair, aggregate the mined signals
 * into candidates, file one proposal per candidate, then file registry proposals.
 * Pairs that cannot be mined are dropped.
 * @returns {{sessionsMined:number, proposals:Array, registryProposals:*}}
 */
export function distillSessions(agentDir, pairs) {
  const mined = [];
  for (const pair of pairs) {
    const signals = mineHostSession(pair);
    if (signals) mined.push(signals);
  }

  const proposals = [];
  for (const { candidate, evidence } of aggregate(mined)) {
    proposals.push(createProposal(agentDir, { candidate, source: 'distill', evidence }));
  }

  const registryProposals = fileRegistryProposals(agentDir);
  return { sessionsMined: mined.length, proposals, registryProposals };
}
215
+
216
/**
 * Resolve which transcripts to mine across ALL detected hosts.
 * Returns `[{host, ref, sessionId}]`, newest-first, honoring `scope`
 * ('last'|'all') and a `session` substring filter. When `session` is given it
 * takes precedence over `scope`. All options are optional, and the options
 * object itself may be omitted.
 * @param {{scope?:string, session?:string|null, cwd?:string}} [options]
 */
export function transcriptsFor({ scope = 'all', session = null, cwd = process.cwd() } = {}) {
  const pairs = [];
  for (const adapter of registry.detected()) {
    let sessions = [];
    try {
      sessions = adapter.listSessions({ cwd });
    } catch {
      continue; // a flaky host (e.g. SQLite on old Node) must not break the rest
    }
    for (const s of sessions) pairs.push({ host: adapter.name, ref: s.ref, sessionId: s.sessionId, mtime: s.mtime ?? 0 });
  }
  // newest first; a missing mtime was already defaulted to 0 above
  pairs.sort((a, b) => b.mtime - a.mtime);
  let filtered = pairs;
  if (session) filtered = pairs.filter((p) => String(p.sessionId).includes(session));
  else if (scope === 'last') filtered = pairs.slice(0, 1);
  // mtime is only a sort key; it is not part of the returned pairs
  return filtered.map((p) => ({ host: p.host, ref: p.ref, sessionId: p.sessionId }));
}
@@ -0,0 +1,52 @@
1
+ // Embeddings — ollama-if-present, else honestly absent.
2
+ //
3
+ // Zero-npm-dep rule holds: ollama is an OPTIONAL local service (default
4
+ // :11434), probed at call time. No keys, nothing leaves the machine. When it's
5
+ // absent, semantic search reports unavailable and lexical+graph carry the day —
6
+ // the vector tier is *earned*, not faked.
7
+
8
+ const BASE = process.env.OLLAMA_HOST || 'http://localhost:11434';
9
+ // small, common embedding models — first one present wins
10
+ const PREFERRED = ['nomic-embed-text', 'mxbai-embed-large', 'all-minilm'];
11
+
12
/**
 * Fetch `BASE + path` and return the parsed JSON body. `opts` is passed through
 * to fetch; `opts.timeout` (ms, default 1500) sets the abort deadline.
 * Throws when the response status is not ok.
 */
async function get(path, opts = {}) {
  const timeoutMs = opts.timeout ?? 1500;
  // opts is spread last, so a caller-supplied `signal` replaces the timeout signal
  const init = { signal: AbortSignal.timeout(timeoutMs), ...opts };
  const res = await fetch(BASE + path, init);
  if (res.ok) return res.json();
  throw new Error(`${path} → ${res.status}`);
}
17
+
18
/**
 * Probe the ollama service and choose an embedding model: the first entry of
 * PREFERRED that some installed model name starts with. Never throws — any
 * failure is reported as `{ available: false, reason }`.
 */
export async function detectEmbedder() {
  try {
    const tags = await get('/api/tags');
    const installed = (tags.models ?? []).map((m) => String(m.name));
    let model;
    for (const wanted of PREFERRED) {
      model = installed.find((n) => n.startsWith(wanted));
      if (model) break;
    }
    if (model) return { available: true, model };
    return { available: false, reason: `ollama up, no embedding model (pull one of: ${PREFERRED.join(', ')})` };
  } catch {
    return { available: false, reason: 'ollama not reachable (optional — semantic search needs it)' };
  }
}
30
+
31
/**
 * Embed one text with the given model and return the embedding array.
 * Throws on request failure or when the response has no `embedding` array
 * (callers decide policy).
 */
export async function embed(model, text) {
  const request = {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ model, prompt: text }),
    timeout: 30_000,
  };
  const response = await get('/api/embeddings', request);
  const vector = response.embedding;
  if (Array.isArray(vector)) return vector;
  throw new Error('no embedding in response');
}
42
+
43
/**
 * Cosine similarity of two numeric vectors, computed over their common prefix
 * (the shorter length). Returns 0 when either norm is zero.
 */
export function cosine(a, b) {
  const len = Math.min(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  let i = 0;
  while (i < len) {
    const x = a[i];
    const y = b[i];
    dot += x * y;
    normA += x * x;
    normB += y * y;
    i += 1;
  }
  if (!normA || !normB) return 0;
  return dot / Math.sqrt(normA * normB);
}
@@ -0,0 +1,98 @@
1
+ // Entity resolution — the gatekeeper between candidates and canonical items.
2
+ //
3
+ // Pure matcher: given a candidate and the existing items, decide
4
+ // new — nothing like it exists
5
+ // duplicate — an item already says this (candidate adds nothing)
6
+ // enrich — an existing item is the same entity; candidate adds evidence/
7
+ // attributes/relations → merge into it
8
+ //
9
+ // Deliberately mechanical (v1): exact id → slug-normalized id → token-overlap
10
+ // fuzzy on body+id with attribute corroboration. Deterministic, hermetically
11
+ // testable; an LLM judge is a later, separate rung. Thresholds are conservative:
12
+ // a false "duplicate" silently loses knowledge, a false "new" merely creates a
13
+ // reviewable proposal the human can reject — so we bias toward "new"/"enrich".
14
+
15
+ import { slugify } from './items.mjs';
16
+
17
// Words dropped before comparison.
const STOP = new Set(['the', 'a', 'an', 'is', 'are', 'this', 'that', 'with', 'for', 'and', 'or', 'of', 'in', 'on', 'to', 'it', 'its', 'project', 's']);

// Light stemmer: strips one trailing 's' from words longer than 3 characters
// that do not end in 'ss' (tests→test, runs→run; class stays class).
const stem = (t) => {
  const strippable = t.length > 3 && t.endsWith('s') && !t.endsWith('ss');
  return strippable ? t.slice(0, -1) : t;
};

/**
 * Tokenise text into a Set: lower-cased, split on runs of characters other
 * than a-z/0-9, with one-character tokens and stop words removed, then stemmed.
 */
export function tokens(text) {
  const result = new Set();
  for (const raw of String(text).toLowerCase().split(/[^a-z0-9]+/)) {
    if (raw.length <= 1 || STOP.has(raw)) continue;
    result.add(stem(raw));
  }
  return result;
}
32
+
33
/** Jaccard similarity of two Sets: |a ∩ b| / |a ∪ b|; 0 when either is empty. */
export function jaccard(a, b) {
  if (!(a.size && b.size)) return 0;
  const shared = [...a].filter((t) => b.has(t)).length;
  const union = a.size + b.size - shared;
  return shared / union;
}
39
+
40
/**
 * Count the keys of `a` that are also in `b` with the same value once both are
 * stringified. Shared attribute VALUES count as strong corroboration
 * (e.g. same command line).
 */
function sharedAttrValues(a = {}, b = {}) {
  const matching = Object.entries(a).filter(([key, value]) => key in b && String(b[key]) === String(value));
  return matching.length;
}
46
+
47
/**
 * Decide whether a candidate is new, a duplicate of an existing item, or an
 * enrichment of one.
 *
 * An id match (exact, or against the slugified item id) wins outright.
 * Otherwise the best same-type item is picked by token overlap of id+body plus
 * 0.25 per shared attribute value; a best score below 0.5 means `new`.
 * @param {object} candidate {id?, type, body, attributes?, relations?, provenance?}
 * @param {Array} items existing canonical items
 * @returns {{verdict:'new'|'duplicate'|'enrich', match?:string, confidence:number, reason:string}}
 */
export function resolve(candidate, items) {
  const candId = candidate.id || slugify(candidate.body);
  const candTokens = tokens(`${candId} ${candidate.body ?? ''}`);

  let best = null;
  for (const item of items) {
    // 1. exact / slug-normalized id match ends the search
    const idHit = item.id === candId || slugify(item.id) === candId;
    if (idHit) {
      best = { item, sim: 1, why: 'id match' };
      break;
    }
    // 2. fuzzy scoring applies to items of the candidate's type only
    if (item.type !== candidate.type) continue;
    const overlap = jaccard(candTokens, tokens(`${item.id} ${item.body ?? ''}`));
    const corroboration = sharedAttrValues(candidate.attributes, item.attributes);
    const score = overlap + corroboration * 0.25;
    const improves = best === null || score > best.sim;
    if (improves) {
      const why = corroboration ? `token overlap + ${corroboration} shared attribute(s)` : 'token overlap';
      best = { item, sim: score, why };
    }
  }

  if (best === null || best.sim < 0.5) {
    const confidence = best === null ? 1 : 1 - best.sim;
    return { verdict: 'new', confidence, reason: 'no sufficiently similar item' };
  }

  // Same entity: `enrich` if the candidate brings a differing attribute value,
  // a relation the item lacks, or any provenance at all; otherwise `duplicate`.
  const { item, sim, why } = best;
  const existingRelations = item.relations ?? [];
  const hasNewAttr = Object.entries(candidate.attributes ?? {}).some(([k, v]) => String(item.attributes?.[k]) !== String(v));
  const hasNewRel = (candidate.relations ?? []).some((r) => !existingRelations.some((e) => e.type === r.type && e.target === r.target));
  const hasProvenance = Boolean((candidate.provenance ?? []).length);
  const addsSomething = hasNewAttr || hasNewRel || hasProvenance;
  return {
    verdict: addsSomething ? 'enrich' : 'duplicate',
    match: item.id,
    confidence: Math.min(sim, 1), // attribute bonuses can push the score above 1
    reason: why,
  };
}
86
+
87
/**
 * Merge a candidate into an existing item (enrich verdict). Pure: returns a new
 * item and leaves both arguments untouched.
 *
 * - attributes: a candidate attribute is added only when the item has no own
 *   attribute of that name (existing values are never overwritten)
 * - relations: appended unless the same type+target is already present
 * - provenance: appended unless the same session+ref is already present
 * @param {object} item existing canonical item
 * @param {object} candidate {attributes?, relations?, provenance?}
 * @returns {object} the merged item
 */
export function merge(item, candidate) {
  const merged = { ...item, attributes: { ...item.attributes }, relations: [...(item.relations ?? [])], provenance: [...(item.provenance ?? [])] };
  for (const [k, v] of Object.entries(candidate.attributes ?? {})) {
    // Own-property check: `in` would also see Object.prototype members, so an
    // attribute named `constructor` or `toString` would never be merged.
    if (!Object.hasOwn(merged.attributes, k)) merged.attributes[k] = v;
  }
  for (const r of candidate.relations ?? []) {
    if (!merged.relations.some((e) => e.type === r.type && e.target === r.target)) merged.relations.push(r);
  }
  for (const p of candidate.provenance ?? []) {
    if (!merged.provenance.some((e) => e.session === p.session && e.ref === p.ref)) merged.provenance.push(p);
  }
  return merged;
}
@@ -0,0 +1,43 @@
1
+ // The inbox — where candidates arrive. Agents (per the faculty block) drop one
2
+ // fact per file into agent/knowledge/inbox/; `zuzuu distill` drops mined candidates
3
+ // the same way. Processing wraps each into an ER'd proposal (the file's full
4
+ // content is preserved inside the proposal JSON) and removes the inbox file.
5
+ //
6
+ // Tolerant input: plain text, or our frontmatter grammar for typed candidates.
7
+
8
+ import { join, basename } from 'node:path';
9
+ import { existsSync, readFileSync, readdirSync, rmSync } from 'node:fs';
10
+ import { parseItem, slugify } from './items.mjs';
11
+ import { createProposal, fileRegistryProposals } from './proposals.mjs';
12
+
13
/** Path of the knowledge inbox directory under `agentDir`. */
export function inboxDir(agentDir) {
  return join(agentDir, 'knowledge', 'inbox');
}
14
+
15
/**
 * Lenient candidate parse. Tries the full item grammar first; if that throws,
 * the trimmed text becomes the body of a bare `fact` whose id is slugified
 * from it. `filename` is accepted but not used.
 */
export function parseCandidate(text, filename = '') {
  try {
    const { id, type, body, attributes, relations, provenance } = parseItem(text);
    return { id, type, body, attributes, relations, provenance };
  } catch {
    const prose = text.trim();
    return { id: slugify(prose), type: 'fact', body: prose, attributes: {}, relations: [], provenance: [] };
  }
}
25
+
26
/**
 * Turn every `.md` / `.txt` file in the inbox into a proposal, deleting each
 * file once its proposal has been created, then file registry proposals.
 * `source` tags where candidates came from ('agent' for inbox drops).
 * @returns {{processed:number, proposals:Array, registryProposals:*}}
 */
export function processInbox(agentDir, { source = 'agent' } = {}) {
  const dir = inboxDir(agentDir);
  if (!existsSync(dir)) return { processed: 0, proposals: [], registryProposals: [] };

  const isCandidateFile = (name) => name.endsWith('.md') || name.endsWith('.txt');
  const proposals = [];
  for (const name of readdirSync(dir)) {
    if (!isCandidateFile(name)) continue;
    const path = join(dir, name);
    const cand = parseCandidate(readFileSync(path, 'utf8'), name);
    // record the inbox file as an extra provenance entry
    const origin = { session: source, ref: `inbox/${basename(name)}` };
    cand.provenance = [...(cand.provenance ?? []), origin];
    proposals.push(createProposal(agentDir, { candidate: cand, source, evidence: { inboxFile: name } }));
    rmSync(path); // full candidate now lives inside the proposal
  }

  const registryProposals = fileRegistryProposals(agentDir);
  return { processed: proposals.length, proposals, registryProposals };
}