@gcunharodrigues/wrxn 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/wrxn.cjs +102 -2
- package/lib/brain.cjs +295 -0
- package/lib/convert.cjs +215 -0
- package/lib/ingest.cjs +174 -0
- package/manifest.json +10 -0
- package/migrations/003-serve-http-door.cjs +44 -0
- package/package.json +2 -2
- package/payload/.claude/hooks/recall-surface.cjs +260 -80
- package/payload/.claude/skills/ingest/SKILL.md +72 -0
- package/payload/.mcp.json +1 -1
- package/payload/.recon-wrxn.json +2 -0
- package/payload/.wrxn/raw/.gitkeep +0 -0
package/bin/wrxn.cjs
CHANGED
|
@@ -11,7 +11,10 @@ const worktree = require('../lib/worktree.cjs');
|
|
|
11
11
|
const executor = require('../lib/executor.cjs');
|
|
12
12
|
const onboard = require('../lib/onboard.cjs');
|
|
13
13
|
const connect = require('../lib/connect.cjs');
|
|
14
|
+
const brain = require('../lib/brain.cjs');
|
|
14
15
|
const statusline = require('../lib/statusline.cjs');
|
|
16
|
+
const { convert } = require('../lib/convert.cjs');
|
|
17
|
+
const { ingest } = require('../lib/ingest.cjs');
|
|
15
18
|
|
|
16
19
|
const PKG_ROOT = path.join(__dirname, '..');
|
|
17
20
|
|
|
@@ -54,6 +57,12 @@ function parseArgs(argv) {
|
|
|
54
57
|
args.flags.owner = argv[++i];
|
|
55
58
|
} else if (a === '--probe') {
|
|
56
59
|
args.flags.probe = argv[++i];
|
|
60
|
+
} else if (a === '--distillation') {
|
|
61
|
+
args.flags.distillation = argv[++i];
|
|
62
|
+
} else if (a === '--limit') {
|
|
63
|
+
args.flags.limit = argv[++i];
|
|
64
|
+
} else if (a === '--type') {
|
|
65
|
+
args.flags.type = argv[++i];
|
|
57
66
|
} else if (a === '--check-report') {
|
|
58
67
|
args.flags['check-report'] = true;
|
|
59
68
|
} else if (a.startsWith('--')) {
|
|
@@ -105,6 +114,17 @@ Usage:
|
|
|
105
114
|
list print all registered connections (agent-readable JSON)
|
|
106
115
|
get <name> print one connection by name
|
|
107
116
|
|
|
117
|
+
wrxn brain query "<q>" [--json] [--limit <n>] [--type <prose|code|NodeType>] [--neighbors] [--root <dir>]
|
|
118
|
+
ask the warm Brain (recon-wrxn's code+prose graph) from the terminal.
|
|
119
|
+
WHOLE-BRAIN by default. Discovers the live serve door via
|
|
120
|
+
.recon-wrxn/serve-endpoint.json and POSTs the query; prints ranked
|
|
121
|
+
hits (name · type · file:line). If the Brain is not warm (no live
|
|
122
|
+
serve), prints a clear error and exits non-zero — no cold load.
|
|
123
|
+
--json emits the structured hits · --limit asks the door for top n ·
|
|
124
|
+
--type post-filters (prose=Page/Section, code=the rest, or an exact
|
|
125
|
+
NodeType) · --neighbors expands each hit to its 1-hop graph neighbors
|
|
126
|
+
(callers/callees/relationships via recon_explain).
|
|
127
|
+
|
|
108
128
|
wrxn statusline [--inject [--path <script>]]
|
|
109
129
|
SYNAPSE live-window writer. With no flag: report whether a statusline
|
|
110
130
|
is configured (~/.claude/settings.json) + print the marker-bounded
|
|
@@ -112,6 +132,20 @@ Usage:
|
|
|
112
132
|
resolved (or --path) statusline script, idempotently (append-only,
|
|
113
133
|
never overwrites). init NEVER touches your statusline.
|
|
114
134
|
|
|
135
|
+
wrxn convert <file> [--cpu] convert a source file to Markdown and print it. Per-format routing:
|
|
136
|
+
markitdown (html/docx/txt/pptx/xlsx) · docling (pdf, with automatic
|
|
137
|
+
CPU fallback on a GPU arch-crash) · pure-JS floor when Python is
|
|
138
|
+
absent. --cpu forces docling onto CPU from the first attempt.
|
|
139
|
+
|
|
140
|
+
wrxn ingest <file> [--distillation <result.json>] [--root <dir>]
|
|
141
|
+
distill a source into the memory wiki: convert (slice 05) → an LLM
|
|
142
|
+
(the ingest skill) produces a summary + N note pages → write them
|
|
143
|
+
to .wrxn/wiki/, each stamped derived_from the raw source, which is
|
|
144
|
+
kept under .wrxn/raw/. ADDITIVE-ONLY: an existing page is never
|
|
145
|
+
overwritten (re-runs are safe). --distillation feeds the skill's
|
|
146
|
+
result JSON (summary,notes); without it, the harness points you at
|
|
147
|
+
the ingest skill.
|
|
148
|
+
|
|
115
149
|
wrxn onboard [--root <dir>] scaffold the Day-1 operator file set under context/ from a filled
|
|
116
150
|
aios-intake.md (the deterministic half of the onboard skill;
|
|
117
151
|
workspace installs only). Idempotent.
|
|
@@ -120,7 +154,7 @@ Profiles: --project (default, the dev pipeline + intelligence + enforcement) |
|
|
|
120
154
|
--workspace (adds the operator layer: onboard/audit/level-up + intake + decisions log +
|
|
121
155
|
connections registry).`;
|
|
122
156
|
|
|
123
|
-
function main(argv) {
|
|
157
|
+
async function main(argv) {
|
|
124
158
|
const args = parseArgs(argv);
|
|
125
159
|
|
|
126
160
|
if (args.flags.version) {
|
|
@@ -294,6 +328,43 @@ function main(argv) {
|
|
|
294
328
|
return 0;
|
|
295
329
|
}
|
|
296
330
|
|
|
331
|
+
if (cmd === 'convert') {
|
|
332
|
+
const file = args._[1];
|
|
333
|
+
if (!file) { process.stderr.write('wrxn: convert requires <file>\n'); return 2; }
|
|
334
|
+
try {
|
|
335
|
+
const md = await convert(path.resolve(file), { gpu: args.flags.cpu ? false : undefined });
|
|
336
|
+
process.stdout.write(md.endsWith('\n') ? md : md + '\n');
|
|
337
|
+
return 0;
|
|
338
|
+
} catch (err) {
|
|
339
|
+
process.stderr.write(`wrxn: ${err.message}\n`);
|
|
340
|
+
return 2;
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
if (cmd === 'ingest') {
|
|
345
|
+
const file = args._[1];
|
|
346
|
+
if (!file) { process.stderr.write('wrxn: ingest requires <file>\n'); return 2; }
|
|
347
|
+
const root = path.resolve(args.flags.root || process.cwd());
|
|
348
|
+
// The distillation is the LLM step (the `ingest` skill). The CLI feeds its structured result via
|
|
349
|
+
// --distillation <result.json>; without one, the harness's defaultDistill points back to the skill.
|
|
350
|
+
let distill;
|
|
351
|
+
if (args.flags.distillation) {
|
|
352
|
+
const dpath = path.resolve(args.flags.distillation);
|
|
353
|
+
distill = () => JSON.parse(fs.readFileSync(dpath, 'utf8'));
|
|
354
|
+
}
|
|
355
|
+
try {
|
|
356
|
+
const report = await ingest(path.resolve(file), { root, ...(distill ? { distill } : {}) });
|
|
357
|
+
process.stdout.write(`wrxn ingest ${report.source} → raw ${report.raw}\n`);
|
|
358
|
+
for (const p of report.written) process.stdout.write(` wrote ${p}\n`);
|
|
359
|
+
for (const p of report.skipped) process.stdout.write(` skipped ${p} (exists — additive-only, never clobbered)\n`);
|
|
360
|
+
process.stdout.write(`${report.written.length} written, ${report.skipped.length} skipped.\n`);
|
|
361
|
+
return 0;
|
|
362
|
+
} catch (err) {
|
|
363
|
+
process.stderr.write(`wrxn: ${err.message}\n`);
|
|
364
|
+
return 2;
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
|
|
297
368
|
if (cmd === 'onboard') {
|
|
298
369
|
const root = path.resolve(args.flags.root || process.cwd());
|
|
299
370
|
let report;
|
|
@@ -353,6 +424,32 @@ function main(argv) {
|
|
|
353
424
|
}
|
|
354
425
|
}
|
|
355
426
|
|
|
427
|
+
if (cmd === 'brain') {
|
|
428
|
+
const sub = args._[1];
|
|
429
|
+
if (sub !== 'query') {
|
|
430
|
+
process.stderr.write(`wrxn: unknown brain subcommand "${sub || ''}" (expected: query)\n\n${USAGE}\n`);
|
|
431
|
+
return 2;
|
|
432
|
+
}
|
|
433
|
+
const q = args._[2];
|
|
434
|
+
if (!q) { process.stderr.write('wrxn: brain query requires "<query>"\n'); return 2; }
|
|
435
|
+
const opts = { json: !!args.flags.json, neighbors: !!args.flags.neighbors };
|
|
436
|
+
if (args.flags.limit != null) {
|
|
437
|
+
const n = parseInt(args.flags.limit, 10);
|
|
438
|
+
if (!Number.isInteger(n) || n <= 0) { process.stderr.write('wrxn: --limit requires a positive integer\n'); return 2; }
|
|
439
|
+
opts.limit = n;
|
|
440
|
+
}
|
|
441
|
+
if (args.flags.type) opts.type = String(args.flags.type);
|
|
442
|
+
const root = path.resolve(args.flags.root || process.cwd());
|
|
443
|
+
try {
|
|
444
|
+
const res = await brain.query(q, opts, { root });
|
|
445
|
+
process.stdout.write(brain.formatHits(res.hits, opts) + '\n');
|
|
446
|
+
return 0;
|
|
447
|
+
} catch (err) {
|
|
448
|
+
process.stderr.write(`wrxn: ${err.message}\n`);
|
|
449
|
+
return 2;
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
|
|
356
453
|
if (cmd === 'statusline') {
|
|
357
454
|
const home = process.env.HOME || os.homedir();
|
|
358
455
|
const detection = statusline.detectStatusLine(home);
|
|
@@ -397,4 +494,7 @@ function main(argv) {
|
|
|
397
494
|
return 2;
|
|
398
495
|
}
|
|
399
496
|
|
|
400
|
-
|
|
497
|
+
main(process.argv.slice(2)).then(
|
|
498
|
+
(code) => process.exit(code),
|
|
499
|
+
(err) => { process.stderr.write(`wrxn: ${err && err.message ? err.message : err}\n`); process.exit(1); }
|
|
500
|
+
);
|
package/lib/brain.cjs
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// WRXN brain query (recon-brain-recall-03) — interrogate the warm Brain from the terminal.
|
|
4
|
+
//
|
|
5
|
+
// The Brain is recon-wrxn's unified code+prose knowledge graph, loaded WARM inside the `recon serve`
|
|
6
|
+
// process Claude Code boots for a session. This command reaches it over the loopback find door that
|
|
7
|
+
// serve announces via a discovery file — it is WHOLE-BRAIN (code AND prose, no scope filter by
|
|
8
|
+
// default), the operator's on-demand counterpart to the prose-only proactive Recall hook.
|
|
9
|
+
//
|
|
10
|
+
// Endpoint-first (v1): if no warm door is discoverable, we raise a clear, actionable error and the CLI
|
|
11
|
+
// exits non-zero — there is NO cold one-shot load (that would pay the index + embedder cost the warm
|
|
12
|
+
// serve already absorbs).
|
|
13
|
+
//
|
|
14
|
+
// The query path takes an INJECTED transport + endpoint reader (deps) so its behavior is unit-testable
|
|
15
|
+
// with no live serve — mirrors the injected-invoker seam in lib/connect.cjs and the recall hook's
|
|
16
|
+
// httpTransport. lib/brain.cjs is PACKAGE code (invoked via bin/wrxn.cjs), NOT payload — no manifest
|
|
17
|
+
// entry, consistent with lib/connect.cjs / lib/executor.cjs / lib/onboard.cjs.
|
|
18
|
+
//
|
|
19
|
+
// The discovery contract (serve-endpoint.json {pid,port}, pid-liveness) is duplicated here from the
|
|
20
|
+
// payload recall-surface hook ON PURPOSE: that hook must be node-stdlib-only and self-contained (it
|
|
21
|
+
// ships into installs without the kernel lib), so package code cannot import it. The contract is ~20
|
|
22
|
+
// stable lines — duplicating it across the install boundary is the same self-containment trade the
|
|
23
|
+
// payload hooks make for findInstallRoot.
|
|
24
|
+
|
|
25
|
+
const fs = require('fs');
|
|
26
|
+
const http = require('http');
|
|
27
|
+
const path = require('path');
|
|
28
|
+
|
|
29
|
+
const ENDPOINT_REL = path.join('.recon-wrxn', 'serve-endpoint.json');
|
|
30
|
+
const FIND_PATH = '/api/tools/recon_find';
|
|
31
|
+
const EXPLAIN_PATH = '/api/tools/recon_explain';
|
|
32
|
+
const TIMEOUT_MS = 5000; // generous: an interactive CLI, not the per-prompt 150ms recall budget
|
|
33
|
+
const MAX_RESPONSE_BYTES = 256 * 1024; // hard cap on an accumulated door response body (anti-flood)
|
|
34
|
+
const PROSE_TYPES = new Set(['Page', 'Section']);
|
|
35
|
+
const WALK_UP_LIMIT = 12;
|
|
36
|
+
|
|
37
|
+
// ── discovery (the cross-repo warm-door contract) ────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
/**
 * Report whether a process id names a live process.
 *
 * Probes with signal 0, which runs the existence/permission check without delivering a signal.
 * ESRCH means the process is gone; EPERM means it exists but is owned by another user, so it still
 * counts as alive. Mirrors the cross-repo discovery contract (and the recall hook).
 *
 * @param {number} pid candidate process id
 * @returns {boolean} true when the process exists; false otherwise, including for any value that
 *   is not a positive integer
 */
function pidAlive(pid) {
  // kill(0, …) and kill(<negative>, …) address process GROUPS rather than one process, so the probe
  // would succeed and misreport a non-pid as alive. Only a positive integer can name a process.
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    return Boolean(err) && err.code === 'EPERM';
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Decide whether a discovery file is safe to believe.
 *
 * A file that another user could have planted, or that is group/world-writable, is refused:
 * trusting it would let a hostile workspace point the door port at an exfil/injection sink.
 * lstat (not stat) is used so a symlink's OWN ownership/mode is judged, not its target's.
 * When process.getuid is unavailable the owner check is skipped but the mode check still applies.
 * Any stat failure means "not trusted".
 *
 * NOTE(review): on Windows the reported mode bits may not reflect real ACLs, so a writable file
 * could always look group/world-writable here and be refused — confirm behavior on win32.
 *
 * @param {string} file path to the discovery file
 * @returns {boolean} true only when the file is owned by the current user (where that can be
 *   checked) and is not group/world-writable
 */
function endpointTrusted(file) {
  let info;
  try {
    info = fs.lstatSync(file);
  } catch {
    return false;
  }
  const ownerCheckable = typeof process.getuid === 'function';
  const foreignOwner = ownerCheckable && info.uid !== process.getuid();
  const looseMode = (info.mode & 0o022) !== 0; // any group- or world-write bit set
  return !(foreignOwner || looseMode);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Locate the warm serve door by walking up from a start directory.
 *
 * Stops at the first directory carrying .recon-wrxn/serve-endpoint.json, then reads and validates
 * its {pid, port}. The file is believed only when it is trusted (not planted), well-formed, names a
 * usable TCP port, and its pid is alive. The first file found decides the outcome: a bad file does
 * NOT cause the walk to continue upward.
 *
 * @param {string} [startDir] directory to start from (default: the current working directory);
 *   may be relative
 * @returns {{pid: number, port: number, root: string} | null} the door and the (absolute) directory
 *   that carries it, or null when the Brain is not warm (absent, untrusted, malformed, missing or
 *   out-of-range fields, or a dead process)
 */
function discoverEndpoint(startDir) {
  const MAX_PORT = 65535; // highest valid TCP port; anything above can never be connected to

  // Resolve first: path.dirname('.') is '.', so a relative start directory would hit the
  // "reached the top" check immediately and never look at any ancestor.
  let dir = path.resolve(startDir || process.cwd());

  for (let depth = 0; depth < WALK_UP_LIMIT; depth++) {
    const file = path.join(dir, ENDPOINT_REL);
    if (fs.existsSync(file)) {
      if (!endpointTrusted(file)) return null; // foreign-owned or loose perms → not warm

      let record;
      try {
        record = JSON.parse(fs.readFileSync(file, 'utf8'));
      } catch {
        return null; // unreadable or malformed → not warm
      }

      const pid = Number(record && record.pid);
      const port = Number(record && record.port);
      if (!Number.isInteger(pid) || pid <= 0) return null;
      if (!Number.isInteger(port) || port <= 0 || port > MAX_PORT) return null;
      if (!pidAlive(pid)) return null; // dead process → not warm
      return { pid, port, root: dir };
    }

    const parent = path.dirname(dir);
    if (parent === dir) break; // filesystem root reached
    dir = parent;
  }
  return null;
}
|
|
95
|
+
|
|
96
|
+
// ── transport (injectable; default = real loopback POST) ─────────────────────────────
|
|
97
|
+
|
|
98
|
+
/**
 * Default transport: one loopback POST of a JSON body, bounded in time and in response size.
 *
 * Injectable so unit tests never touch the network (mirrors lib/connect.cjs's invoke seam).
 *
 * @param {object} request
 * @param {number} request.port loopback port of the door
 * @param {string} request.path request path
 * @param {*} request.body value sent as the JSON request body
 * @param {number} [request.timeoutMs] time budget in ms (default: TIMEOUT_MS)
 * @returns {Promise<{statusCode: number, body: string}>} resolves once the response ends; rejects
 *   on a socket error, a timeout, or a response larger than MAX_RESPONSE_BYTES
 */
function httpTransport({ port, path: reqPath, body, timeoutMs }) {
  return new Promise((resolve, reject) => {
    const encoded = Buffer.from(JSON.stringify(body));
    const budgetMs = timeoutMs || TIMEOUT_MS;
    let wallTimer = null;
    let finished = false;

    // Settle exactly once, and always release the wall-clock timer when doing so.
    const settle = (outcome, value) => {
      if (finished) return;
      finished = true;
      if (wallTimer) clearTimeout(wallTimer);
      outcome(value);
    };
    const fail = (err) => settle(reject, err);

    const onResponse = (res) => {
      const parts = [];
      let received = 0;
      res.on('data', (chunk) => {
        received += chunk.length;
        if (received > MAX_RESPONSE_BYTES) {
          // Over the cap: tear the request down; the failure surfaces through the error handlers.
          req.destroy(new Error('brain door response too large'));
          return;
        }
        parts.push(chunk);
      });
      res.on('end', () => {
        settle(resolve, { statusCode: res.statusCode, body: Buffer.concat(parts).toString('utf8') });
      });
      res.on('error', fail);
    };

    const requestOptions = {
      host: '127.0.0.1',
      port,
      path: reqPath,
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Content-Length': encoded.length },
    };
    const req = http.request(requestOptions, onResponse);
    req.on('error', fail);

    // Two independent bounds: an idle timeout (no bytes for `budgetMs`) and a wall-clock timer.
    // The wall clock stops a peer that dribbles bytes to keep the idle timer from ever firing.
    req.setTimeout(budgetMs, () => req.destroy(new Error('brain door timeout')));
    wallTimer = setTimeout(() => req.destroy(new Error('brain door wall-clock timeout')), budgetMs);

    req.write(encoded);
    req.end();
  });
}
|
|
142
|
+
|
|
143
|
+
/**
 * POST one door tool and return its parsed JSON body.
 *
 * Every failure becomes a clean, descriptive Error (never a crash): a transport fault, a missing
 * or non-200 response, or a body that is not JSON. The underlying failure is kept as `cause`.
 *
 * @param {Function} transport async ({port, path, body, timeoutMs}) => {statusCode, body}
 * @param {number} port door port
 * @param {string} reqPath tool path to POST to
 * @param {object} body JSON-serializable request body
 * @param {number} [timeoutMs] time budget in ms (default: TIMEOUT_MS)
 * @returns {Promise<*>} the parsed response body
 * @throws {Error} on a transport fault, a non-200 status, or a non-JSON body
 */
async function postTool(transport, port, reqPath, body, timeoutMs) {
  let resp;
  try {
    resp = await transport({ port, path: reqPath, body, timeoutMs: timeoutMs || TIMEOUT_MS });
  } catch (err) {
    // An injected transport may reject with a non-Error (string, null, …); never print "undefined"
    // and never throw a TypeError from inside this handler.
    const reason = err && err.message ? err.message : String(err);
    throw new Error(`Brain door request to ${reqPath} failed: ${reason}`, { cause: err });
  }
  if (!resp || resp.statusCode !== 200) {
    throw new Error(`Brain door returned HTTP ${resp ? resp.statusCode : 'no-response'} for ${reqPath}`);
  }
  try {
    return JSON.parse(resp.body);
  } catch (err) {
    throw new Error(`Brain door returned a malformed (non-JSON) response for ${reqPath}`, { cause: err });
  }
}
|
|
161
|
+
|
|
162
|
+
// ── pure helpers ─────────────────────────────────────────────────────────────────────
|
|
163
|
+
|
|
164
|
+
/**
 * Tell whether a hit is prose, i.e. its `type` is one of PROSE_TYPES.
 *
 * @param {object} hit a find hit (may be null/undefined)
 * @returns {boolean} true for a prose hit; false for anything else, including a missing hit
 */
function isProse(hit) {
  if (!hit) return false;
  return PROSE_TYPES.has(hit.type);
}
|
|
167
|
+
|
|
168
|
+
/**
 * Post-filter hits by the --type selector.
 *
 * The find request cannot carry a type ARRAY, so prose (Page + Section) is always a post-filter.
 * The selector is matched case-insensitively throughout, so `Prose`, `CODE` and `function` behave
 * like `prose`, `code` and `Function`:
 *   - 'prose' → hits for which isProse is true
 *   - 'code'  → every other hit
 *   - else    → hits whose own `type` equals the selector
 *
 * @param {object[]} hits hits returned by the door
 * @param {string} [type] selector; falsy means "no filter"
 * @returns {object[]} `hits` itself when no selector is given, otherwise a new filtered array
 */
function filterByType(hits, type) {
  if (!type) return hits;

  const wanted = String(type).toLowerCase();
  if (wanted === 'prose') return hits.filter(isProse);
  if (wanted === 'code') return hits.filter((h) => Boolean(h) && !isProse(h));

  // A hit without a type can never satisfy an exact-type selector. Without this guard a missing
  // type would stringify to "undefined" and match the literal selector `undefined`.
  return hits.filter((h) => Boolean(h) && h.type != null && String(h.type).toLowerCase() === wanted);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Pull a hit's 1-hop neighbors out of the door's structured recon_explain response.
 *
 * Expected shape: { result, neighbors: NeighborHit[] } with
 *   NeighborHit = { name, type, file, line, relationship }
 *   relationship ∈ caller | callee | import | importedBy | method | implementedBy | usedBy | testRef
 * Strictly 1-hop, and the shape is consumed as-is. Each neighbor is reduced to name/type/file, plus
 * `line` when it is not null/undefined and `relationship` when it is truthy.
 *
 * @param {*} resp parsed recon_explain response
 * @returns {object[]} the neighbors; [] when `resp` is not an object or has no `neighbors` array
 *   (e.g. a degraded/empty explain), so the hit simply has no neighbors
 */
function extractNeighbors(resp) {
  const usable = Boolean(resp) && typeof resp === 'object' && Array.isArray(resp.neighbors);
  if (!usable) return [];

  return resp.neighbors.map((entry) => {
    const { name, type, file, line, relationship } = entry || {};
    const neighbor = { name, type, file };
    if (line != null) neighbor.line = line;
    if (relationship) neighbor.relationship = relationship;
    return neighbor;
  });
}
|
|
193
|
+
|
|
194
|
+
// ── formatting (pure) ────────────────────────────────────────────────────────────────
|
|
195
|
+
|
|
196
|
+
/**
 * Render one hit as `name · type · file:line`.
 *
 * Missing pieces degrade gracefully: no name → "(unnamed)", no type → "?", no file → the location
 * is omitted, no line → just the file. A hit that is not an object (e.g. a null entry in a
 * malformed door response) is rendered like an empty hit instead of throwing.
 *
 * @param {object} h a hit or neighbor ({ name, type, file, line })
 * @returns {string} the single-line rendering
 */
function hitLine(h) {
  const hit = h && typeof h === 'object' ? h : {};
  const name = hit.name || '(unnamed)';
  const type = hit.type || '?';
  if (!hit.file) return `${name} · ${type}`;
  // `!= null` (not truthiness) so a line number of 0 is still printed.
  const loc = hit.line != null ? `${hit.file}:${hit.line}` : hit.file;
  return `${name} · ${type} · ${loc}`;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Render one neighbor as a list item beneath its hit: the hit line, followed by the relationship
 * in square brackets when the neighbor carries one.
 *
 * @param {object} n a neighbor ({ name, type, file, line, relationship })
 * @returns {string} the single-line rendering
 */
function neighborLine(n) {
  let suffix = '';
  if (n.relationship) suffix = ` [${n.relationship}]`;
  return [' - ', hitLine(n), suffix].join('');
}
|
|
207
|
+
|
|
208
|
+
/**
 * Render query results for the terminal.
 *
 * With `opts.json` the structured hits are re-emitted as pretty-printed JSON. Otherwise the result
 * is a human-readable list with one line per hit; with `opts.neighbors` each hit is followed by
 * its 1-hop neighbors (or a placeholder line when it has none).
 *
 * @param {object[]} hits the hits to render; a non-array is treated as no hits
 * @param {{json?: boolean, neighbors?: boolean}} [opts]
 * @returns {string} the rendered text (no trailing newline)
 */
function formatHits(hits, opts = {}) {
  const list = Array.isArray(hits) ? hits : [];
  if (opts.json) return JSON.stringify(list, null, 2);
  if (list.length === 0) return 'no results';

  const rendered = list.flatMap((hit) => {
    const block = [hitLine(hit)];
    if (!opts.neighbors) return block;

    const around = Array.isArray(hit.neighbors) ? hit.neighbors : [];
    if (around.length === 0) {
      block.push(' (no 1-hop neighbors)');
    } else {
      for (const neighbor of around) block.push(neighborLine(neighbor));
    }
    return block;
  });
  return rendered.join('\n');
}
|
|
225
|
+
|
|
226
|
+
// ── the query (IO shell over the injected seam) ──────────────────────────────────────
|
|
227
|
+
|
|
228
|
+
// Shown when discovery yields no usable door. Discovery treats a discovery file that is not
// trusted (foreign-owned, or group/world-writable) exactly like a missing one, so the message
// names that cause too. Otherwise a file written with a permissive mode is reported as
// "absent, malformed, or dead" and the operator has nothing to act on.
const NOT_WARM =
  'Brain is not warm — no live recon serve door found (.recon-wrxn/serve-endpoint.json is absent, ' +
  'untrusted (not owned by you, or group/world-writable), malformed, or its process is dead). ' +
  'Open a Claude Code session (which boots recon serve), or run ' +
  '`recon serve` with the find door enabled, then retry.';
|
|
232
|
+
|
|
233
|
+
/**
 * Query the warm Brain. Whole-brain (code+prose) by default.
 *
 * Flow: discover the live door → POST the find tool → validate the response shape → post-filter by
 * `opts.type` → optionally expand each hit to its 1-hop neighbors through the explain tool.
 *
 * @param {string} q the query string
 * @param {object} opts { json?, limit?, type?, neighbors? } — `limit` is forwarded to the door only
 *   when it is a positive integer; `type` is applied as a post-filter
 * @param {object} deps { root?, discover?, transport?, timeoutMs? } — injected seam for tests
 * @returns {Promise<{hits: object[]}>} the hits; with `opts.neighbors` each hit also carries a
 *   `neighbors` array
 * @throws a clear error when the query is empty, the Brain is not warm, or the door returns a
 *   malformed/non-200/unexpectedly-shaped response.
 */
async function query(q, opts = {}, deps = {}) {
  const term = String(q == null ? '' : q).trim();
  if (!term) throw new Error('wrxn brain query requires a non-empty query string');

  const startDir = deps.root || process.env.CLAUDE_PROJECT_DIR || process.cwd();
  const discover = deps.discover || discoverEndpoint;
  const transport = deps.transport || httpTransport;
  const timeoutMs = deps.timeoutMs || TIMEOUT_MS;

  const door = discover(startDir);
  if (!door) throw new Error(NOT_WARM);

  const findBody = { query: term };
  if (Number.isInteger(opts.limit) && opts.limit > 0) findBody.limit = opts.limit;

  const found = await postTool(transport, door.port, FIND_PATH, findBody, timeoutMs);
  // A valid JSON body can still be `null` or a primitive; guard before reading `.hits` so the
  // caller gets the shape error below rather than a TypeError.
  if (!found || !Array.isArray(found.hits)) {
    throw new Error(
      'Brain door returned an unexpected response shape (no structured `hits` array) — the recon-wrxn ' +
        'serve door may predate the structured find response.'
    );
  }

  // Drop entries that are not objects (e.g. a stray null) so neither the neighbor expansion below
  // nor the formatter ever dereferences a non-hit.
  const wellFormed = found.hits.filter((h) => h !== null && typeof h === 'object');
  const hits = filterByType(wellFormed, opts.type);

  // --neighbors: 1-hop expansion per hit via recon_explain — the ONLY place 1-hop lives. Requests
  // run one at a time. A per-hit explain failure degrades to empty neighbors (the find already
  // succeeded); it never crashes.
  if (opts.neighbors) {
    for (const h of hits) {
      const explainBody = { name: h.name };
      if (h.file) explainBody.file = h.file;
      try {
        h.neighbors = extractNeighbors(await postTool(transport, door.port, EXPLAIN_PATH, explainBody, timeoutMs));
      } catch {
        h.neighbors = [];
      }
    }
  }

  return { hits };
}
|
|
282
|
+
|
|
283
|
+
module.exports = {
|
|
284
|
+
query,
|
|
285
|
+
formatHits,
|
|
286
|
+
discoverEndpoint,
|
|
287
|
+
pidAlive,
|
|
288
|
+
httpTransport,
|
|
289
|
+
filterByType,
|
|
290
|
+
extractNeighbors,
|
|
291
|
+
isProse,
|
|
292
|
+
FIND_PATH,
|
|
293
|
+
EXPLAIN_PATH,
|
|
294
|
+
PROSE_TYPES,
|
|
295
|
+
};
|
package/lib/convert.cjs
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Converter primitive (multiformat-distill-05) — convert(srcPath) → Markdown, per-format routing.
|
|
4
|
+
//
|
|
5
|
+
// Decision (ADR 0001 / PRD §5, empirically baked off): markitdown is the primary subprocess for the
|
|
6
|
+
// office/web matrix (html/docx/pptx/xlsx); txt is a zero-dep pass-through; PDF escalates to docling
|
|
7
|
+
// (SOTA tables + OCR), which auto-grabs the GPU and CRASHES on arch-incompat (the GTX-1070/Pascal
|
|
8
|
+
// sm_61 trap — torch cu13x ships no sm_61 kernel) → we force CPU on that crash. When Python /
|
|
9
|
+
// markitdown is absent (ENOENT) we degrade to the pure-JS floor (turndown / mammoth / unpdf / SheetJS).
|
|
10
|
+
//
|
|
11
|
+
// The spawn boundary is INJECTED, mirroring lib/connect.cjs's injectable `invoke`: convert(src,{run})
|
|
12
|
+
// takes a converter runner so routing, ENOENT-degrade, and the CPU fallback are unit-testable WITHOUT
|
|
13
|
+
// any real binary. defaultRun does the real spawnSync — that is what makes the integration check
|
|
14
|
+
// "validated by invocation". convert is async only so the pure-JS floor (mammoth/unpdf are async)
|
|
15
|
+
// can be wired in completely; the primary subprocess path is plain blocking spawnSync.
|
|
16
|
+
|
|
17
|
+
const fs = require('fs');
|
|
18
|
+
const os = require('os');
|
|
19
|
+
const path = require('path');
|
|
20
|
+
const { spawnSync } = require('child_process');
|
|
21
|
+
|
|
22
|
+
// Extension → logical format. (.htm folds into html.)
|
|
23
|
+
const FORMATS = {
|
|
24
|
+
'.html': 'html',
|
|
25
|
+
'.htm': 'html',
|
|
26
|
+
'.docx': 'docx',
|
|
27
|
+
'.txt': 'txt',
|
|
28
|
+
'.pptx': 'pptx',
|
|
29
|
+
'.xlsx': 'xlsx',
|
|
30
|
+
'.pdf': 'pdf',
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
// CUDA / arch-incompat crash signatures — the Pascal sm_61 trap and friends. docling auto-grabs the
|
|
34
|
+
// GPU; a torch build with no matching SM kernel dies with "no kernel image is available...".
|
|
35
|
+
const ARCH_CRASH_RE = /no kernel image|kernel image is available|sm_\d+|CUDA error|CUDA_ERROR|device-side assert|out of memory/i;
|
|
36
|
+
|
|
37
|
+
const SPAWN_OPTS = { encoding: 'utf8', timeout: 600000, maxBuffer: 256 * 1024 * 1024 };
|
|
38
|
+
|
|
39
|
+
// ── the injected boundary's real implementation ────────────────────────────────
|
|
40
|
+
|
|
41
|
+
/**
 * The real converter boundary: run a converter subprocess synchronously and report the outcome as
 * { ok: true, markdown } or { ok: false, error }.
 *
 * error.code is 'ENOENT' (binary not installed → caller degrades), 'CRASH' (docling died on an
 * arch-incompat signature or a signal → caller retries on CPU), 'EXIT' (any other non-zero exit),
 * or whatever code the spawn error itself carried.
 *
 * @param {string} tool 'markitdown' or 'docling'
 * @param {string} srcPath path of the file to convert
 * @param {{device?: string}} [options] device 'cpu' forces docling onto the CPU
 * @returns {{ok: true, markdown: string} | {ok: false, error: object}}
 * @throws {Error} for an unknown tool, or when docling exits cleanly but leaves no markdown behind
 */
function defaultRun(tool, srcPath, { device } = {}) {
  switch (tool) {
    case 'markitdown':
      return normalize(spawnSync('markitdown', [srcPath], SPAWN_OPTS));

    case 'docling': {
      // docling writes <basename>.md into an --output dir (no markdown on stdout); read it back.
      const outDir = fs.mkdtempSync(path.join(os.tmpdir(), 'wrxn-docling-'));
      try {
        const forceCpu = device === 'cpu';
        const args = [srcPath, '--to', 'md', '--output', outDir, ...(forceCpu ? ['--device', 'cpu'] : [])];
        // On CPU also hide every CUDA device from the child, so it cannot grab the GPU anyway.
        const opts = forceCpu
          ? { ...SPAWN_OPTS, env: { ...process.env, CUDA_VISIBLE_DEVICES: '' } }
          : { ...SPAWN_OPTS };

        const r = spawnSync('docling', args, opts);
        if (r.error) return { ok: false, error: classifyError(r.error) };

        const failed = r.status !== 0 || r.signal;
        if (!failed) return { ok: true, markdown: readDoclingOutput(outDir, srcPath) };

        const stderr = r.stderr || '';
        // A crash signature on stderr, or death by signal, marks the failure as retryable on CPU.
        const crashed = ARCH_CRASH_RE.test(stderr) || r.signal;
        return {
          ok: false,
          error: { code: crashed ? 'CRASH' : 'EXIT', status: r.status, signal: r.signal, message: stderr.trim() },
        };
      } finally {
        fs.rmSync(outDir, { recursive: true, force: true });
      }
    }

    default:
      throw new Error(`unknown converter tool: ${tool}`);
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Map a spawnSync result whose markdown arrives on stdout to the runner's result shape.
 *
 * @param {object} r spawnSync result ({ error, status, signal, stdout, stderr })
 * @returns {{ok: true, markdown: string} | {ok: false, error: object}} a spawn error is classified
 *   as-is; a non-zero exit or a signal becomes code 'EXIT' with the trimmed stderr as the message
 */
function normalize(r) {
  if (r.error) return { ok: false, error: classifyError(r.error) };

  const exitedCleanly = r.status === 0 && !r.signal;
  if (exitedCleanly) return { ok: true, markdown: r.stdout };

  const message = (r.stderr || '').trim();
  return { ok: false, error: { code: 'EXIT', status: r.status, signal: r.signal, message } };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Reduce a spawn error to the { code, message } pair used in runner results.
 *
 * @param {Error|object} err the error reported by spawnSync
 * @returns {{code: string, message: string}} `code` falls back to 'ERR' and `message` to the
 *   error's string form when the respective field is missing or empty
 */
function classifyError(err) {
  const code = err.code ? err.code : 'ERR';
  const message = err.message ? err.message : String(err);
  return { code, message };
}
|
|
86
|
+
|
|
87
|
+
/**
 * Read back the markdown docling wrote into its output directory.
 *
 * Prefers `<source basename>.md`; if that file is absent, falls back to the first entry in the
 * directory listing whose name ends in `.md` (case-insensitive), since naming can vary by version.
 *
 * @param {string} outDir directory docling was told to write into
 * @param {string} srcPath the converted source file (only its basename is used)
 * @returns {string} the markdown text
 * @throws {Error} when the directory contains no markdown file
 */
function readDoclingOutput(outDir, srcPath) {
  const stem = path.basename(srcPath, path.extname(srcPath));
  let target = path.join(outDir, `${stem}.md`);

  if (!fs.existsSync(target)) {
    const fallback = fs.readdirSync(outDir).find((entry) => entry.toLowerCase().endsWith('.md'));
    if (!fallback) throw new Error(`docling produced no markdown in ${outDir}`);
    target = path.join(outDir, fallback);
  }
  return fs.readFileSync(target, 'utf8');
}
|
|
96
|
+
|
|
97
|
+
// ── the pure-JS floor (no-Python degrade) ───────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
/**
 * Load an optional pure-JS floor dependency on first use.
 *
 * @param {string} mod module name to require
 * @returns {*} the loaded module
 * @throws {Error} an actionable install hint whenever the require fails for any reason
 */
function lazy(mod) {
  let loaded;
  try {
    loaded = require(mod);
  } catch {
    const problem = `pure-JS floor needs "${mod}" but it is not installed, and the primary converter is absent. `;
    const remedy = `Install the primary path (pip install 'markitdown[all]' / docling) or the floor (npm i ${mod}).`;
    throw new Error(problem + remedy);
  }
  return loaded;
}
|
|
109
|
+
|
|
110
|
+
/**
 * The no-Python, in-process floor (research §2: turndown / mammoth / unpdf / SheetJS). Async.
 *
 * Every third-party module is loaded lazily, so a missing one only fails the format that needs it.
 *
 * @param {string} fmt logical format: 'txt' | 'html' | 'docx' | 'pdf' | 'xlsx' | 'pptx'
 * @param {string} srcPath path of the file to convert
 * @returns {Promise<string>} the converted text
 * @throws {Error} when the format has no floor, or a module it needs cannot be loaded
 */
async function defaultFloor(fmt, srcPath) {
  switch (fmt) {
    case 'txt':
      return fs.readFileSync(srcPath, 'utf8');

    case 'html': {
      const Turndown = lazy('turndown');
      const service = new Turndown();
      try {
        const { gfm } = require('turndown-plugin-gfm');
        service.use(gfm);
      } catch { /* gfm tables are a nice-to-have, not required */ }
      return service.turndown(fs.readFileSync(srcPath, 'utf8'));
    }

    case 'docx': {
      // docx → HTML (mammoth) → Markdown (turndown).
      // NOTE(review): unlike the html branch, the gfm plugin is not applied here — confirm intended.
      const mammoth = lazy('mammoth');
      const Turndown = lazy('turndown');
      const { value: html } = await mammoth.convertToHtml({ path: srcPath });
      return new Turndown().turndown(html);
    }

    case 'pdf': {
      const { extractText, getDocumentProxy } = lazy('unpdf');
      const bytes = new Uint8Array(fs.readFileSync(srcPath));
      const doc = await getDocumentProxy(bytes);
      const { text } = await extractText(doc, { mergePages: true });
      return text;
    }

    case 'xlsx': {
      // One "## <sheet name>" section per sheet, each followed by that sheet as CSV.
      const XLSX = lazy('xlsx');
      const workbook = XLSX.readFile(srcPath);
      const sections = workbook.SheetNames.map(
        (sheet) => `## ${sheet}\n\n${XLSX.utils.sheet_to_csv(workbook.Sheets[sheet])}`
      );
      return sections.join('\n\n');
    }

    case 'pptx': {
      const officeParser = lazy('officeparser');
      return await officeParser.parseOfficeAsync(srcPath);
    }

    default:
      throw new Error(`no pure-JS floor for format "${fmt}"`);
  }
}
|
|
146
|
+
|
|
147
|
+
// ── the primitive ───────────────────────────────────────────────────────────────
|
|
148
|
+
|
|
149
|
+
/**
 * Convert a source file to Markdown via per-format routing.
 *
 * Routing: txt is passed through unchanged; pdf goes to convertPdf; every other supported format
 * goes to markitdown, degrading to the pure-JS floor only when markitdown is not installed (ENOENT).
 *
 * @param {string} srcPath
 * @param {{ run?: Function, floor?: Function, gpu?: boolean }} [opts]
 *   run   — injectable converter boundary (default: defaultRun, the real spawnSync).
 *   floor — injectable pure-JS floor (default: defaultFloor).
 *   gpu   — false forces docling onto CPU from the first attempt (skips the GPU probe/crash).
 * @returns {Promise<string>} the markdown.
 * @throws {Error} when the source is missing, its extension is unsupported, or the converter fails.
 */
async function convert(srcPath, { run = defaultRun, floor = defaultFloor, gpu } = {}) {
  // Resolve to an absolute path up front so a leading-dash filename can never be read as a CLI flag
  // by the converter subprocess — the dash-neutralization must not depend on the caller.
  const source = path.resolve(srcPath);

  // Reject a missing file here with a clean message, so it never reaches the converter subprocess
  // (whose own error output would otherwise be shown to the user verbatim).
  if (!fs.existsSync(source)) throw new Error(`wrxn convert: source not found: ${source}`);

  const ext = path.extname(source).toLowerCase();
  const fmt = FORMATS[ext];
  if (!fmt) {
    throw new Error(`wrxn convert: unsupported format "${ext || '(none)'}" — supported: ${Object.keys(FORMATS).join(', ')}`);
  }

  // txt is already plain text — pass it through (zero-dep, always works).
  if (fmt === 'txt') return fs.readFileSync(source, 'utf8');

  if (fmt === 'pdf') return convertPdf(source, { run, floor, gpu });

  // markitdown-primary formats (html/docx/pptx/xlsx).
  const result = run('markitdown', source);
  if (result && result.ok) return result.markdown;

  // An injected runner may report a failure without an `error` object; read it defensively so the
  // caller always gets the "markitdown failed" message instead of a TypeError.
  const failure = (result && result.error) || {};
  if (failure.code === 'ENOENT') {
    return floor(fmt, source); // markitdown absent → degrade to the pure-JS floor
  }
  const detail = failure.message || failure.code || 'unknown error';
  throw new Error(`wrxn convert: markitdown failed on ${path.basename(source)} — ${detail}`);
}
|
|
190
|
+
|
|
191
|
+
/**
 * PDF tier: docling (GPU/auto) → CPU on an arch-crash → pure-JS floor if docling is absent.
 *
 * @param {string} srcPath path of the PDF
 * @param {{ run: Function, floor: Function, gpu?: boolean }} deps
 *   run   — converter boundary, called as run('docling', srcPath, { device }).
 *   floor — pure-JS floor, called as floor('pdf', srcPath) when docling is not installed.
 *   gpu   — false starts on CPU, which also disables the CPU retry (there is nothing to fall to).
 * @returns {Promise<string>} the markdown.
 * @throws {Error} when docling fails for a reason other than "not installed" and no retry succeeds.
 */
async function convertPdf(srcPath, { run, floor, gpu }) {
  // Read a runner result defensively: an injected runner may report a failure without an `error`
  // object, which must still produce the "docling failed" message rather than a TypeError.
  const failureOf = (result) => (result && result.error) || {};
  const detailOf = (result) => {
    const failure = failureOf(result);
    return failure.message || failure.code || 'unknown error';
  };

  const name = path.basename(srcPath);
  const firstDevice = gpu === false ? 'cpu' : undefined; // undefined = let docling pick (GPU/auto)

  const first = run('docling', srcPath, { device: firstDevice });
  if (first && first.ok) return first.markdown;

  const firstCode = failureOf(first).code;
  if (firstCode === 'ENOENT') {
    return floor('pdf', srcPath); // no docling → unpdf floor
  }

  if (firstCode === 'CRASH' && firstDevice !== 'cpu') {
    // arch-incompat / GPU crash → force CPU (CUDA_VISIBLE_DEVICES='' + --device cpu).
    const cpu = run('docling', srcPath, { device: 'cpu' });
    if (cpu && cpu.ok) return cpu.markdown;
    if (failureOf(cpu).code === 'ENOENT') return floor('pdf', srcPath);
    throw new Error(`wrxn convert: docling failed on the CPU fallback for ${name} — ${detailOf(cpu)}`);
  }

  throw new Error(`wrxn convert: docling failed on ${name} — ${detailOf(first)}`);
}
|
|
208
|
+
|
|
209
|
+
module.exports = {
|
|
210
|
+
convert,
|
|
211
|
+
defaultRun,
|
|
212
|
+
defaultFloor,
|
|
213
|
+
FORMATS,
|
|
214
|
+
ARCH_CRASH_RE,
|
|
215
|
+
};
|