candor-ts 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/query-core.mjs ADDED
@@ -0,0 +1,302 @@
1
+ /**
2
+ * candor query core — the SPEC §3.1 read-only queries as PURE functions over a loaded report +
3
+ * callgraph sidecar. Shared by the MCP server (mcp.mjs) so the agent surface and the CLI compute
4
+ * the same answers. Shapes match the reference engines (candor-query / candor-java); a cross-check
5
+ * test (test.mjs) pins them against query.mjs and the Rust binary so this can't drift — the family's
6
+ * no-two-truths rule, enforced by test rather than (yet) by query.mjs importing this.
7
+ *
8
+ * Every function takes already-loaded data (fns = the report's `functions`; cg = the callgraph
9
+ * object name->callees) and RETURNS a plain object — no I/O, no process exit. The caller emits.
10
+ */
11
+ import fs from "node:fs";
12
+ import nodePath from "node:path";
13
+
14
/**
 * List the sibling JSON files of a multi-report prefix.
 *
 * candor-scan writes `<prefix>.<crate>.scan.json`, one per workspace member, so the loaders must
 * read ANY engine's output, not just candor-ts's single `<prefix>.json`. This is the cross-engine
 * premise: an agent queries a report from any language identically.
 *
 * @param {string} prefix - report prefix (directory + base name, without extension)
 * @param {(fileName: string) => boolean} predicate - extra filter, called with the bare file name
 * @returns {string[]} directory-joined paths of entries named `<base>.….json` that pass
 *   `predicate`; `[]` when listing or filtering throws
 */
function siblings(prefix, predicate) {
  const folder = nodePath.dirname(prefix) || ".";
  const stem = nodePath.basename(prefix) + ".";
  try {
    const found = [];
    for (const name of fs.readdirSync(folder)) {
      if (!name.startsWith(stem)) continue;
      if (!name.endsWith(".json")) continue;
      if (predicate(name)) found.push(name);
    }
    return found.map((name) => nodePath.join(folder, name));
  } catch {
    // Best effort: an unreadable directory simply contributes no siblings.
    return [];
  }
}
25
/**
 * True when a sibling file name is a real REPORT — not a callgraph sidecar, an encountered-crate
 * ledger, or a calibrated-coverage sidecar.
 *
 * Exported so `hasReport` (the MCP existence check) uses the SAME predicate as the loader.
 * Otherwise a prefix whose only sibling is `.encountered-*` / `.calibrated.json` passes the
 * existence check but loads ZERO functions: an authoritative-empty result (a silent under-report).
 *
 * @param {string} f - bare file name
 * @returns {boolean}
 */
export const isReport = (f) => {
  if (f.endsWith(".callgraph.json")) return false;
  if (f.includes(".encountered-")) return false;
  return !f.endsWith(".calibrated.json");
};
30
+
31
/**
 * Normalize one report entry so the queries can rely on its list fields.
 *
 * Defends against a partial, old-engine or hand-edited report: `inferred`, `direct` and `calls`
 * are kept only when they are real arrays and replaced by `[]` otherwise. A wrong-typed value must
 * NOT survive — `new Set("Net")` would iterate characters into {N,e,t}, a fabricated effect set.
 * Every other field of the entry is copied through unchanged.
 *
 * @param {object} e - a raw report entry
 * @returns {object} a shallow copy of `e` with array-valued `inferred`, `direct`, `calls`
 */
function normFn(e) {
  const clean = { ...e };
  for (const field of ["inferred", "direct", "calls"]) {
    const value = e[field];
    clean[field] = Array.isArray(value) ? value : [];
  }
  return clean;
}
39
+
40
/**
 * Turn a parsed report into a clean list of function entries.
 *
 * Accepts either `{ functions: [...] }` or a bare array. A non-array `functions`, or an entry that
 * is not an object with a STRING `fn`, is DISCLOSED on stderr and dropped. It would otherwise crash
 * a query (a deref on a fn-less entry) or fabricate a junk entity (a primitive normalized into
 * `{0:'t',…}`). This is the never-crash / never-fabricate posture for malformed input.
 *
 * @param {*} parsed - the JSON-parsed report
 * @param {string} source - file name used in the disclosure messages
 * @returns {object[]} normalized entries (possibly empty)
 */
function normFns(parsed, source) {
  let raw = parsed;
  if (parsed && typeof parsed === "object" && parsed.functions !== undefined) {
    raw = parsed.functions;
  }
  if (!Array.isArray(raw)) {
    console.error(`candor-ts: report ${source} has no functions array — OMITTED from this query (malformed report)`);
    return [];
  }
  const kept = [];
  for (let i = 0; i < raw.length; i += 1) {
    const entry = raw[i];
    const usable = Boolean(entry) && typeof entry === "object" && typeof entry.fn === "string";
    if (usable) {
      kept.push(normFn(entry));
    } else {
      console.error(`candor-ts: report ${source} has a malformed entry (no string \`fn\`) — skipped`);
    }
  }
  return kept;
}
57
+
58
/**
 * Load the function entries of a report prefix.
 *
 * If `<prefix>.json` exists it is the whole report. Otherwise the multi-report siblings (the
 * Rust/workspace form) are merged in the order `siblings` returns them. A file that fails to read
 * or parse is DISCLOSED on stderr and omitted, never thrown: a corrupt or mid-write report must not
 * produce an uncaught stack trace on the CLI, and a vanished report must not read as "no effect".
 *
 * @param {string} prefix - report prefix (path without extension)
 * @returns {object[]} normalized function entries (possibly empty)
 */
export function loadReport(prefix) {
  const primary = `${prefix}.json`;
  if (fs.existsSync(primary)) {
    try { return normFns(JSON.parse(fs.readFileSync(primary, "utf8")), primary); }
    catch { console.error(`candor-ts: report ${prefix}.json failed to parse — OMITTED (corrupt or mid-write); re-run the scan`); return []; }
  }
  // No exact <prefix>.json — merge the multi-report siblings.
  const fns = [];
  for (const f of siblings(prefix, isReport)) {
    let entries;
    try { entries = normFns(JSON.parse(fs.readFileSync(f, "utf8")), f); }
    catch {
      console.error(`candor-ts: report ${f} failed to parse — its functions are OMITTED from this query (corrupt or mid-write); re-run the scan`);
      continue;
    }
    // Append one at a time, OUTSIDE the try: `fns.push(...entries)` passes every entry as a call
    // argument, which can throw RangeError on a very large report — and the catch above would then
    // mislabel a healthy report as "failed to parse" and drop all of its functions.
    for (const entry of entries) fns.push(entry);
  }
  return fns;
}
74
/**
 * Load the callgraph (caller name -> callee names) of a report prefix.
 *
 * If `<prefix>.callgraph.json` exists it is the whole graph. Otherwise the `*.callgraph.json`
 * siblings are merged. A file that fails to read or parse is DISCLOSED on stderr and contributes
 * no edges (tolerate + disclose, never an uncaught stack trace on the CLI).
 *
 * Each sibling is normalized BEFORE it is merged, and a caller present in several siblings gets
 * the UNION of its callee lists. Merging raw parses with `Object.assign` would:
 *  - fabricate index-named nodes ("0", "1", …) from a sibling that parsed to an array or string;
 *  - keep only the last sibling's edges for a shared caller (edges silently vanish);
 *  - let a `__proto__` key replace the accumulator's prototype.
 *
 * @param {string} prefix - report prefix (path without extension)
 * @returns {Object<string, Array>} plain object whose every value is an array (possibly empty)
 */
export function loadCallgraph(prefix) {
  // A `null`/non-object parse (a `null` callgraph, an array, a number) must NOT reach
  // Object.entries — it throws "Cannot convert null to object". Anything but a plain object
  // becomes {} (an empty graph), and any non-array callee list becomes [].
  const norm = (cg) => (cg && typeof cg === "object" && !Array.isArray(cg))
    ? Object.fromEntries(Object.entries(cg).map(([k, v]) => [k, Array.isArray(v) ? v : []]))
    : {};
  const primary = `${prefix}.callgraph.json`;
  if (fs.existsSync(primary)) {
    try { return norm(JSON.parse(fs.readFileSync(primary, "utf8"))); }
    catch { console.error(`candor-ts: callgraph ${prefix}.callgraph.json failed to parse — its edges are OMITTED from this query (corrupt or mid-write); re-run the scan`); return {}; }
  }
  const merged = new Map();
  for (const f of siblings(prefix, (x) => x.endsWith(".callgraph.json"))) {
    let part;
    try { part = norm(JSON.parse(fs.readFileSync(f, "utf8"))); }
    catch {
      console.error(`candor-ts: callgraph ${f} failed to parse — its edges are OMITTED from this query (corrupt or mid-write); re-run the scan`);
      continue;
    }
    for (const [caller, callees] of Object.entries(part)) {
      const known = merged.get(caller);
      if (known === undefined) {
        merged.set(caller, [...callees]);
        continue;
      }
      // Same caller in another workspace member: union, the direction that never drops an edge.
      for (const callee of callees) if (!known.includes(callee)) known.push(callee);
    }
  }
  // Object.fromEntries defines own data properties, so even a `__proto__` caller stays a plain key.
  return Object.fromEntries(merged);
}
95
+
96
// ---- the §3.1 match ladder: exact > segment-suffix > substring ------------------------------------
/**
 * Rank how well `name` matches the query `q`.
 *
 * @param {string} name - a function name from the report/callgraph
 * @param {string} q - the query text
 * @returns {number} 3 for an exact match; 2 when `name` ends with `q` and the text before it ends
 *   in a segment separator (`.`, `$` or `#`); 1 when `q` occurs anywhere in `name`; 0 otherwise
 */
function matchTier(name, q) {
  if (name === q) return 3;
  if (name.endsWith(q)) {
    const head = name.slice(0, name.length - q.length);
    if (/[.$#]$/.test(head)) return 2;
  }
  return name.includes(q) ? 1 : 0;
}
103
/**
 * Select the names that match `q` at the BEST tier any name reaches (exact > segment-suffix >
 * substring), preserving input order.
 *
 * The best tier is found with a plain loop rather than `Math.max(0, ...tiers)`: spreading passes
 * every tier as a call argument, which can throw RangeError on a very large name list. Tiers are
 * also computed once per name instead of twice.
 *
 * @param {string[]} names - candidate names
 * @param {string} q - the query text
 * @returns {string[]} the names matching at the best tier; `[]` when nothing matches
 */
export function matches(names, q) {
  const tiers = names.map((n) => matchTier(n, q));
  let best = 0;
  for (const tier of tiers) {
    if (tier > best) best = tier;
  }
  return best === 0 ? [] : names.filter((_, i) => tiers[i] >= best);
}
107
+
108
/**
 * Invert a callgraph: for every callee, the list of callers that reference it.
 *
 * @param {Object<string, Array>} cg - caller name -> callee names
 * @returns {Map<*, string[]>} callee -> callers, one entry per edge (duplicates are kept), in the
 *   order the edges are encountered
 */
function reverseGraph(cg) {
  const callersOf = new Map();
  Object.entries(cg).forEach(([caller, callees]) => {
    for (const callee of callees) {
      const bucket = callersOf.get(callee);
      if (bucket === undefined) callersOf.set(callee, [caller]);
      else bucket.push(caller);
    }
  });
  return callersOf;
}
117
+
118
/**
 * Build a name -> row index over the report entries, used for loc / direct / entry-point lookups.
 * When several rows share a name, the LAST one is the one the index returns.
 *
 * @param {object[]} fns - report entries, each with an `fn` name
 * @returns {Map<string, object>}
 */
function indexFns(fns) {
  const byName = new Map();
  for (const row of fns) byName.set(row.fn, row);
  return byName;
}
122
+
123
/**
 * show: the row(s) of the function(s) best matching `q`.
 *
 * Each result carries `fn`, `inferred`, `direct` and `unresolved`, plus whichever of the literal
 * lists `paths`, `hosts`, `cmds`, `tables` are non-empty on the entry.
 *
 * Literal Fs paths live under the report's `paths` key (scan emits `entry.paths`), NOT `fs`. An
 * earlier read of `e.fs` looked at a field this engine never writes, so every file path was
 * silently dropped; they are surfaced under the report's own key, like the other literal lists.
 *
 * @param {object[]} fns - normalized report entries
 * @param {string} q - function-name query
 * @returns {object[]} one summary object per matching entry, in report order
 */
export function show(fns, q) {
  const wanted = new Set(matches(fns.map((row) => row.fn), q));
  const rows = [];
  for (const row of fns) {
    if (!wanted.has(row.fn)) continue;
    const view = { fn: row.fn, inferred: row.inferred, direct: row.direct };
    for (const key of ["paths", "hosts", "cmds", "tables"]) {
      if (row[key]?.length) view[key] = row[key];
    }
    view.unresolved = row.unresolved;
    rows.push(view);
  }
  return rows;
}
139
+
140
/**
 * where: which functions carry effect `eff`, split by how they get it.
 *
 * @param {object[]} fns - normalized report entries (array-valued `direct` / `inferred`)
 * @param {string} eff - effect name
 * @returns {{effect: string, directly: string[], inherited: string[]}} `directly` lists the
 *   functions whose `direct` set has the effect; `inherited` lists those that have it in
 *   `inferred` only; both sorted
 */
export function where(fns, eff) {
  const directly = [];
  const inherited = [];
  for (const row of fns) {
    if (row.direct.includes(eff)) directly.push(row.fn);
    else if (row.inferred.includes(eff)) inherited.push(row.fn);
  }
  directly.sort();
  inherited.sort();
  return { effect: eff, directly, inherited };
}
147
+
148
/**
 * callers: who calls the function(s) matching `q`, directly and transitively.
 *
 * @param {Object<string, Array>} cg - caller name -> callee names
 * @param {string} q - function-name query, resolved against the callgraph's keys
 * @returns {{of: string[], direct: string[], transitive: string[]}} `of` is the matched targets;
 *   `direct` the sorted callers of any target; `transitive` every caller reachable backwards from
 *   the targets, sorted, with the targets themselves left out
 */
export function callers(cg, q) {
  const targets = matches(Object.keys(cg), q);
  const callersOf = reverseGraph(cg);
  const isTarget = new Set(targets);

  const direct = new Set();
  for (const target of targets) {
    for (const caller of callersOf.get(target) ?? []) direct.add(caller);
  }

  // Backward walk from the targets; a node is expanded the first time it is seen.
  const transitive = new Set();
  const pending = [...targets];
  while (pending.length > 0) {
    const node = pending.pop();
    for (const caller of callersOf.get(node) ?? []) {
      if (transitive.has(caller) || isTarget.has(caller)) continue;
      transitive.add(caller);
      pending.push(caller);
    }
  }
  return { of: targets, direct: [...direct].sort(), transitive: [...transitive].sort() };
}
160
+
161
/**
 * map: per-module effect summary.
 *
 * A function's module is its name up to the last `.`; names without a dot are grouped under
 * `"(root)"`. Each module reports the sorted union of its functions' `inferred` effects and the
 * number of functions.
 *
 * Two deliberate choices:
 *  - The accumulator is a Map, not `{}`. With a plain object, a module named like an
 *    Object.prototype member (`constructor`, `toString`, …) is found as an inherited value by
 *    `??=` and the next `.effects.add` throws.
 *  - Modules are ordered by comparing their NAMES. A comparator-less `.sort()` on `[name, value]`
 *    pairs compares the stringified pair (`"name,[object Object]"`), which puts `pkg.Outer$Inner`
 *    before `pkg.Outer` because `$` sorts below `,`.
 *
 * @param {object[]} fns - normalized report entries
 * @returns {Object<string, {effects: string[], functions: number}>} keyed by module, in name order
 */
export function map(fns) {
  const mods = new Map();
  for (const e of fns) {
    const cut = e.fn.lastIndexOf(".");
    const mod = cut === -1 ? "(root)" : e.fn.slice(0, cut);
    let m = mods.get(mod);
    if (m === undefined) {
      m = { effects: new Set(), functions: 0 };
      mods.set(mod, m);
    }
    for (const x of e.inferred) m.effects.add(x);
    m.functions += 1;
  }
  // Code-unit order on the key, i.e. what a default sort of the bare names would give.
  const byName = ([a], [b]) => (a < b ? -1 : a > b ? 1 : 0);
  return Object.fromEntries([...mods].sort(byName)
    .map(([k, v]) => [k, { effects: [...v.effects].sort(), functions: v.functions }]));
}
172
+
173
/**
 * reachable: the effects unioned over the entry points, with the entry points that carry each.
 *
 * Effects are accumulated in a Map and ordered by effect NAME. A plain `{}` accumulator with
 * `??=` finds an inherited member for a name such as `constructor` (possible in a malformed or
 * foreign report) and then throws on `.push`. A comparator-less sort of `[name, list]` pairs
 * orders by the stringified pair rather than by name.
 *
 * @param {object[]} fns - normalized report entries; an entry is a root when `entryPoint` is truthy
 * @returns {{entryPoints: number, effects: Object<string, {count: number, via: string[]}>}}
 *   `entryPoints` is the number of roots; per effect, `via` is the sorted list of roots whose
 *   `inferred` set has it and `count` is its length
 */
export function reachable(fns) {
  const roots = fns.filter((e) => e.entryPoint);
  const byEff = new Map();
  for (const e of roots) {
    for (const x of e.inferred) {
      const via = byEff.get(x);
      if (via === undefined) byEff.set(x, [e.fn]);
      else via.push(e.fn);
    }
  }
  const byName = ([a], [b]) => (a < b ? -1 : a > b ? 1 : 0);
  return {
    entryPoints: roots.length,
    effects: Object.fromEntries([...byEff].sort(byName)
      .map(([k, v]) => [k, { count: v.length, via: v.sort() }])),
  };
}
183
+
184
/**
 * impact: the BACKWARD blast radius — every effectful function that transitively calls the
 * target, and which ENTRY POINTS are involved. Matches candor-query's
 * {fn, affectedCount, entryPoints} and adds the `affected` list (a forward-compatible extension:
 * an agent wants the names, not just a count).
 *
 * The query can resolve to several targets and the backward walk starts from ALL of them, so
 * every matched target that is itself an entry point is reported — not only the first. Checking
 * `targets[0]` alone dropped the other matched roots, which the walk also excludes from the
 * reached set. For a single-target query the result is unchanged.
 *
 * @param {object[]} fns - normalized report entries (the report lists only effect-carrying units)
 * @param {Object<string, Array>} cg - caller name -> callee names
 * @param {string} q - function-name query, resolved against the callgraph's keys
 * @returns {{fn: string, affectedCount: number, affected: string[],
 *   entryPoints: Array<{fn: string, inferred: Array}>}} `fn` is the first matched target, or `q`
 *   itself when nothing matched
 */
export function impact(fns, cg, q) {
  const targets = matches(Object.keys(cg), q);
  const isTarget = new Set(targets);
  const rev = reverseGraph(cg);
  const idx = indexFns(fns);
  const entrySet = new Set(fns.filter((e) => e.entryPoint).map((e) => e.fn));

  // Backward walk over the reversed graph; the targets themselves are never counted as reached.
  const reached = new Set();
  const queue = [...targets];
  while (queue.length) {
    const n = queue.pop();
    for (const c of rev.get(n) ?? []) {
      if (reached.has(c) || isTarget.has(c)) continue;
      reached.add(c);
      queue.push(c);
    }
  }

  // Only names present in the report (effect-carrying units) count as affected.
  const affected = [...reached].filter((n) => idx.has(n)).sort();

  const rootNames = [];
  for (const t of targets) {
    if (idx.get(t)?.entryPoint) rootNames.push(t); // a target itself, if a runtime root
  }
  rootNames.push(...[...reached].filter((n) => entrySet.has(n)).sort());
  const entryPoints = rootNames.map((n) => ({ fn: n, inferred: idx.get(n)?.inferred ?? [] }));
  return { fn: targets[0] ?? q, affectedCount: affected.length, affected, entryPoints };
}
207
+
208
/**
 * path: the FORWARD provenance — a shortest BFS over the calls graph from `fn` to the nearest
 * unit that performs `eff` DIRECTLY (the source). Matches candor-query's
 * {effect, fn, path:[{fn,loc,source}]}.
 *
 * Callee lists are read as OWN array properties only. A callee named like an Object.prototype
 * member (`toString`, `constructor`, …) would otherwise resolve `cg[n]` to an inherited function,
 * which `?? []` does not catch, and iterating it throws.
 *
 * @param {object[]} fns - normalized report entries (`direct`, `loc`)
 * @param {Object<string, Array>} cg - caller name -> callee names
 * @param {string} fnQ - function-name query; the FIRST match is the start node
 * @param {string} eff - effect name
 * @returns {{effect: string, fn: string, path: Array<{fn: *, loc: *, source: boolean}>}} the
 *   chain from the start to the source (`source: true` on the last step); `path` is `[]` when the
 *   query matches nothing or no direct source is reachable, and `fn` is then the query text
 */
export function path(fns, cg, fnQ, eff) {
  const idx = indexFns(fns);
  const targets = matches(Object.keys(cg), fnQ);
  const start = targets[0];
  const isSource = (n) => idx.get(n)?.direct?.includes(eff);
  if (start === undefined) return { effect: eff, fn: fnQ, path: [] };

  const calleesOf = (n) => {
    const own = Object.prototype.hasOwnProperty.call(cg, n) ? cg[n] : undefined;
    return Array.isArray(own) ? own : [];
  };

  // BFS, tracking each node's predecessor for path reconstruction.
  const prev = new Map([[start, null]]);
  const queue = [start];
  let found = isSource(start) ? start : null;
  // `head` walks the queue in FIFO order without the O(n) cost of Array.prototype.shift.
  for (let head = 0; head < queue.length && found === null; head += 1) {
    const n = queue[head];
    for (const c of calleesOf(n)) {
      if (prev.has(c)) continue;
      prev.set(c, n);
      if (isSource(c)) { found = c; break; }
      queue.push(c);
    }
  }
  if (found === null) return { effect: eff, fn: fnQ, path: [] }; // honest: no local source on a path

  const chain = [];
  for (let n = found; n !== null; n = prev.get(n)) chain.unshift(n);
  return {
    effect: eff,
    fn: start,
    path: chain.map((n) => ({ fn: n, loc: idx.get(n)?.loc ?? "", source: n === found })),
  };
}
238
+
239
+ // diff: the per-unit effect delta between two reports (cur vs base) — {changes:[{fn, gained, lost}]}.
240
+ // The same shape query.mjs emits; the watcher uses it to tell an agent what its edit changed.
241
/**
 * Effects keyed by function name, UNIONED across rows that share a name.
 *
 * A plain `new Map(fns.map(...))` keeps only the LAST same-named row. When the multi-report
 * loader merges workspace members that share a short function name, one member's effects would
 * silently vanish from diff/gains: a SUPPLY-CHAIN MISS (gains fails to flag a gained Net).
 * Unioning is the safe direction — it never drops an effect.
 *
 * @param {object[]} fns - report entries
 * @returns {Map<string, Set>} name -> set of `inferred` effects (empty when `inferred` is not an
 *   array; a string such as "Net" would otherwise be iterated character by character)
 */
function effectsByFn(fns) {
  const union = new Map();
  for (const row of fns) {
    let bag = union.get(row.fn);
    if (bag === undefined) {
      bag = new Set();
      union.set(row.fn, bag);
    }
    if (!Array.isArray(row.inferred)) continue;
    for (const effect of row.inferred) bag.add(effect);
  }
  return union;
}
254
+
255
/**
 * diff: the per-function effect delta between two reports (current vs base).
 *
 * @param {object[]} curFns - entries of the current report
 * @param {object[]} baseFns - entries of the base report
 * @returns {{changes: Array<{fn: string, gained: Array, lost: Array}>}} one change per function
 *   whose effect set differs (functions present on one side only included), with sorted `gained`
 *   and `lost` lists; the changes are ordered by `fn` using `localeCompare`
 */
export function diff(curFns, baseFns) {
  const cur = effectsByFn(curFns);
  const base = effectsByFn(baseFns);
  const nothing = new Set();
  // Members of `from` that are missing in `other`, sorted.
  const missingIn = (from, other) => [...from].filter((effect) => !other.has(effect)).sort();

  // Current names first, then base-only names — the order the changes are collected in.
  const names = new Set(cur.keys());
  for (const name of base.keys()) names.add(name);

  const changes = [];
  for (const fn of names) {
    const now = cur.get(fn) ?? nothing;
    const before = base.get(fn) ?? nothing;
    const gained = missingIn(now, before);
    const lost = missingIn(before, now);
    if (gained.length === 0 && lost.length === 0) continue;
    changes.push({ fn, gained, lost });
  }
  changes.sort((a, b) => a.fn.localeCompare(b.fn));
  return { changes };
}
268
+
269
/**
 * gains: the package-level SUPPLY-CHAIN alarm (spec §5.1) — the UNION of effects the surface
 * gained between two reports (base → cur), with per-function detail. It flags a dependency that
 * grows a Net/Exec reach between releases. Same shape as candor-query's `gains --json`; built on
 * `diff` so the two cannot drift.
 *
 * @param {object[]} curFns - entries of the current report
 * @param {object[]} baseFns - entries of the base report
 * @returns {{gained: Array, byFunction: Array<{fn: string, effect: *}>}} `gained` is the sorted
 *   set of gained effects; `byFunction` has one row per (function, gained effect) in diff order
 */
export function gains(curFns, baseFns) {
  const union = new Set();
  const byFunction = [];
  diff(curFns, baseFns).changes.forEach((change) => {
    change.gained.forEach((effect) => {
      union.add(effect);
      byFunction.push({ fn: change.fn, effect });
    });
  });
  return { gained: [...union].sort(), byFunction };
}
279
+
280
/**
 * whatif: hypothetically add `eff` to `target` and report the blast radius plus any policy
 * violations. Kept I/O-free: the policy arrives already parsed.
 *
 * Hardening over a direct read of the policy fields:
 *  - a policy with no `deny` list is treated as having no rules instead of throwing;
 *  - a rule with no scope (null/undefined) applies everywhere and is rendered as
 *    `deny <effects>` — interpolating the raw field produced the text `deny Net undefined`.
 *
 * @param {Object<string, Array>} cg - caller name -> callee names
 * @param {string} target - function-name query, resolved against the callgraph's keys
 * @param {string} eff - the effect to add hypothetically
 * @param {?{deny: Array<{effects: Array, scope: *}>}} policyParsed - parsed policy, or null
 * @param {(fn: string, scope: *) => boolean} scopeMatches - scope predicate supplied by the caller
 * @returns {?{of: string[], effect: string, affected: string[], violations: Array<{fn: string,
 *   rule: string}>, ok: boolean}} null when the query matches nothing (the caller decides how to
 *   surface "no such fn"); `affected` is the targets plus all their transitive callers, sorted
 */
export function whatif(cg, target, eff, policyParsed, scopeMatches) {
  const targets = matches(Object.keys(cg), target);
  if (targets.length === 0) return null;
  const rev = reverseGraph(cg);

  // The targets and everything that reaches them backwards would inherit the new effect.
  const affected = new Set(targets);
  const queue = [...targets];
  while (queue.length) {
    const n = queue.pop();
    for (const c of rev.get(n) ?? []) if (!affected.has(c)) { affected.add(c); queue.push(c); }
  }

  const violations = [];
  const denyRules = Array.isArray(policyParsed?.deny) ? policyParsed.deny : [];
  for (const r of denyRules) {
    if (r.effects.length && !r.effects.includes(eff)) continue; // pure ([]) forbids ANY effect
    const scope = r.scope ?? "";
    const rule = `deny ${r.effects.join(" ") || "(pure)"} ${scope}`.trim();
    for (const fn of affected) {
      if (!scope || scopeMatches(fn, scope)) violations.push({ fn, rule });
    }
  }
  return { of: targets, effect: eff, affected: [...affected].sort(), violations, ok: violations.length === 0 };
}
package/query.mjs CHANGED
@@ -20,26 +20,13 @@ import fs from "node:fs";
20
20
 
21
21
  import { parsePolicy, scopeMatches } from "./policy.mjs";
22
22
  import { printAgents } from "./contract.mjs";
23
-
24
- // ---- the §3.1 match ladder: exact > segment-suffix > substring ------------------------------------
25
- function matchTier(name, q) {
26
- if (name === q) return 3;
27
- if (name.endsWith(q) && /[.$]$/.test(name.slice(0, name.length - q.length))) return 2;
28
- if (name.includes(q)) return 1;
29
- return 0;
30
- }
31
- function matches(names, q) {
32
- const best = Math.max(0, ...names.map((n) => matchTier(n, q)));
33
- return best === 0 ? [] : names.filter((n) => matchTier(n, q) >= best);
34
- }
35
-
36
- function loadReport(prefix) {
37
- const d = JSON.parse(fs.readFileSync(`${prefix}.json`, "utf8"));
38
- return d.functions ?? d;
39
- }
40
- function loadCallgraph(prefix) {
41
- return JSON.parse(fs.readFileSync(`${prefix}.callgraph.json`, "utf8"));
42
- }
23
+ // ONE source of truth for loading + name-matching — query.mjs kept DRIFTED local copies that didn't
24
+ // merge sibling reports, didn't tolerate a corrupt report (bare JSON.parse → uncaught crash), and used
25
+ // a `matchTier` missing `#` (so the SAME query resolved differently between `impact` and `callers` on a
26
+ // JVM `Type#method` report). Importing the shared functions removes all three divergences (review find).
27
+ import { impact as coreImpact, path as corePath, gains as coreGains,
28
+ show as coreShow,
29
+ loadReport, loadCallgraph, matches } from "./query-core.mjs";
43
30
  const emit = (v) => console.log(JSON.stringify(v, null, 1));
44
31
 
45
32
  const [, , cmd, ...args] = process.argv;
@@ -53,18 +40,11 @@ switch (cmd) {
53
40
  break;
54
41
  }
55
42
  case "show": {
43
+ // Was a hand-copy of query-core's show that had DRIFTED — it read the wrong Fs key (`e.fs`, never
44
+ // written; the paths silently vanished) and dropped Exec `cmds` entirely. Call the shared show so
45
+ // the CLI and the MCP `candor_show` are one implementation that cannot diverge again.
56
46
  const [prefix, q] = args;
57
- const fns = loadReport(prefix);
58
- const hit = new Set(matches(fns.map((e) => e.fn), q));
59
- const out = fns.filter((e) => hit.has(e.fn)).map((e) => {
60
- const o = { fn: e.fn, inferred: e.inferred, direct: e.direct };
61
- if (e.fs?.length) o.fs = e.fs;
62
- if (e.hosts?.length) o.hosts = e.hosts;
63
- if (e.tables?.length) o.tables = e.tables;
64
- o.unresolved = e.unresolved;
65
- return o;
66
- });
67
- emit(out);
47
+ emit(coreShow(loadReport(prefix), q));
68
48
  break;
69
49
  }
70
50
  case "where": {
@@ -125,6 +105,7 @@ switch (cmd) {
125
105
  changes.sort((a, b) => a.fn.localeCompare(b.fn));
126
106
  emit({ changes });
127
107
  process.exit(changes.some((c) => c.gained.length) ? 1 : 0);
108
+ break; // unreachable (process.exit), but eslint can't prove it — defends against fallthrough
128
109
  }
129
110
  case "reachable": {
130
111
  // what the app DOES at runtime: effects unioned over the entry points (SPEC §3.1; same JSON
@@ -139,6 +120,25 @@ switch (cmd) {
139
120
  .map(([k, v]) => [k, { count: v.length, via: v.sort() }])) });
140
121
  break;
141
122
  }
123
+ case "impact": {
124
+ // blast radius (backward dual of reachable) — reuses the shared query-core, the same logic the
125
+ // MCP server serves. SPEC §3.1: {fn, affectedCount, affected, entryPoints:[{fn,inferred}]}.
126
+ const [prefix, q] = args;
127
+ emit(coreImpact(loadReport(prefix), loadCallgraph(prefix), q));
128
+ break;
129
+ }
130
+ case "gains": {
131
+ // the supply-chain alarm (SPEC §5.1): {gained:[Effect], byFunction:[{fn,effect}]} — what the
132
+ // surface gained between two reports (base → cur), the cross-engine machine-readable form.
133
+ const [curPrefix, basePrefix] = args;
134
+ emit(coreGains(loadReport(curPrefix), loadReport(basePrefix)));
135
+ break;
136
+ }
137
+ case "path": {
138
+ const [prefix, fn, eff] = args;
139
+ emit(corePath(loadReport(prefix), loadCallgraph(prefix), fn, eff));
140
+ break;
141
+ }
142
142
  case "whatif": {
143
143
  const [prefix, target, eff, maybePolicy] = args;
144
144
  const cg = loadCallgraph(prefix);
@@ -169,6 +169,7 @@ switch (cmd) {
169
169
  }
170
170
  emit({ of: targets, effect: eff, affected: [...affected].sort(), violations, ok: violations.length === 0 });
171
171
  process.exit(violations.length ? 1 : 0);
172
+ break; // unreachable (process.exit), but eslint can't prove it — defends against fallthrough
172
173
  }
173
174
  default:
174
175
  console.error("usage: node query.mjs <parsepolicy|show|where|callers|map|whatif> …");
package/scan-core.mjs ADDED
@@ -0,0 +1,161 @@
1
+ /**
2
+ * scan-core — the PURE classifier + literal-extraction leaves of scan.mjs, factored out so they can be
3
+ * unit-tested directly (scan.mjs proper is the TS-compiler-driven walk; these take plain strings). No
4
+ * TypeScript-AST dependency, no I/O, no scan state: the κ rules table + its two readers, the §6.2
5
+ * Exec-head refinement, the bare host-literal matcher, the SPEC §2 SQL-table extraction, and the
6
+ * test-path predicate. scan.mjs imports them; the behavior is identical (this is a move, not a rewrite).
7
+ */
8
+
9
/**
 * True for a source path that is test/spec/dependency code rather than the package's own
 * production surface.
 *
 * @param {string} p - a `/`-separated source path
 * @returns {boolean} true when a path segment is `node_modules`, `__tests__`, `test`, `tests` or
 *   `spec`, or when the file name ends in `.test.` / `.spec.` followed by a TypeScript extension
 *   (`ts`, `tsx`, optionally prefixed with `m` or `c`)
 */
export function isTestPath(p) {
  const inExcludedDir = /(^|\/)(node_modules|__tests__|tests?|spec)(\/|$)/.test(p);
  if (inExcludedDir) return true;
  return /\.(test|spec)\.[mc]?tsx?$/.test(p);
}
13
+
14
// ---- κ — the curated classifier (CLASSIFIER §2: the dispatch/execution boundary, not builders) ----
/**
 * The κ rules table: Node builtins plus a curated npm tier. Same under-report-and-say-so posture
 * as the crate table: an unlisted package contributes nothing — never a guess.
 *
 * One TABLE, two readers, so they can never drift:
 *  - `kappa()` classifies a call;
 *  - `kappaKnows()` answers "is this package curated at all?" for the coverage ledger (a κ-known
 *    package whose given call is pure — a TypeORM builder — is covered, not a blind spot).
 *
 * Each row is `[module-name regex, member regex (null = any member), effect]`.
 *
 * The member token a rule is matched against is the resolved declaration's name, EXCEPT for a
 * constructor call (`new X()`), whose synthesized token is "new" (its decl `name` is empty — see
 * CLASSIFY). That lets a rule keep the effect on a module's function/verb surface while exempting
 * inert CONSTRUCTION.
 */
export const KAPPA_RULES = [
  // ---- Node builtins ----
  [/^(node:)?fs(\/promises)?$/, null, "Fs"],

  // The net cluster (net/dgram/tls/http/http2/https): I/O on its FUNCTION/verb surface
  // (request/get/connect/createConnection/createServer/createSocket/listen…), inert on
  // CONSTRUCTION. `new http.Agent()` is a connection-pool config object; `new http.Server()` and
  // `new net.Socket()` open nothing until a later `.listen()`/`.connect()`/request uses them — no
  // syscall, no fd. So: Net for every member EXCEPT a constructor (token "new").
  //
  // Conservative by the cardinal rule: any NON-constructor member — listed verb or not — keeps
  // Net, so an unlisted effectful function can never under-report; only proven-inert construction
  // is freed. (The pure CONSTANTS http.STATUS_CODES/METHODS/maxHeaderSize and the
  // https.globalAgent accessor are property reads, not calls — they never reach κ.)
  //
  // Also exempt: node:net's PURE STRING VALIDATORS isIP/isIPv4/isIPv6. They parse a string and
  // return 0/4/6 (or a boolean) — no socket, no fd, no syscall. The whole-module Net rule once
  // fabricated Net onto them; a real-world sweep on node-fetch caught it (its URL predicates
  // isOriginPotentiallyTrustworthy/isUrlPotentiallyTrustworthy call isIP() and inherited a
  // FABRICATED Net — the cardinal sin — with no local Net edge). Only these three named validators
  // are freed; every genuine verb stays Net (the matcher excludes ONLY `new` + the three
  // validators, nothing else).
  [/^(node:)?(net|dgram|tls|http2?|https)$/, /^(?!(new|isIP|isIPv4|isIPv6)$)/, "Net"],
  [/^(node:)?child_process$/, null, "Exec"],
  [/^(node:)?sqlite$/, null, "Db"],

  // ---- the curated npm tier ----
  [/^(axios|got|node-fetch|undici|ws|socket\.io(-client)?|nodemailer)$/, null, "Net"],
  [/^(pg|mysql2?|mongodb|ioredis|redis|sqlite3|better-sqlite3|knex)$/, null, "Db"],
  [/^(execa|cross-spawn|shelljs)$/, null, "Exec"],
  [/^(fs-extra|graceful-fs|rimraf|glob|chokidar)$/, null, "Fs"],
  [/^dotenv$/, null, "Env"],
  [/^(winston|pino|bunyan|npmlog)$/, null, "Log"],

  // ---- entropy ----
  // node:crypto's random surface + the password-hashing libs (salted -> Rand). Found by the CTA
  // dogfood on a Nest app: argon2.hash came out SILENTLY PURE (the curated-kappa caveat landing on
  // exactly the call a security review cares about).
  [/^(node:)?crypto$/, /^random/, "Rand"],
  [/^(argon2|bcrypt|bcryptjs)$/, null, "Rand"],

  // ---- the ORM tier — VERB-PRECISE ----
  // The CLASSIFIER discipline: tag the execution boundary, not builders (`createQueryBuilder` is
  // pure; its `getMany`/`execute` is the I/O). Found on the first framework-APP scan: a
  // TypeORM/Nest application — Db-heavy by construction — read zero Db because the ORM resolved
  // into an unlisted package (the JVM's Spring-Data lesson, replayed).
  [
    /^(typeorm|@nestjs\/typeorm)$/,
    /^(find|save|remove|softRemove|recover|insert|update|upsert|delete|restore|count|exist|sum|average|minimum|maximum|query|clear|increment|decrement|getMany|getOne|getOneOrFail|getRawMany|getRawOne|getCount|getExists|execute|stream|transaction)/,
    "Db",
  ],
  [
    /^(@prisma\/client|\.prisma|\.prisma\/client)$/,
    /^(\$?(queryRaw|executeRaw|transaction)|find(Many|Unique|First)|create|createMany|update|updateMany|upsert|delete|deleteMany|aggregate|count|groupBy)/,
    "Db",
  ],
  [
    /^mongoose$/,
    /^(find|save|create|insertMany|updateOne|updateMany|replaceOne|deleteOne|deleteMany|aggregate|countDocuments|estimatedDocumentCount|distinct|exec|bulkWrite)/,
    "Db",
  ],
  [
    /^(sequelize|drizzle-orm)$/,
    /^(find|create|update|destroy|upsert|count|max|min|sum|query|select|insert|delete|execute|transaction)/,
    "Db",
  ],

  // Nest's HttpService wraps axios — the request verbs are Net.
  [/^@nestjs\/axios$/, /^(get|post|put|patch|delete|head|request)$/, "Net"],
];
76
/**
 * Classify a call: the effect of the FIRST rule in `KAPPA_RULES` whose module pattern matches
 * `moduleName` and whose member pattern (when the rule has one) matches `member`.
 *
 * @param {string} moduleName - the module the callee resolves into
 * @param {string} member - the member token of the call
 * @returns {?string} the effect name, or null when no rule applies
 */
export function kappa(moduleName, member) {
  const hit = KAPPA_RULES.find(([modulePattern, memberPattern]) => {
    if (!modulePattern.test(moduleName)) return false;
    return !memberPattern || memberPattern.test(member);
  });
  return hit ? hit[2] : null;
}
82
/**
 * Packages REVIEWED and ratified effect-free at the call boundary: decorator/metadata plumbing,
 * pure computation, and operator algebras whose side effects live in visible user callbacks.
 *
 * This is the ledger's triage outlet: an unlisted package either earns KAPPA_RULES entries or
 * lands here — never silently. NOT for anything that mints entropy (uuid), reads clocks, or signs
 * with RSA-PSS (jsonwebtoken stays unlisted on purpose).
 */
export const KAPPA_PURE = new Set([
  // Nest framework plumbing
  "@nestjs/common",
  "@nestjs/core",
  "@nestjs/swagger",
  "@nestjs/platform-express",
  // decorator / metadata helpers
  "class-validator",
  "class-transformer",
  "reflect-metadata",
  // pure computation and operator algebras
  "rxjs",
  "zod",
  "lodash",
  "ramda",
  "date-fns",
]);
92
/**
 * Is this package curated at all? True when it is ratified pure (`KAPPA_PURE`) or when any
 * `KAPPA_RULES` row's module pattern matches it, regardless of member.
 *
 * @param {string} moduleName - the module a callee resolves into
 * @returns {boolean}
 */
export function kappaKnows(moduleName) {
  if (KAPPA_PURE.has(moduleName)) return true;
  for (const [modulePattern] of KAPPA_RULES) {
    if (modulePattern.test(moduleName)) return true;
  }
  return false;
}
95
+
96
/**
 * Refine the Exec cliff (spec §4 ⟨0.5⟩): the effects a literal, statically-known subprocess head
 * implies, matched by basename.
 *
 * The result is ADDED to a caller that already carries Exec (a subprocess is still spawned — Exec
 * is never dropped). An unrecognised head returns [] and keeps the bare cliff (never guess). A
 * candor engine reads Fs/Env only — spec §7 item 12 (the analyzer self-boundary) guarantees it, so
 * that case is spec-supplied. Only UNAMBIGUOUS single-effect tools belong here: a multi-modal head
 * (git status local vs git push Net; rsync local vs remote; make/npm run project code) would
 * fabricate the effect for its common case. The reference engines share this table verbatim.
 *
 * @param {string} cmd - the command line; its first whitespace-separated word is the head, and
 *   anything up to the last `/` or `\` in that word is ignored
 * @returns {string[]} a fresh array: ["Net"], ["Db"], ["Env", "Fs"] or []
 */
export function commandHeadEffects(cmd) {
  const [head] = cmd.trim().split(/\s+/);
  const base = head.split(/[/\\]/).pop();
  switch (base) {
    case "curl":
    case "wget":
    case "http":
    case "ssh":
    case "scp":
    case "sftp":
    case "ftp":
    case "telnet":
      return ["Net"];
    case "psql":
    case "mysql":
    case "sqlite3":
    case "mongosh":
    case "mongo":
    case "redis-cli":
    case "cqlsh":
    case "influx":
      return ["Db"];
    case "candor":
    case "candor-run.sh":
    case "candor-scan":
    case "candor-query":
    case "candor-java":
    case "candor-classify":
    case "candor-report":
    case "cargo-candor":
      return ["Env", "Fs"];
    default:
      return [];
  }
}
111
/**
 * Extract `host[:port]` from an address/URL literal. Non-address strings yield nothing (never
 * fabricate).
 *
 * For a `scheme://…` literal the authority ends at the first `/`, `?` or `#`, and any userinfo
 * (`user:pw@`) is stripped. Stopping only at `/` let a path-less URL drag its query or fragment
 * into the host — `http://example.com?next=a@b` came out as host `b`.
 *
 * @param {string} s - a string literal from the scanned source
 * @returns {?string} the host (with port when present); a bare `host.tld[:port]` literal is
 *   returned as-is; null when `s` does not look like an address
 */
export function hostLiteral(s) {
  const m = s.match(/^[a-z][a-z0-9+.-]*:\/\/([^/?#]+)/i); // scheme://[userinfo@]host[:port]
  if (m) return m[1].replace(/^.*@/, "");
  // A bare host must contain a dot, so single words ("localhost", "users") are not reported.
  if (/^[a-z0-9._-]+(:\d+)?$/i.test(s) && s.includes(".")) return s; // bare host[.tld][:port]
  return null;
}
118
/**
 * Table-position identifiers in a SQL string literal (SPEC §2 `tables`). Mirrors the Rust
 * tables_in_sql exactly: the text must open with a statement keyword; a table may follow
 * FROM/JOIN/INTO/TABLE anywhere (skipping ONLY / IF NOT EXISTS / TABLE) or a statement-leading
 * UPDATE/TRUNCATE; a FOR UPDATE locking clause yields nothing. Conservative in the fabrication
 * direction.
 *
 * @param {string} sql - the SQL text
 * @returns {string[]} lower-cased table names, de-duplicated, in first-seen order; `[]` when the
 *   text does not start with a statement keyword
 */
export function tablesInSql(sql) {
  const STATEMENT_OPENERS = new Set(["select","insert","update","delete","create","drop","alter","truncate","merge","replace","with"]);
  const SKIPPABLE = new Set(["only","if","not","exists","table"]);
  const NOT_A_TABLE = new Set(["select","set","where","values","on","using","group","order","by","limit",
    "returning","as","inner","outer","left","right","cross","lateral","natural","union","all",
    "distinct","case","when","null","default","skip","nowait","of","from","join","into","update",
    "delete","insert"]);

  // `,` survives as its OWN token: it lets `FROM t1, t2` continue the table list without
  // fabricating from other comma-ridden positions (column lists, ON clauses).
  const tokens = sql.toLowerCase().replace(/[();]/g, " ").replace(/,/g, " , ").trim().split(/\s+/);
  if (!tokens.length || !STATEMENT_OPENERS.has(tokens[0])) return [];

  const found = [];
  const remember = (name) => {
    if (!found.includes(name)) found.push(name);
  };
  // A token as a table name: surrounding quotes stripped, keywords and non-identifiers rejected.
  const asIdent = (raw) => {
    const bare = raw.replace(/^["'`]+|["'`]+$/g, "");
    if (!bare || NOT_A_TABLE.has(bare)) return null;
    if (!/^[a-z_][a-z0-9_.$"`]*$/.test(bare)) return null;
    return bare.replace(/["`]/g, "");
  };
  const opensTableList = (token, at) => {
    if (token === "from" || token === "join" || token === "into" || token === "table") return true;
    return at === 0 && (token === "update" || token === "truncate");
  };

  tokens.forEach((token, i) => {
    if (!opensTableList(token, i)) return;
    let j = i + 1;
    while (j < tokens.length && SKIPPABLE.has(tokens[j])) j++;
    if (j >= tokens.length) return;
    const first = asIdent(tokens[j]);
    if (first === null) return;
    remember(first);
    // Comma-ADJACENT continuation only: `FROM t1, t2, t3` takes all three, while an alias breaks
    // the chain (`FROM t1 a, t2` keeps just t1 — an under-report, never a guess: skipping an alias
    // to chase the comma would fabricate tables out of `INSERT INTO t (a, b)`'s column list, whose
    // parens are spaces by the time we tokenize).
    while (j + 2 < tokens.length && tokens[j + 1] === ",") {
      const more = asIdent(tokens[j + 2]);
      if (more === null) break;
      remember(more);
      j += 2;
    }
  });
  return found;
}