candor-ts 0.4.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/scan.mjs CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * candor-ts — the TypeScript implementation of candor-spec 0.4.
3
+ * candor-ts — the TypeScript implementation of candor-spec 0.5.
4
4
  *
5
5
  * Origin (kept honest): this engine began as the clean-room derivability proof — a single-file
6
6
  * slice written from SPEC.md/SEMANTICS.md/CLASSIFIER.md alone, frozen as that claim in git history
@@ -26,6 +26,7 @@ import { fileURLToPath } from "node:url";
26
26
  import { createRequire } from "node:module";
27
27
  import { parsePolicy, evaluatePolicy } from "./policy.mjs";
28
28
  import { printAgents } from "./contract.mjs";
29
+ import { isTestPath, kappa, kappaKnows, commandHeadEffects, hostLiteral, tablesInSql } from "./scan-core.mjs";
29
30
 
30
31
  const ENGINE_DIR = path.dirname(fileURLToPath(import.meta.url));
31
32
 
@@ -56,9 +57,6 @@ if (wantAgents) { printAgents(); process.exit(0); }
56
57
  if (target === null) { console.error(usage); process.exit(2); }
57
58
 
58
59
  // ---- project discovery (a dir, a single file, or a tsconfig) --------------------------------------
59
- function isTestPath(p) {
60
- return /(^|\/)(node_modules|__tests__|tests?|spec)(\/|$)/.test(p) || /\.(test|spec)\.[mc]?tsx?$/.test(p);
61
- }
62
60
  let rootDir, fileNames, compilerOptions = {
63
61
  target: ts.ScriptTarget.ES2022,
64
62
  module: ts.ModuleKind.NodeNext,
@@ -221,68 +219,6 @@ const checker = program.getTypeChecker();
221
219
  const projectFiles = new Set(fileNames.map((f) => path.resolve(f)));
222
220
  const sources = program.getSourceFiles().filter((f) => projectFiles.has(path.resolve(f.fileName)));
223
221
 
224
- // ---- κ — the curated classifier (CLASSIFIER §2: the dispatch/execution boundary, not builders) ----
225
- // Node builtins + a curated npm tier (the same under-report-and-say-so posture as the crate table:
226
- // an unlisted package contributes nothing — never a guess).
227
- // One rules TABLE, two readers: kappa() classifies a call; kappaKnows() answers "is this package
228
- // curated at all?" for the coverage ledger (a κ-known package whose given call is pure — a TypeORM
229
- // builder — is covered, not a blind spot). A single source so the two can never drift.
230
- // [module-name regex, member regex (null = any member), effect]
231
- const KAPPA_RULES = [
232
- [/^(node:)?fs(\/promises)?$/, null, "Fs"],
233
- [/^(node:)?(net|dgram|tls|http2?|https)$/, null, "Net"],
234
- [/^(node:)?child_process$/, null, "Exec"],
235
- [/^(node:)?sqlite$/, null, "Db"],
236
- // the curated npm tier
237
- [/^(axios|got|node-fetch|undici|ws|socket\.io(-client)?|nodemailer)$/, null, "Net"],
238
- [/^(pg|mysql2?|mongodb|ioredis|redis|sqlite3|better-sqlite3|knex)$/, null, "Db"],
239
- [/^(execa|cross-spawn|shelljs)$/, null, "Exec"],
240
- [/^(fs-extra|graceful-fs|rimraf|glob|chokidar)$/, null, "Fs"],
241
- [/^dotenv$/, null, "Env"],
242
- [/^(winston|pino|bunyan|npmlog)$/, null, "Log"],
243
- // entropy: node:crypto's random surface + the password-hashing libs (salted -> Rand). Found by
244
- // the CTA dogfood on a Nest app: argon2.hash came out SILENTLY PURE (the curated-kappa caveat
245
- // landing on exactly the call a security review cares about).
246
- [/^(node:)?crypto$/, /^random/, "Rand"],
247
- [/^(argon2|bcrypt|bcryptjs)$/, null, "Rand"],
248
- // The ORM tier — VERB-PRECISE (the CLASSIFIER discipline: tag the execution boundary, not
249
- // builders; `createQueryBuilder` is pure, its `getMany`/`execute` is the I/O). Found on the
250
- // first framework-APP scan: a TypeORM/Nest application — Db-heavy by construction — read zero
251
- // Db because the ORM resolved into an unlisted package (the JVM's Spring-Data lesson, replayed).
252
- [/^(typeorm|@nestjs\/typeorm)$/,
253
- /^(find|save|remove|softRemove|recover|insert|update|upsert|delete|restore|count|exist|sum|average|minimum|maximum|query|clear|increment|decrement|getMany|getOne|getOneOrFail|getRawMany|getRawOne|getCount|getExists|execute|stream|transaction)/,
254
- "Db"],
255
- [/^(@prisma\/client|\.prisma|\.prisma\/client)$/,
256
- /^(\$?(queryRaw|executeRaw|transaction)|find(Many|Unique|First)|create|createMany|update|updateMany|upsert|delete|deleteMany|aggregate|count|groupBy)/,
257
- "Db"],
258
- [/^mongoose$/,
259
- /^(find|save|create|insertMany|updateOne|updateMany|replaceOne|deleteOne|deleteMany|aggregate|countDocuments|estimatedDocumentCount|distinct|exec|bulkWrite)/,
260
- "Db"],
261
- [/^(sequelize|drizzle-orm)$/,
262
- /^(find|create|update|destroy|upsert|count|max|min|sum|query|select|insert|delete|execute|transaction)/,
263
- "Db"],
264
- // Nest's HttpService wraps axios — the request verbs are Net.
265
- [/^@nestjs\/axios$/, /^(get|post|put|patch|delete|head|request)$/, "Net"],
266
- ];
267
- function kappa(moduleName, member) {
268
- for (const [mre, vre, eff] of KAPPA_RULES) {
269
- if (mre.test(moduleName) && (!vre || vre.test(member))) return eff;
270
- }
271
- return null;
272
- }
273
- // Packages REVIEWED and ratified effect-free at the call boundary (decorator/metadata plumbing,
274
- // pure computation, operator algebras whose side effects live in visible user callbacks). This is
275
- // the ledger's triage outlet: an unlisted package either earns KAPPA_RULES entries or lands here —
276
- // never silently. NOT for anything that mints entropy (uuid), reads clocks, or signs with RSA-PSS
277
- // (jsonwebtoken stays unlisted on purpose).
278
- const KAPPA_PURE = new Set([
279
- "@nestjs/common", "@nestjs/core", "@nestjs/swagger", "@nestjs/platform-express",
280
- "class-validator", "class-transformer", "reflect-metadata",
281
- "rxjs", "zod", "lodash", "ramda", "date-fns",
282
- ]);
283
- function kappaKnows(moduleName) {
284
- return KAPPA_PURE.has(moduleName) || KAPPA_RULES.some(([mre]) => mre.test(moduleName));
285
- }
286
222
 
287
223
  // The module a declaration came from: a project file → "<local>", @types/node → the builtin name,
288
224
  // node_modules/<pkg> → the package name, the ES lib → "<es-lib>".
@@ -297,6 +233,47 @@ function declModule(decl) {
297
233
  return f;
298
234
  }
299
235
 
236
// SPEC §5.1 — the effect manifest. An uncurated package MAY declare its effect surface in its
// package.json (`"candorEffects": ["Net"]`), read as the declared-not-verified tier: it kills the
// silent pure/blind-spot the package would otherwise carry, exactly like a cap type (and unlike
// candor's own analysis, which is checked). A name outside §1 VOIDS the declaration loudly — a typo
// must never silently narrow a surface.
const EFFECT_VOCAB = new Set(["Net", "Fs", "Db", "Exec", "Env", "Clock", "Ipc", "Log", "Rand", "Clipboard"]);
// package directory -> declared effect array, or null (no/invalid declaration). One read per package.
const _manifestCache = new Map();

/**
 * Read the `candorEffects` declaration of the package that owns `file`.
 *
 * @param {string} file - resolved declaration source path (forward-slash separated).
 * @returns {string[] | null} the declared effect array (possibly EMPTY — `[]` is an explicit
 *   "declared pure": covered, not a blind spot), or `null` when `file` is not under
 *   `node_modules/`, the manifest is missing/unreadable, or the declaration is invalid
 *   (still a blind spot). Invalid and malformed declarations are reported on stderr.
 */
function packageManifestEffects(file) {
  // The innermost `node_modules/<pkg>` (or `node_modules/@scope/<pkg>`) directory containing `file`.
  const m = file && file.match(/^(.*\/node_modules\/(?:@[^/]+\/[^/]+|[^/]+))\//);
  if (!m) return null;
  let dir = m[1];
  // A manifest read from an `@types/<pkg>` directory is a TRUST-BOUNDARY HOLE: the @types stub is a
  // type-only package published by DefinitelyTyped/anyone — NOT the effect-owning package. Honoring its
  // `candorEffects` would let an attacker's `@types/realpkg` declare `[]` to SILENCE the real package's
  // effects AND its κ-ledger disclosure. Redirect to the REAL package's own dir, whose author controls
  // it (`@types/babel__core` → `@babel/core`, `@types/foo` → `foo`); if that has no manifest, it stays
  // an honest κ-ledger blind spot, never silenced.
  const at = dir.match(/^(.*\/node_modules\/)@types\/([^/]+)$/);
  if (at) {
    const real = at[2].includes("__") ? "@" + at[2].replace("__", "/") : at[2];
    dir = at[1] + real;
  }
  if (_manifestCache.has(dir)) return _manifestCache.get(dir);

  // The FULL package name for diagnostics. `path.basename(dir)` would drop the scope and report
  // `@babel/core` as just `core`, pointing the reader at the wrong (or an ambiguous) package.
  const marker = "/node_modules/";
  const pkgName = dir.slice(dir.lastIndexOf(marker) + marker.length);

  let result = null;
  try {
    const d = JSON.parse(fs.readFileSync(path.join(dir, "package.json"), "utf8")).candorEffects;
    if (Array.isArray(d)) {
      const bad = d.filter((e) => !EFFECT_VOCAB.has(e));
      if (bad.length) console.error(`candor-ts: ${pkgName} candorEffects has an invalid effect '${bad[0]}' — declaration voided (SPEC §1)`);
      else result = d; // a valid declaration, including [] = declared pure
    } else if (d !== undefined) {
      console.error(`candor-ts: ${pkgName} candorEffects must be an array of §1 effect names — ignored`);
    }
  } catch { /* no/unreadable manifest → undeclared */ }
  _manifestCache.set(dir, result);
  return result;
}
276
+
300
277
  // ---- the literal surfaces (SPEC §2 hosts/cmds/paths/tables): the statically-decidable subset ------
301
278
  // Read ONLY from string literals at a classified call — informative, never complete, never inferred.
302
279
  function firstStringLiteral(node) {
@@ -305,76 +282,19 @@ function firstStringLiteral(node) {
305
282
  }
306
283
  return null;
307
284
  }
308
- // Refine the Exec cliff (spec §4 ⟨0.5⟩): the effects a literal, statically-known subprocess head
309
- // implies, matched by basename. ADDED to a caller that already carries Exec (a subprocess is still
310
- // spawned — Exec is never dropped); an unrecognised head returns [] and keeps the bare cliff (never
311
- // guess). A candor engine reads Fs/Env only — spec §7 item 12 (the analyzer self-boundary) guarantees
312
- // it, so that case is spec-supplied. Only UNAMBIGUOUS single-effect tools belong here: a multi-modal
313
- // head (git status local vs git push Net; rsync local vs remote; make/npm run project code) would
314
- // fabricate the effect for its common case. The reference engines share this table verbatim.
315
- function commandHeadEffects(cmd) {
316
- const base = cmd.trim().split(/\s+/)[0].split(/[/\\]/).pop();
317
- if (["curl", "wget", "http", "ssh", "scp"].includes(base)) return ["Net"];
318
- if (["psql", "mysql", "sqlite3", "mongosh", "redis-cli"].includes(base)) return ["Db"];
319
- if (["candor", "candor-run.sh", "candor-scan", "candor-query", "candor-java",
320
- "candor-classify", "candor-report", "cargo-candor"].includes(base)) return ["Env", "Fs"];
321
- return [];
322
- }
323
- // host[:port] from an address/URL literal; non-address strings yield nothing (never fabricate).
324
- function hostLiteral(s) {
325
- const m = s.match(/^[a-z][a-z0-9+.-]*:\/\/([^/]+)/i); // scheme://host[:port]/…
326
- if (m) return m[1].replace(/^.*@/, "");
327
- if (/^[a-z0-9._-]+(:\d+)?$/i.test(s) && s.includes(".")) return s; // bare host[.tld][:port]
328
- return null;
329
- }
330
- // Table-position identifiers in a SQL string literal (SPEC §2 `tables`). Mirrors the Rust
331
- // tables_in_sql exactly: must open with a statement keyword; FROM/JOIN/INTO anywhere,
332
- // statement-leading UPDATE/TRUNCATE, TABLE (skipping ONLY/IF NOT EXISTS); a FOR UPDATE locking
333
- // clause yields nothing. Conservative in the fabrication direction.
334
- function tablesInSql(sql) {
335
- const stmt = new Set(["select","insert","update","delete","create","drop","alter","truncate","merge","replace","with"]);
336
- const skip = new Set(["only","if","not","exists","table"]);
337
- const stop = new Set(["select","set","where","values","on","using","group","order","by","limit",
338
- "returning","as","inner","outer","left","right","cross","lateral","natural","union","all",
339
- "distinct","case","when","null","default","skip","nowait","of","from","join","into","update",
340
- "delete","insert"]);
341
- // `,` survives as its OWN token: it lets `FROM t1, t2` continue the table list without
342
- // fabricating from other comma-ridden positions (column lists, ON clauses).
343
- const toks = sql.toLowerCase().replace(/[();]/g, " ").replace(/,/g, " , ").trim().split(/\s+/);
344
- if (!toks.length || !stmt.has(toks[0])) return [];
345
- const out = [];
346
- const ident = (raw) => {
347
- const t = raw.replace(/^["'`]+|["'`]+$/g, "");
348
- if (!t || stop.has(t) || !/^[a-z_][a-z0-9_.$"`]*$/.test(t)) return null;
349
- return t.replace(/["`]/g, "");
350
- };
351
- for (let i = 0; i < toks.length; i++) {
352
- const tablePos = ["from","join","into","table"].includes(toks[i])
353
- || ((toks[i] === "update" || toks[i] === "truncate") && i === 0);
354
- if (!tablePos) continue;
355
- let j = i + 1;
356
- while (j < toks.length && skip.has(toks[j])) j++;
357
- if (j >= toks.length) continue;
358
- const first = ident(toks[j]);
359
- if (first === null) continue;
360
- if (!out.includes(first)) out.push(first);
361
- // Comma-ADJACENT continuation only: `FROM t1, t2, t3` takes all three, while an alias breaks
362
- // the chain (`FROM t1 a, t2` keeps just t1 — an under-report, never a guess: skipping an alias
363
- // to chase the comma would fabricate tables out of `INSERT INTO t (a, b)`'s column list, whose
364
- // parens are spaces by the time we tokenize).
365
- while (j + 2 < toks.length && toks[j + 1] === ",") {
366
- const more = ident(toks[j + 2]);
367
- if (more === null) break;
368
- if (!out.includes(more)) out.push(more);
369
- j += 2;
370
- }
371
- }
372
- return out;
373
- }
374
285
 
375
- // ---- pass 1: collect the analyzed functions across the project (SEMANTICS §2's F) -----------------
376
- // Names are MODULE-QUALIFIED (`src.db.save` for save() in src/db.ts; separators "." so the §6.2
377
- // segment-scope rules apply naturally: `deny Net db` matches the db module). A single-file scan
286
// The literal PROGRAM head a subprocess call names — argv[0] specifically, never a later argument.
// firstStringLiteral takes the first literal ANYWHERE in the args; this helper looks at argument 0
// only, so `spawn(toolVar, "curl")` (program is a runtime value, a trailing arg is a literal whose
// basename hits the head table) yields null instead of fabricating Net (spec §4 ⟨0.5⟩: the head is
// argv[0]). Mirrors candor-java programHeadLiteral and the Rust is_cmd_naming_method gate.
// Returns the literal's text, or null when there is no argument 0 or it is not a static string
// literal — the safe direction. Used ONLY for the effect refinement, never to widen it; the
// cosmetic `cmds` surface keeps firstStringLiteral.
function programHeadLiteral(node) {
  const callArgs = node.arguments ?? [];
  const program = callArgs[0];
  if (!program) return null;
  if (!ts.isStringLiteralLike(program)) return null;
  return program.text;
}
378
298
  // Analyzed-function names are module-qualified; a single-file scan qualifies by the file's basename (`Cases.union_a`).
379
299
  const fns = new Map(); // qualified name -> { direct, edges, hosts, tables, cmds, paths, loc }
380
300
  const unlistedSeen = new Map(); // the κ-coverage ledger: unlisted npm package -> call-site count
@@ -562,6 +482,24 @@ function enclosing(node) {
562
482
  return null;
563
483
  }
564
484
 
485
// True when a receiver expression's chain ROOTS at process.stdout/stderr/stdin — including method
// chains (`process.stdin.on("data",f).on("end",g)`, `process.stdout.write(x).on(...)`). The std streams
// are typed tty.ReadStream/WriteStream which EXTEND net.Socket, so `.on`/`.write`/`.end` resolve to
// net.Socket members and the whole-module Net rule paints them — but console fd 0/1/2 I/O is not Net
// (§1 has no Console effect). In a chained call the receiver is the inner CallExpression, so an
// exact-text check on the receiver alone misses it; this walks the chain down to its head instead.
function rootsAtStdStream(expr) {
  const STD_STREAMS = ["process.stdout", "process.stderr", "process.stdin"];
  // The wrappers we may step through: each exposes the expression it wraps as `.expression`.
  const isWrapper = (n) =>
    ts.isCallExpression(n) || ts.isPropertyAccessExpression(n) || ts.isElementAccessExpression(n)
    || ts.isParenthesizedExpression(n) || ts.isNonNullExpression(n);

  let current = expr;
  while (current) {
    // Compare the source text with all whitespace removed, so `process . stdout` still matches.
    const compact = current.getText().replace(/\s+/g, "");
    if (STD_STREAMS.includes(compact)) return true;
    if (!isWrapper(current)) return false;
    current = current.expression;
  }
  return false;
}
502
+
565
503
  // ---- pass 2: per call site, the (CLASSIFY)/(EDGE)/(UNKNOWN) resolution of SEMANTICS §4 ------------
566
504
  function visitCalls(node) {
567
505
  if (ts.isCallExpression(node) || ts.isNewExpression(node)) {
@@ -673,8 +611,37 @@ function visitCalls(node) {
673
611
  && checker.getTypeAtLocation(node.expression)?.symbol?.name === "DateConstructor")
674
612
  rec.direct.add("Clock");
675
613
  } else {
676
- const member = decl.name ? decl.name.getText() : "";
677
- const eff = kappa(mod, member); // (CLASSIFY)
614
+ // The member token κ matches: the resolved declaration's name, EXCEPT a `new X()` call,
615
+ // whose declaration is a Constructor (empty name) — synthesize "new" so a rule can exempt
616
+ // inert construction from its module-wide effect (the net cluster: `new http.Agent()` etc.).
617
+ // BUT a CONNECTING constructor is NOT inert: `new http.ClientRequest(url)` performs the
618
+ // network I/O on construction (it is what `http.request()` returns and dispatches), so the
619
+ // blanket `new`-exemption would convert a real Net source into pure (a cardinal-sin under-
620
+ // report). For such a ctor we synthesize the CLASS name instead of "new", so the net-cluster
621
+ // rule's `/^(?!new$)/` matcher keeps the effect. The set is the net cluster's documented
622
+ // public connecting ctors; http2 connects via `connect()` (a function, not a ctor) so it
623
+ // needs no entry here. Inert ctors (Agent/Server/Socket/TLSSocket/Http2Server*/message shells)
624
+ // still synthesize "new" and stay pure.
625
+ const CONNECTING_CTORS = new Set(["ClientRequest"]);
626
+ const ctorClassName = ts.isNewExpression(node)
627
+ ? (ts.isConstructorDeclaration(decl) ? decl.parent?.name?.getText?.()
628
+ : (decl.name ? decl.name.getText() : ""))
629
+ : "";
630
+ const isConstruction = ts.isConstructorDeclaration(decl) || ts.isNewExpression(node);
631
+ const member = isConstruction
632
+ ? (CONNECTING_CTORS.has(ctorClassName) ? ctorClassName : "new")
633
+ : (decl.name ? decl.name.getText() : "");
634
+ let eff = kappa(mod, member); // (CLASSIFY)
635
+ // process.stdout/stderr/stdin are typed `tty.WriteStream`, which EXTENDS `net.Socket`, so a
636
+ // `.write()`/`.end()` on them resolves to `net.Socket.write` and the whole-module Net rule
637
+ // paints it Net. But a console write to fd 0/1/2 is TTY/console I/O, NOT network — there is no
638
+ // "Console" effect in §1, so it must be PURE. Suppress the fabricated effect for these receivers
639
+ // (a real `net.Socket` you constructed and `.write()` to still classifies Net — only the three
640
+ // std streams are freed). Real-world sweep: nanoid/commander(×43)/bunyan/pino fabricated Net
641
+ // purely from a `process.stdout.write` — the cardinal sin.
642
+ if (eff && (ts.isPropertyAccessExpression(node.expression) || ts.isElementAccessExpression(node.expression))
643
+ && rootsAtStdStream(node.expression.expression))
644
+ eff = null;
678
645
  if (eff) rec.direct.add(eff);
679
646
  // the literal surfaces, read only at a CLASSIFIED call (SPEC §2)
680
647
  if (eff === "Net") {
@@ -698,15 +665,16 @@ function visitCalls(node) {
698
665
  }
699
666
  if (eff === "Exec") {
700
667
  const lit = firstStringLiteral(node);
701
- if (lit) {
702
- rec.cmds.add(lit.trim().split(/\s+/)[0]); // the program of a command line
703
- // a known literal head refines the cliff (curl→Net, candor→Fs/Env); Exec stays
704
- for (const e of commandHeadEffects(lit)) rec.direct.add(e);
705
- }
668
+ if (lit) rec.cmds.add(lit.trim().split(/\s+/)[0]); // cosmetic cmds surface (any literal)
669
+ // a known literal head refines the cliff (curl→Net, candor→Fs/Env); Exec stays. The head
670
+ // MUST be argv[0] (programHeadLiteral), NOT any literal arg: `spawn(toolVar, "curl")`
671
+ // names no static program, so its trailing literal must not fabricate Net (spec §4).
672
+ const head = programHeadLiteral(node);
673
+ if (head) for (const e of commandHeadEffects(head)) rec.direct.add(e);
706
674
  }
707
675
  if (eff === "Fs") {
708
676
  const lit = firstStringLiteral(node);
709
- if (lit && /[\/\\]|^[.~]/.test(lit)) rec.paths.add(lit); // path-shaped literals only
677
+ if (lit && /[/\\]|^[.~]/.test(lit)) rec.paths.add(lit); // path-shaped literals only
710
678
  }
711
679
  // CANDOR_DEPS: an unclassified call into a package with a loaded sibling report inherits
712
680
  // that function's recorded transitive effects (+ literal surfaces) by `hash`.
@@ -745,7 +713,13 @@ function visitCalls(node) {
745
713
  // via @types/lodash was falsely disclosed — kappaKnows saw the unstripped name).
746
714
  const pkg = mod.startsWith("@types/") ? mod.slice("@types/".length) : mod;
747
715
  const file = decl.getSourceFile().fileName;
748
- if (!kappaKnows(pkg) && !depCoveredPkgs.has(pkg)
716
+ // SPEC §5.1: a package that DECLARES its effects (candorEffects in package.json) is read
717
+ // at the declared-not-verified tier — its effects are attributed and it is NOT a blind
718
+ // spot. Otherwise the κ ledger names it (an uncurated dependency the review must read).
719
+ const declared = packageManifestEffects(file);
720
+ if (declared !== null) {
721
+ for (const e of declared) rec.direct.add(e); // [] = declared pure: covered, adds nothing
722
+ } else if (!kappaKnows(pkg) && !depCoveredPkgs.has(pkg)
749
723
  && /node_modules\//.test(file) && !/node_modules\/(@types\/node|typescript)\//.test(file)) {
750
724
  unlistedSeen.set(pkg, (unlistedSeen.get(pkg) ?? 0) + 1);
751
725
  }
@@ -848,12 +822,17 @@ for (const [name, rec] of fns) {
848
822
  }
849
823
  // `package` names what this report COVERS — a consumer chaining it registers coverage even when
850
824
  // `functions` is empty (an all-pure package's report is its purity claim, SPEC §2 rule 3).
851
- const envelope = { candor: { version: ENGINE_VERSION, toolchain: `node-${process.versions.node}`, spec: "0.4" },
825
+ const envelope = { candor: { version: ENGINE_VERSION, toolchain: `node-${process.versions.node}`, spec: "0.5" },
852
826
  package: pkgName, functions };
853
- fs.writeFileSync(`${outPrefix}.json`, JSON.stringify(envelope, null, 1));
854
827
  const cg = {};
855
828
  for (const [name, rec] of fns) cg[name] = [...rec.edges].sort();
856
- fs.writeFileSync(`${outPrefix}.callgraph.json`, JSON.stringify(cg, null, 1));
829
// Write ATOMICALLY (temp + rename): a concurrent reader — the MCP server or another `query` while
// `candor-ts-watch` re-scans (the recommended agent setup runs both) — must never observe a
// half-written report. An in-place writeFileSync leaves a truncation window where JSON.parse throws;
// rename(2) is atomic within a filesystem, so a reader sees either the old report or the new one whole.
// The temp name carries the pid so two scanner processes writing the same prefix do not share a temp
// file. If the write or the rename fails, the temp file is removed before the error is rethrown, so a
// failed scan never litters the output directory with `*.tmp` leftovers.
const writeAtomic = (file, text) => {
  const tmp = `${file}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmp, text);
    fs.renameSync(tmp, file);
  } catch (err) {
    fs.rmSync(tmp, { force: true }); // force: a temp file that was never created is not an error
    throw err;
  }
};
834
+ writeAtomic(`${outPrefix}.json`, JSON.stringify(envelope, null, 1));
835
+ writeAtomic(`${outPrefix}.callgraph.json`, JSON.stringify(cg, null, 1));
857
836
  console.error(`candor-ts: wrote ${functions.length} effectful functions (${fns.size} analyzed, ${sources.length} files) to ${outPrefix}.json`);
858
837
  if (unlistedSeen.size > 0) {
859
838
  const top = [...unlistedSeen.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]));
package/watch.mjs ADDED
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * candor-ts-watch — keep a candor report FRESH as an agent edits, so candor-ts-mcp serves live ground
4
+ * truth (roadmap #1, the freshness half). It tracks the project's TS sources by content hash and
5
+ * re-scans only when a tracked source ACTUALLY changed — a no-op save, a touched node_modules file, or
6
+ * an unrelated write never triggers work. The MCP server reads the same `--out` prefix, so the loop is:
7
+ * agent edits → watcher refreshes the report → agent asks candor_impact and gets the post-edit answer.
8
+ *
9
+ * v1 re-runs a FULL scan on a real change — sound (the report always equals a clean scan), and fast
10
+ * enough for the edit loop on small/mid projects. The deeper optimisation — re-analysing only the
11
+ * changed file's subgraph and re-propagating incrementally (the part that needs candor-ts's scanner
12
+ * factored for per-file extraction) — is the staged next step; the content-hash gate here is the first
13
+ * increment of it (don't redo work when nothing relevant changed).
14
+ *
15
+ * candor-ts-watch <dir> [--out <prefix>] [--interval <ms>]
16
+ */
17
+ import { spawnSync } from "node:child_process";
18
+ import crypto from "node:crypto";
19
+ import fs from "node:fs";
20
+ import path from "node:path";
21
+ import { fileURLToPath } from "node:url";
22
+ import * as Q from "./query-core.mjs";
23
+
24
// Directory of this module; the scanner entry point is resolved relative to it.
const HERE = path.dirname(fileURLToPath(import.meta.url));
// Source extensions that are tracked: .ts/.tsx/.mts/.cts and .js/.jsx/.mjs/.cjs.
const SRC = /\.[mc]?[jt]sx?$/;
// Directory names that are never descended into. (Dot-prefixed entries are skipped wholesale by
// trackedFiles; `.git`/`.candor` stay listed here so the intent survives a change to that rule.)
const SKIP = new Set(["node_modules", ".git", "dist", "build", ".candor"]);

/**
 * The tracked source set: every analysable file under `target` (a dir), or the single file itself.
 *
 * @param {string} target - a directory to walk recursively, or a single file.
 * @returns {string[]} absolute paths, sorted; `[]` for a single file whose extension is not tracked.
 * @throws if `target` itself cannot be stat'ed (e.g. it does not exist).
 */
export function trackedFiles(target) {
  const st = fs.statSync(target);
  if (st.isFile()) return SRC.test(target) ? [path.resolve(target)] : [];

  const out = [];
  const walk = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (err) {
      // The watcher polls a tree that is being edited: a directory can vanish (or be replaced by a
      // file) between being listed and being descended into. That is a deletion, not a failure —
      // its files simply drop out of the snapshot. Anything else (EACCES, EIO, …) still propagates.
      if (err?.code === "ENOENT" || err?.code === "ENOTDIR") return;
      throw err;
    }
    for (const ent of entries) {
      // hidden entries and the skip list are excluded whether they are files or directories
      if (ent.name.startsWith(".") || SKIP.has(ent.name)) continue;
      const p = path.join(dir, ent.name);
      if (ent.isDirectory()) walk(p);
      else if (SRC.test(ent.name)) out.push(path.resolve(p));
    }
  };
  walk(target);
  return out.sort();
}
44
+
45
/**
 * Snapshot a list of files as `{ path: sha1-hex-of-content }`.
 *
 * A file that cannot be read or hashed is simply left out of the snapshot, which is what makes a
 * delete show up as a change when two snapshots are compared.
 *
 * @param {Iterable<string>} files - paths to hash.
 * @returns {Record<string, string>} path -> hex digest, in input order.
 */
export function hashFiles(files) {
  const digestOf = (file) => crypto.createHash("sha1").update(fs.readFileSync(file)).digest("hex");
  const snapshot = {};
  for (const file of files) {
    let digest;
    try {
      digest = digestOf(file);
    } catch {
      continue; // gone
    }
    snapshot[file] = digest;
  }
  return snapshot;
}
53
+
54
/**
 * Names whose hash differs between two snapshots — added, removed, or modified.
 *
 * @param {Record<string, string>} prev - the earlier snapshot (path -> hash).
 * @param {Record<string, string>} cur - the later snapshot (path -> hash).
 * @returns {string[]} the differing paths, each once, sorted.
 */
export function changedFiles(prev, cur) {
  const visited = new Set();
  const differing = [];
  for (const name of [...Object.keys(prev), ...Object.keys(cur)]) {
    if (visited.has(name)) continue;
    visited.add(name);
    // a name missing from one side reads as undefined there, so adds and removes differ too
    if (prev[name] !== cur[name]) differing.push(name);
  }
  return differing.sort();
}
59
+
60
/**
 * One sound (full) scan into `<out>` — the prefix the MCP server reads — run synchronously in a
 * child process.
 *
 * The child is started with `process.execPath`, i.e. the SAME Node binary that runs the watcher,
 * rather than whatever `node` happens to resolve to on PATH (which may be a different version, or
 * missing entirely under a version manager or a packaged runtime).
 *
 * @param {string} target - the directory or file to scan.
 * @param {string} out - the report prefix passed to the scanner as `--out`.
 * @returns {{ ok: boolean, ms: number, stderr: string }} `ok` is true only for exit status 0; `ms`
 *   is the wall-clock duration; `stderr` is the child's stderr, with the spawn error message
 *   appended when the child could not be started at all.
 */
export function scanOnce(target, out) {
  const t0 = Date.now();
  const r = spawnSync(process.execPath, [path.join(HERE, "scan.mjs"), target, "--out", out], { encoding: "utf8" });
  // When the process could not be spawned there is no exit status and no useful stderr; surface the
  // spawn error instead so the caller's failure message says why.
  const stderr = r.error ? `${r.stderr ?? ""}${r.error.message}` : r.stderr;
  return { ok: r.status === 0, ms: Date.now() - t0, stderr };
}
66
+
67
// Display path of `f` relative to the watched root: the target directory itself, or the directory
// containing the target when the target is a single file. Falls back to `f` unchanged when the
// relative path would be empty (f is the root itself).
function rel(f, target) {
  const targetIsFile = fs.statSync(target).isFile();
  const base = targetIsFile ? path.dirname(target) : target;
  const relative = path.relative(base, f);
  return relative || f;
}
68
+
69
/**
 * Load the report stored under the `out` prefix without ever throwing.
 *
 * @param {string} out - the report prefix handed to `Q.loadReport`.
 * @returns whatever `Q.loadReport(out)` returns, or a fresh `[]` when loading fails for any reason
 *   (e.g. there is no report yet — the first scan has nothing to diff against).
 */
export function readReportSafe(out) {
  let report = [];
  try {
    report = Q.loadReport(out);
  } catch {
    // missing or unreadable report: keep the empty default
  }
  return report;
}
73
+
74
/**
 * One-line summary of what an edit changed — the agent-loop payoff: not just "the report is fresh"
 * but "your edit added Net to f".
 *
 * Each of the first four changes is rendered as `<leaf name> +Gained/… -Lost/…`, where the leaf
 * name is the text after the last `::` and then after the last `.` of `fn`. Further changes are
 * folded into a trailing `+N more`.
 *
 * @param {{ fn: string, gained: string[], lost: string[] }[]} changes - per-function effect deltas.
 * @returns {string} the `; `-joined summary, or "" when `changes` is empty.
 */
export function formatDelta(changes) {
  const SHOWN = 4;
  const summaries = [];
  for (const change of changes.slice(0, SHOWN)) {
    const shortName = change.fn.split("::").pop().split(".").pop();
    const signs = [];
    if (change.gained.length) signs.push(`+${change.gained.join("/")}`);
    if (change.lost.length) signs.push(`-${change.lost.join("/")}`);
    summaries.push(`${shortName} ${signs.join(" ")}`.trim());
  }
  const hidden = changes.length - SHOWN;
  if (hidden > 0) summaries.push(`+${hidden} more`);
  return summaries.join("; ");
}
86
+
87
/**
 * CLI entry point: `candor-ts-watch <dir> [--out <prefix>] [--interval <ms>]`.
 *
 * Runs one scan at startup, then polls the tracked sources every `interval` ms and re-scans only
 * when a content hash actually changed, logging what the edit did to the effect surface.
 * Exits with status 2 on a usage error or an unreadable target; otherwise runs until interrupted.
 */
async function main() {
  const args = process.argv.slice(2);
  let target = null;
  let out = null;
  let interval = 400;
  for (let i = 0; i < args.length; i++) {
    if (args[i] === "--out") out = args[++i];
    else if (args[i] === "--interval") interval = Number(args[++i]) || interval; // unparsable/0 → keep the default
    else if (!args[i].startsWith("--")) target = args[i];
    else { console.error(`candor-ts-watch: unknown flag ${args[i]}`); process.exit(2); }
  }
  if (!target) { console.error("usage: candor-ts-watch <dir> [--out <prefix>] [--interval <ms>]"); process.exit(2); }

  // Validate the target up front: a typo'd path should read as a one-line error, not a stack trace.
  let targetIsFile = false;
  try {
    targetIsFile = fs.statSync(target).isFile();
  } catch (err) {
    console.error(`candor-ts-watch: cannot read ${target}: ${err?.message ?? err}`);
    process.exit(2);
  }
  out ??= path.join(targetIsFile ? path.dirname(target) : target, ".candor", "report");

  let prev = hashFiles(trackedFiles(target));
  const first = scanOnce(target, out);
  console.error(`candor-ts-watch: ${Object.keys(prev).length} source(s) → ${out}.json (${first.ms}ms). Watching… (Ctrl-C to stop)`);
  if (!first.ok) console.error(first.stderr?.trim());

  // One poll: hash the tracked set, and re-scan only if something actually changed.
  const poll = () => {
    const cur = hashFiles(trackedFiles(target));
    const changed = changedFiles(prev, cur);
    if (!changed.length) return; // the freshness gate: nothing relevant changed, do nothing
    prev = cur;
    const before = readReportSafe(out); // the prior report, before this re-scan overwrites it
    const r = scanOnce(target, out);
    const names = changed.slice(0, 4).map((f) => rel(f, target)).join(", ") + (changed.length > 4 ? `, +${changed.length - 4}` : "");
    if (!r.ok) {
      console.error(`candor-ts-watch: scan FAILED after a change in ${names}: ${r.stderr?.trim()}`);
      return;
    }
    // The edit-delta: what the change DID to the effect surface (the agent-loop payoff).
    const delta = formatDelta(Q.diff(readReportSafe(out), before).changes);
    console.error(`candor-ts-watch: re-scanned (${changed.length} changed: ${names}) in ${r.ms}ms`
      + (delta ? ` — Δ ${delta}` : " — no effect change"));
  };

  // A throw that escapes a timer callback is an uncaughtException and would terminate the watcher —
  // e.g. a file-system error while the tree is mid-edit. Contain it per tick and try again on the
  // next one; identical consecutive failures are reported once, not on every interval.
  let lastPollError = null;
  setInterval(() => {
    try {
      poll();
      lastPollError = null;
    } catch (err) {
      const message = String(err?.message ?? err);
      if (message !== lastPollError) console.error(`candor-ts-watch: poll failed (will retry): ${message}`);
      lastPollError = message;
    }
  }, interval);
  // NO .unref() — the interval is the ONLY thing keeping the process alive; unref'ing it makes Node
  // exit shortly after the startup scan, so the watcher would do ONE scan and die while printing
  // "Watching…".
}
125
+
126
// Run main() only when this file is the process entry point (not when it is imported for its
// helpers). Both sides are compared as REAL paths: Node resolves symlinks for the main module's
// `import.meta.url` by default but leaves `process.argv[1]` as invoked, so a launch through a
// symlink — which is how npm links a package's bin entries into `node_modules/.bin` on POSIX —
// would otherwise never match, and the watcher would exit silently without doing anything.
{
  let invoked = path.resolve(process.argv[1] || "");
  try {
    invoked = fs.realpathSync(invoked);
  } catch {
    // no such path (e.g. no script argument): keep the resolved form, which cannot match this file
  }
  if (invoked === fs.realpathSync(fileURLToPath(import.meta.url))) main();
}