candor-ts 0.4.6 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +15 -4
- package/README.md +62 -1
- package/mcp.mjs +191 -0
- package/package.json +21 -6
- package/policy.mjs +9 -4
- package/query-core.mjs +302 -0
- package/query.mjs +32 -31
- package/scan-core.mjs +161 -0
- package/scan.mjs +126 -147
- package/watch.mjs +126 -0
package/scan.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
* candor-ts — the TypeScript implementation of candor-spec 0.
|
|
3
|
+
* candor-ts — the TypeScript implementation of candor-spec 0.5.
|
|
4
4
|
*
|
|
5
5
|
* Origin (kept honest): this engine began as the clean-room derivability proof — a single-file
|
|
6
6
|
* slice written from SPEC.md/SEMANTICS.md/CLASSIFIER.md alone, frozen as that claim in git history
|
|
@@ -26,6 +26,7 @@ import { fileURLToPath } from "node:url";
|
|
|
26
26
|
import { createRequire } from "node:module";
|
|
27
27
|
import { parsePolicy, evaluatePolicy } from "./policy.mjs";
|
|
28
28
|
import { printAgents } from "./contract.mjs";
|
|
29
|
+
import { isTestPath, kappa, kappaKnows, commandHeadEffects, hostLiteral, tablesInSql } from "./scan-core.mjs";
|
|
29
30
|
|
|
30
31
|
const ENGINE_DIR = path.dirname(fileURLToPath(import.meta.url));
|
|
31
32
|
|
|
@@ -56,9 +57,6 @@ if (wantAgents) { printAgents(); process.exit(0); }
|
|
|
56
57
|
if (target === null) { console.error(usage); process.exit(2); }
|
|
57
58
|
|
|
58
59
|
// ---- project discovery (a dir, a single file, or a tsconfig) --------------------------------------
|
|
59
|
-
function isTestPath(p) {
|
|
60
|
-
return /(^|\/)(node_modules|__tests__|tests?|spec)(\/|$)/.test(p) || /\.(test|spec)\.[mc]?tsx?$/.test(p);
|
|
61
|
-
}
|
|
62
60
|
let rootDir, fileNames, compilerOptions = {
|
|
63
61
|
target: ts.ScriptTarget.ES2022,
|
|
64
62
|
module: ts.ModuleKind.NodeNext,
|
|
@@ -221,68 +219,6 @@ const checker = program.getTypeChecker();
|
|
|
221
219
|
const projectFiles = new Set(fileNames.map((f) => path.resolve(f)));
|
|
222
220
|
const sources = program.getSourceFiles().filter((f) => projectFiles.has(path.resolve(f.fileName)));
|
|
223
221
|
|
|
224
|
-
// ---- κ — the curated classifier (CLASSIFIER §2: the dispatch/execution boundary, not builders) ----
|
|
225
|
-
// Node builtins + a curated npm tier (the same under-report-and-say-so posture as the crate table:
|
|
226
|
-
// an unlisted package contributes nothing — never a guess).
|
|
227
|
-
// One rules TABLE, two readers: kappa() classifies a call; kappaKnows() answers "is this package
|
|
228
|
-
// curated at all?" for the coverage ledger (a κ-known package whose given call is pure — a TypeORM
|
|
229
|
-
// builder — is covered, not a blind spot). A single source so the two can never drift.
|
|
230
|
-
// [module-name regex, member regex (null = any member), effect]
|
|
231
|
-
const KAPPA_RULES = [
|
|
232
|
-
[/^(node:)?fs(\/promises)?$/, null, "Fs"],
|
|
233
|
-
[/^(node:)?(net|dgram|tls|http2?|https)$/, null, "Net"],
|
|
234
|
-
[/^(node:)?child_process$/, null, "Exec"],
|
|
235
|
-
[/^(node:)?sqlite$/, null, "Db"],
|
|
236
|
-
// the curated npm tier
|
|
237
|
-
[/^(axios|got|node-fetch|undici|ws|socket\.io(-client)?|nodemailer)$/, null, "Net"],
|
|
238
|
-
[/^(pg|mysql2?|mongodb|ioredis|redis|sqlite3|better-sqlite3|knex)$/, null, "Db"],
|
|
239
|
-
[/^(execa|cross-spawn|shelljs)$/, null, "Exec"],
|
|
240
|
-
[/^(fs-extra|graceful-fs|rimraf|glob|chokidar)$/, null, "Fs"],
|
|
241
|
-
[/^dotenv$/, null, "Env"],
|
|
242
|
-
[/^(winston|pino|bunyan|npmlog)$/, null, "Log"],
|
|
243
|
-
// entropy: node:crypto's random surface + the password-hashing libs (salted -> Rand). Found by
|
|
244
|
-
// the CTA dogfood on a Nest app: argon2.hash came out SILENTLY PURE (the curated-kappa caveat
|
|
245
|
-
// landing on exactly the call a security review cares about).
|
|
246
|
-
[/^(node:)?crypto$/, /^random/, "Rand"],
|
|
247
|
-
[/^(argon2|bcrypt|bcryptjs)$/, null, "Rand"],
|
|
248
|
-
// The ORM tier — VERB-PRECISE (the CLASSIFIER discipline: tag the execution boundary, not
|
|
249
|
-
// builders; `createQueryBuilder` is pure, its `getMany`/`execute` is the I/O). Found on the
|
|
250
|
-
// first framework-APP scan: a TypeORM/Nest application — Db-heavy by construction — read zero
|
|
251
|
-
// Db because the ORM resolved into an unlisted package (the JVM's Spring-Data lesson, replayed).
|
|
252
|
-
[/^(typeorm|@nestjs\/typeorm)$/,
|
|
253
|
-
/^(find|save|remove|softRemove|recover|insert|update|upsert|delete|restore|count|exist|sum|average|minimum|maximum|query|clear|increment|decrement|getMany|getOne|getOneOrFail|getRawMany|getRawOne|getCount|getExists|execute|stream|transaction)/,
|
|
254
|
-
"Db"],
|
|
255
|
-
[/^(@prisma\/client|\.prisma|\.prisma\/client)$/,
|
|
256
|
-
/^(\$?(queryRaw|executeRaw|transaction)|find(Many|Unique|First)|create|createMany|update|updateMany|upsert|delete|deleteMany|aggregate|count|groupBy)/,
|
|
257
|
-
"Db"],
|
|
258
|
-
[/^mongoose$/,
|
|
259
|
-
/^(find|save|create|insertMany|updateOne|updateMany|replaceOne|deleteOne|deleteMany|aggregate|countDocuments|estimatedDocumentCount|distinct|exec|bulkWrite)/,
|
|
260
|
-
"Db"],
|
|
261
|
-
[/^(sequelize|drizzle-orm)$/,
|
|
262
|
-
/^(find|create|update|destroy|upsert|count|max|min|sum|query|select|insert|delete|execute|transaction)/,
|
|
263
|
-
"Db"],
|
|
264
|
-
// Nest's HttpService wraps axios — the request verbs are Net.
|
|
265
|
-
[/^@nestjs\/axios$/, /^(get|post|put|patch|delete|head|request)$/, "Net"],
|
|
266
|
-
];
|
|
267
|
-
function kappa(moduleName, member) {
|
|
268
|
-
for (const [mre, vre, eff] of KAPPA_RULES) {
|
|
269
|
-
if (mre.test(moduleName) && (!vre || vre.test(member))) return eff;
|
|
270
|
-
}
|
|
271
|
-
return null;
|
|
272
|
-
}
|
|
273
|
-
// Packages REVIEWED and ratified effect-free at the call boundary (decorator/metadata plumbing,
|
|
274
|
-
// pure computation, operator algebras whose side effects live in visible user callbacks). This is
|
|
275
|
-
// the ledger's triage outlet: an unlisted package either earns KAPPA_RULES entries or lands here —
|
|
276
|
-
// never silently. NOT for anything that mints entropy (uuid), reads clocks, or signs with RSA-PSS
|
|
277
|
-
// (jsonwebtoken stays unlisted on purpose).
|
|
278
|
-
const KAPPA_PURE = new Set([
|
|
279
|
-
"@nestjs/common", "@nestjs/core", "@nestjs/swagger", "@nestjs/platform-express",
|
|
280
|
-
"class-validator", "class-transformer", "reflect-metadata",
|
|
281
|
-
"rxjs", "zod", "lodash", "ramda", "date-fns",
|
|
282
|
-
]);
|
|
283
|
-
function kappaKnows(moduleName) {
|
|
284
|
-
return KAPPA_PURE.has(moduleName) || KAPPA_RULES.some(([mre]) => mre.test(moduleName));
|
|
285
|
-
}
|
|
286
222
|
|
|
287
223
|
// The module a declaration came from: a project file → "<local>", @types/node → the builtin name,
|
|
288
224
|
// node_modules/<pkg> → the package name, the ES lib → "<es-lib>".
|
|
@@ -297,6 +233,47 @@ function declModule(decl) {
|
|
|
297
233
|
return f;
|
|
298
234
|
}
|
|
299
235
|
|
|
236
|
+
// SPEC §5.1 — the effect manifest. An uncurated package MAY declare its effect surface in its
|
|
237
|
+
// package.json (`"candorEffects": ["Net"]`), read as the declared-not-verified tier: it kills the
|
|
238
|
+
// silent pure/blind-spot the package would otherwise carry, exactly like a cap type (and unlike
|
|
239
|
+
// candor's own analysis, which is checked). A name outside §1 VOIDS the declaration loudly — a typo
|
|
240
|
+
// must never silently narrow a surface. Cached per package. `file` is the resolved declaration source.
|
|
241
|
+
const EFFECT_VOCAB = new Set(["Net", "Fs", "Db", "Exec", "Env", "Clock", "Ipc", "Log", "Rand", "Clipboard"]);
const _manifestCache = new Map();
/**
 * Read the `candorEffects` declaration of the package that owns `file`.
 *
 * @param {string} file - resolved declaration source path (forward-slash separated); only paths
 *   under a `node_modules/<pkg>/` or `node_modules/@scope/<pkg>/` directory are considered.
 * @returns {string[] | null} the declared effect array (possibly EMPTY — `[]` is an explicit
 *   "declared pure"), or `null` when there is no package, no manifest, no declaration, or the
 *   declaration is invalid. Results are cached per package directory.
 *
 * A name outside EFFECT_VOCAB voids the whole declaration and is reported on stderr; a non-array
 * `candorEffects` is reported and ignored.
 */
function packageManifestEffects(file) {
  const m = file && file.match(/^(.*\/node_modules\/(?:@[^/]+\/[^/]+|[^/]+))\//);
  if (!m) return null;
  let dir = m[1];
  // A manifest read from an `@types/<pkg>` directory is a TRUST-BOUNDARY HOLE: the @types stub is a
  // type-only package, NOT the effect-owning package. Redirect to the REAL package's own directory
  // (`@types/babel__core` → `@babel/core`, `@types/foo` → `foo`); if that has no manifest the result
  // is `null`, never the stub's declaration.
  const at = dir.match(/^(.*\/node_modules\/)@types\/([^/]+)$/);
  if (at) {
    const real = at[2].includes("__") ? `@${at[2].replace("__", "/")}` : at[2];
    dir = at[1] + real;
  }
  if (_manifestCache.has(dir)) return _manifestCache.get(dir);
  // The full package name INCLUDING its scope. path.basename(dir) would print `pkg` for
  // `@scope/pkg`, so the diagnostic could point the reader at a different package.
  const marker = "/node_modules/";
  const pkgName = dir.slice(dir.lastIndexOf(marker) + marker.length);
  let result = null;
  try {
    const d = JSON.parse(fs.readFileSync(path.join(dir, "package.json"), "utf8")).candorEffects;
    if (Array.isArray(d)) {
      const bad = d.filter((e) => !EFFECT_VOCAB.has(e));
      if (bad.length) console.error(`candor-ts: ${pkgName} candorEffects has an invalid effect '${bad[0]}' — declaration voided (SPEC §1)`);
      else result = d; // a valid declaration, including [] = declared pure
    } else if (d !== undefined) {
      console.error(`candor-ts: ${pkgName} candorEffects must be an array of §1 effect names — ignored`);
    }
  } catch { /* no/unreadable manifest → undeclared (deliberate best-effort) */ }
  _manifestCache.set(dir, result);
  return result;
}
|
|
276
|
+
|
|
300
277
|
// ---- the literal surfaces (SPEC §2 hosts/cmds/paths/tables): the statically-decidable subset ------
|
|
301
278
|
// Read ONLY from string literals at a classified call — informative, never complete, never inferred.
|
|
302
279
|
function firstStringLiteral(node) {
|
|
@@ -305,76 +282,19 @@ function firstStringLiteral(node) {
|
|
|
305
282
|
}
|
|
306
283
|
return null;
|
|
307
284
|
}
|
|
308
|
-
// Refine the Exec cliff (spec §4 ⟨0.5⟩): the effects a literal, statically-known subprocess head
|
|
309
|
-
// implies, matched by basename. ADDED to a caller that already carries Exec (a subprocess is still
|
|
310
|
-
// spawned — Exec is never dropped); an unrecognised head returns [] and keeps the bare cliff (never
|
|
311
|
-
// guess). A candor engine reads Fs/Env only — spec §7 item 12 (the analyzer self-boundary) guarantees
|
|
312
|
-
// it, so that case is spec-supplied. Only UNAMBIGUOUS single-effect tools belong here: a multi-modal
|
|
313
|
-
// head (git status local vs git push Net; rsync local vs remote; make/npm run project code) would
|
|
314
|
-
// fabricate the effect for its common case. The reference engines share this table verbatim.
|
|
315
|
-
function commandHeadEffects(cmd) {
|
|
316
|
-
const base = cmd.trim().split(/\s+/)[0].split(/[/\\]/).pop();
|
|
317
|
-
if (["curl", "wget", "http", "ssh", "scp"].includes(base)) return ["Net"];
|
|
318
|
-
if (["psql", "mysql", "sqlite3", "mongosh", "redis-cli"].includes(base)) return ["Db"];
|
|
319
|
-
if (["candor", "candor-run.sh", "candor-scan", "candor-query", "candor-java",
|
|
320
|
-
"candor-classify", "candor-report", "cargo-candor"].includes(base)) return ["Env", "Fs"];
|
|
321
|
-
return [];
|
|
322
|
-
}
|
|
323
|
-
// host[:port] from an address/URL literal; non-address strings yield nothing (never fabricate).
|
|
324
|
-
function hostLiteral(s) {
|
|
325
|
-
const m = s.match(/^[a-z][a-z0-9+.-]*:\/\/([^/]+)/i); // scheme://host[:port]/…
|
|
326
|
-
if (m) return m[1].replace(/^.*@/, "");
|
|
327
|
-
if (/^[a-z0-9._-]+(:\d+)?$/i.test(s) && s.includes(".")) return s; // bare host[.tld][:port]
|
|
328
|
-
return null;
|
|
329
|
-
}
|
|
330
|
-
// Table-position identifiers in a SQL string literal (SPEC §2 `tables`). Mirrors the Rust
|
|
331
|
-
// tables_in_sql exactly: must open with a statement keyword; FROM/JOIN/INTO anywhere,
|
|
332
|
-
// statement-leading UPDATE/TRUNCATE, TABLE (skipping ONLY/IF NOT EXISTS); a FOR UPDATE locking
|
|
333
|
-
// clause yields nothing. Conservative in the fabrication direction.
|
|
334
|
-
function tablesInSql(sql) {
|
|
335
|
-
const stmt = new Set(["select","insert","update","delete","create","drop","alter","truncate","merge","replace","with"]);
|
|
336
|
-
const skip = new Set(["only","if","not","exists","table"]);
|
|
337
|
-
const stop = new Set(["select","set","where","values","on","using","group","order","by","limit",
|
|
338
|
-
"returning","as","inner","outer","left","right","cross","lateral","natural","union","all",
|
|
339
|
-
"distinct","case","when","null","default","skip","nowait","of","from","join","into","update",
|
|
340
|
-
"delete","insert"]);
|
|
341
|
-
// `,` survives as its OWN token: it lets `FROM t1, t2` continue the table list without
|
|
342
|
-
// fabricating from other comma-ridden positions (column lists, ON clauses).
|
|
343
|
-
const toks = sql.toLowerCase().replace(/[();]/g, " ").replace(/,/g, " , ").trim().split(/\s+/);
|
|
344
|
-
if (!toks.length || !stmt.has(toks[0])) return [];
|
|
345
|
-
const out = [];
|
|
346
|
-
const ident = (raw) => {
|
|
347
|
-
const t = raw.replace(/^["'`]+|["'`]+$/g, "");
|
|
348
|
-
if (!t || stop.has(t) || !/^[a-z_][a-z0-9_.$"`]*$/.test(t)) return null;
|
|
349
|
-
return t.replace(/["`]/g, "");
|
|
350
|
-
};
|
|
351
|
-
for (let i = 0; i < toks.length; i++) {
|
|
352
|
-
const tablePos = ["from","join","into","table"].includes(toks[i])
|
|
353
|
-
|| ((toks[i] === "update" || toks[i] === "truncate") && i === 0);
|
|
354
|
-
if (!tablePos) continue;
|
|
355
|
-
let j = i + 1;
|
|
356
|
-
while (j < toks.length && skip.has(toks[j])) j++;
|
|
357
|
-
if (j >= toks.length) continue;
|
|
358
|
-
const first = ident(toks[j]);
|
|
359
|
-
if (first === null) continue;
|
|
360
|
-
if (!out.includes(first)) out.push(first);
|
|
361
|
-
// Comma-ADJACENT continuation only: `FROM t1, t2, t3` takes all three, while an alias breaks
|
|
362
|
-
// the chain (`FROM t1 a, t2` keeps just t1 — an under-report, never a guess: skipping an alias
|
|
363
|
-
// to chase the comma would fabricate tables out of `INSERT INTO t (a, b)`'s column list, whose
|
|
364
|
-
// parens are spaces by the time we tokenize).
|
|
365
|
-
while (j + 2 < toks.length && toks[j + 1] === ",") {
|
|
366
|
-
const more = ident(toks[j + 2]);
|
|
367
|
-
if (more === null) break;
|
|
368
|
-
if (!out.includes(more)) out.push(more);
|
|
369
|
-
j += 2;
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
return out;
|
|
373
|
-
}
|
|
374
285
|
|
|
375
|
-
//
|
|
376
|
-
//
|
|
377
|
-
//
|
|
286
|
+
// The literal PROGRAM head a subprocess call NAMES — argv[0] specifically, never a later argument.
|
|
287
|
+
// Unlike firstStringLiteral (the first literal ANYWHERE in the args), this refuses to refine when
|
|
288
|
+
// the program (arg0) is a runtime value but a trailing arg is a literal whose basename hits the head
|
|
289
|
+
// table: `spawn(toolVar, "curl")` must NOT fabricate Net — the literal is an argument, not the
|
|
290
|
+
// program (spec §4 ⟨0.5⟩: the head is argv[0]). Mirrors candor-java programHeadLiteral and the Rust
|
|
291
|
+
// is_cmd_naming_method gate. Returns null when arg0 is not a static string literal — the safe
|
|
292
|
+
// direction. Used ONLY for the effect refinement, never to widen it; the cosmetic `cmds` surface
|
|
293
|
+
// keeps firstStringLiteral.
|
|
294
|
+
/**
 * The text of a call's FIRST argument when that argument is a static string literal.
 *
 * @param node - a call/new expression node; a missing `arguments` list is treated as empty.
 * @returns {string | null} the literal's text, or `null` when there is no first argument or it
 *   is not string-literal-like.
 */
function programHeadLiteral(node) {
  const argv = node.arguments ?? [];
  const head = argv[0];
  if (!head) return null;
  if (!ts.isStringLiteralLike(head)) return null;
  return head.text;
}
|
|
378
298
|
// qualifies by the file's basename (`Cases.union_a`).
|
|
379
299
|
const fns = new Map(); // qualified name -> { direct, edges, hosts, tables, cmds, paths, loc }
|
|
380
300
|
const unlistedSeen = new Map(); // the κ-coverage ledger: unlisted npm package -> call-site count
|
|
@@ -562,6 +482,24 @@ function enclosing(node) {
|
|
|
562
482
|
return null;
|
|
563
483
|
}
|
|
564
484
|
|
|
485
|
+
// True when a receiver expression's chain ROOTS at process.stdout/stderr/stdin — including method chains
|
|
486
|
+
// (`process.stdin.on("data",f).on("end",g)`, `process.stdout.write(x).on(...)`). The std streams are typed
|
|
487
|
+
// tty.ReadStream/WriteStream which EXTEND net.Socket, so `.on`/`.write`/`.end` resolve to net.Socket members
|
|
488
|
+
// and the whole-module Net rule paints them — but console fd 0/1/2 I/O is not Net (§1 has no Console effect).
|
|
489
|
+
// `net.Socket.on`/`.write` return the stream (`this`), so a chained call's receiver is still the std stream;
|
|
490
|
+
// the exact-string check missed it (the receiver is the inner CallExpression). Walk the chain to its head.
|
|
491
|
+
/**
 * Walk a receiver expression down its `.expression` chain and report whether any link's source
 * text (whitespace removed) is exactly `process.stdout`, `process.stderr` or `process.stdin`.
 *
 * Only call, property-access, element-access, parenthesized and non-null expressions are
 * unwrapped; any other node kind ends the walk with `false`.
 *
 * @param expr - the receiver expression node (may be undefined).
 * @returns {boolean}
 */
function rootsAtStdStream(expr) {
  const STD_STREAMS = ["process.stdout", "process.stderr", "process.stdin"];
  const unwrappable = (n) => ts.isCallExpression(n) || ts.isPropertyAccessExpression(n)
    || ts.isElementAccessExpression(n) || ts.isParenthesizedExpression(n) || ts.isNonNullExpression(n);
  for (let cur = expr; cur; cur = cur.expression) {
    const compact = cur.getText().replace(/\s+/g, "");
    if (STD_STREAMS.includes(compact)) return true;
    if (!unwrappable(cur)) return false;
  }
  return false;
}
|
|
502
|
+
|
|
565
503
|
// ---- pass 2: per call site, the (CLASSIFY)/(EDGE)/(UNKNOWN) resolution of SEMANTICS §4 ------------
|
|
566
504
|
function visitCalls(node) {
|
|
567
505
|
if (ts.isCallExpression(node) || ts.isNewExpression(node)) {
|
|
@@ -673,8 +611,37 @@ function visitCalls(node) {
|
|
|
673
611
|
&& checker.getTypeAtLocation(node.expression)?.symbol?.name === "DateConstructor")
|
|
674
612
|
rec.direct.add("Clock");
|
|
675
613
|
} else {
|
|
676
|
-
|
|
677
|
-
|
|
614
|
+
// The member token κ matches: the resolved declaration's name, EXCEPT a `new X()` call,
|
|
615
|
+
// whose declaration is a Constructor (empty name) — synthesize "new" so a rule can exempt
|
|
616
|
+
// inert construction from its module-wide effect (the net cluster: `new http.Agent()` etc.).
|
|
617
|
+
// BUT a CONNECTING constructor is NOT inert: `new http.ClientRequest(url)` performs the
|
|
618
|
+
// network I/O on construction (it is what `http.request()` returns and dispatches), so the
|
|
619
|
+
// blanket `new`-exemption would convert a real Net source into pure (a cardinal-sin under-
|
|
620
|
+
// report). For such a ctor we synthesize the CLASS name instead of "new", so the net-cluster
|
|
621
|
+
// rule's `/^(?!new$)/` matcher keeps the effect. The set is the net cluster's documented
|
|
622
|
+
// public connecting ctors; http2 connects via `connect()` (a function, not a ctor) so it
|
|
623
|
+
// needs no entry here. Inert ctors (Agent/Server/Socket/TLSSocket/Http2Server*/message shells)
|
|
624
|
+
// still synthesize "new" and stay pure.
|
|
625
|
+
const CONNECTING_CTORS = new Set(["ClientRequest"]);
|
|
626
|
+
const ctorClassName = ts.isNewExpression(node)
|
|
627
|
+
? (ts.isConstructorDeclaration(decl) ? decl.parent?.name?.getText?.()
|
|
628
|
+
: (decl.name ? decl.name.getText() : ""))
|
|
629
|
+
: "";
|
|
630
|
+
const isConstruction = ts.isConstructorDeclaration(decl) || ts.isNewExpression(node);
|
|
631
|
+
const member = isConstruction
|
|
632
|
+
? (CONNECTING_CTORS.has(ctorClassName) ? ctorClassName : "new")
|
|
633
|
+
: (decl.name ? decl.name.getText() : "");
|
|
634
|
+
let eff = kappa(mod, member); // (CLASSIFY)
|
|
635
|
+
// process.stdout/stderr/stdin are typed `tty.WriteStream`, which EXTENDS `net.Socket`, so a
|
|
636
|
+
// `.write()`/`.end()` on them resolves to `net.Socket.write` and the whole-module Net rule
|
|
637
|
+
// paints it Net. But a console write to fd 0/1/2 is TTY/console I/O, NOT network — there is no
|
|
638
|
+
// "Console" effect in §1, so it must be PURE. Suppress the fabricated effect for these receivers
|
|
639
|
+
// (a real `net.Socket` you constructed and `.write()` to still classifies Net — only the three
|
|
640
|
+
// std streams are freed). Real-world sweep: nanoid/commander(×43)/bunyan/pino fabricated Net
|
|
641
|
+
// purely from a `process.stdout.write` — the cardinal sin.
|
|
642
|
+
if (eff && (ts.isPropertyAccessExpression(node.expression) || ts.isElementAccessExpression(node.expression))
|
|
643
|
+
&& rootsAtStdStream(node.expression.expression))
|
|
644
|
+
eff = null;
|
|
678
645
|
if (eff) rec.direct.add(eff);
|
|
679
646
|
// the literal surfaces, read only at a CLASSIFIED call (SPEC §2)
|
|
680
647
|
if (eff === "Net") {
|
|
@@ -698,15 +665,16 @@ function visitCalls(node) {
|
|
|
698
665
|
}
|
|
699
666
|
if (eff === "Exec") {
|
|
700
667
|
const lit = firstStringLiteral(node);
|
|
701
|
-
if (lit)
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
668
|
+
if (lit) rec.cmds.add(lit.trim().split(/\s+/)[0]); // cosmetic cmds surface (any literal)
|
|
669
|
+
// a known literal head refines the cliff (curl→Net, candor→Fs/Env); Exec stays. The head
|
|
670
|
+
// MUST be argv[0] (programHeadLiteral), NOT any literal arg: `spawn(toolVar, "curl")`
|
|
671
|
+
// names no static program, so its trailing literal must not fabricate Net (spec §4).
|
|
672
|
+
const head = programHeadLiteral(node);
|
|
673
|
+
if (head) for (const e of commandHeadEffects(head)) rec.direct.add(e);
|
|
706
674
|
}
|
|
707
675
|
if (eff === "Fs") {
|
|
708
676
|
const lit = firstStringLiteral(node);
|
|
709
|
-
if (lit && /[
|
|
677
|
+
if (lit && /[/\\]|^[.~]/.test(lit)) rec.paths.add(lit); // path-shaped literals only
|
|
710
678
|
}
|
|
711
679
|
// CANDOR_DEPS: an unclassified call into a package with a loaded sibling report inherits
|
|
712
680
|
// that function's recorded transitive effects (+ literal surfaces) by `hash`.
|
|
@@ -745,7 +713,13 @@ function visitCalls(node) {
|
|
|
745
713
|
// via @types/lodash was falsely disclosed — kappaKnows saw the unstripped name).
|
|
746
714
|
const pkg = mod.startsWith("@types/") ? mod.slice("@types/".length) : mod;
|
|
747
715
|
const file = decl.getSourceFile().fileName;
|
|
748
|
-
|
|
716
|
+
// SPEC §5.1: a package that DECLARES its effects (candorEffects in package.json) is read
|
|
717
|
+
// at the declared-not-verified tier — its effects are attributed and it is NOT a blind
|
|
718
|
+
// spot. Otherwise the κ ledger names it (an uncurated dependency the review must read).
|
|
719
|
+
const declared = packageManifestEffects(file);
|
|
720
|
+
if (declared !== null) {
|
|
721
|
+
for (const e of declared) rec.direct.add(e); // [] = declared pure: covered, adds nothing
|
|
722
|
+
} else if (!kappaKnows(pkg) && !depCoveredPkgs.has(pkg)
|
|
749
723
|
&& /node_modules\//.test(file) && !/node_modules\/(@types\/node|typescript)\//.test(file)) {
|
|
750
724
|
unlistedSeen.set(pkg, (unlistedSeen.get(pkg) ?? 0) + 1);
|
|
751
725
|
}
|
|
@@ -848,12 +822,17 @@ for (const [name, rec] of fns) {
|
|
|
848
822
|
}
|
|
849
823
|
// `package` names what this report COVERS — a consumer chaining it registers coverage even when
|
|
850
824
|
// `functions` is empty (an all-pure package's report is its purity claim, SPEC §2 rule 3).
|
|
851
|
-
const envelope = { candor: { version: ENGINE_VERSION, toolchain: `node-${process.versions.node}`, spec: "0.
|
|
825
|
+
const envelope = { candor: { version: ENGINE_VERSION, toolchain: `node-${process.versions.node}`, spec: "0.5" },
|
|
852
826
|
package: pkgName, functions };
|
|
853
|
-
fs.writeFileSync(`${outPrefix}.json`, JSON.stringify(envelope, null, 1));
|
|
854
827
|
const cg = {};
|
|
855
828
|
for (const [name, rec] of fns) cg[name] = [...rec.edges].sort();
|
|
856
|
-
|
|
829
|
+
// Write ATOMICALLY (temp + rename): a concurrent reader — the MCP server or another `query` while
|
|
830
|
+
// `candor-ts-watch` re-scans (the recommended agent setup runs both) — must never observe a
|
|
831
|
+
// half-written report. An in-place writeFileSync leaves a truncation window where JSON.parse throws;
|
|
832
|
+
// rename(2) is atomic within a filesystem, so a reader sees either the old report or the new one whole.
|
|
833
|
+
// Writes `text` to a sibling temp file (`<file>.<pid>.tmp`) and renames it over `file`. If either
// step throws, the temp file is removed (best-effort) before the ORIGINAL error is rethrown, so a
// failed write never leaves `*.tmp` residue next to the report.
const writeAtomic = (file, text) => {
  const tmp = `${file}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmp, text);
    fs.renameSync(tmp, file);
  } catch (err) {
    try { fs.rmSync(tmp, { force: true }); } catch { /* cleanup is best-effort; report the real failure */ }
    throw err;
  }
};
|
|
834
|
+
writeAtomic(`${outPrefix}.json`, JSON.stringify(envelope, null, 1));
|
|
835
|
+
writeAtomic(`${outPrefix}.callgraph.json`, JSON.stringify(cg, null, 1));
|
|
857
836
|
console.error(`candor-ts: wrote ${functions.length} effectful functions (${fns.size} analyzed, ${sources.length} files) to ${outPrefix}.json`);
|
|
858
837
|
if (unlistedSeen.size > 0) {
|
|
859
838
|
const top = [...unlistedSeen.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]));
|
package/watch.mjs
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* candor-ts-watch — keep a candor report FRESH as an agent edits, so candor-ts-mcp serves live ground
|
|
4
|
+
* truth (roadmap #1, the freshness half). It tracks the project's TS sources by content hash and
|
|
5
|
+
* re-scans only when a tracked source ACTUALLY changed — a no-op save, a touched node_modules file, or
|
|
6
|
+
* an unrelated write never triggers work. The MCP server reads the same `--out` prefix, so the loop is:
|
|
7
|
+
* agent edits → watcher refreshes the report → agent asks candor_impact and gets the post-edit answer.
|
|
8
|
+
*
|
|
9
|
+
* v1 re-runs a FULL scan on a real change — sound (the report always equals a clean scan), and fast
|
|
10
|
+
* enough for the edit loop on small/mid projects. The deeper optimisation — re-analysing only the
|
|
11
|
+
* changed file's subgraph and re-propagating incrementally (the part that needs candor-ts's scanner
|
|
12
|
+
* factored for per-file extraction) — is the staged next step; the content-hash gate here is the first
|
|
13
|
+
* increment of it (don't redo work when nothing relevant changed).
|
|
14
|
+
*
|
|
15
|
+
* candor-ts-watch <dir> [--out <prefix>] [--interval <ms>]
|
|
16
|
+
*/
|
|
17
|
+
import { spawnSync } from "node:child_process";
|
|
18
|
+
import crypto from "node:crypto";
|
|
19
|
+
import fs from "node:fs";
|
|
20
|
+
import path from "node:path";
|
|
21
|
+
import { fileURLToPath } from "node:url";
|
|
22
|
+
import * as Q from "./query-core.mjs";
|
|
23
|
+
|
|
24
|
+
const HERE = path.dirname(fileURLToPath(import.meta.url));
|
|
25
|
+
const SRC = /\.[mc]?[jt]sx?$/; // .ts/.tsx/.mts/.cts and .js/.jsx/.mjs/.cjs — what candor-ts analyses
|
|
26
|
+
const SKIP = new Set(["node_modules", ".git", "dist", "build", ".candor"]);
|
|
27
|
+
|
|
28
|
+
/**
 * The tracked source set: every analysable file under `target` (a dir), or the single file itself.
 *
 * Dot-prefixed entries and the names in SKIP are never descended into or listed. Only entries
 * whose name matches SRC are collected.
 *
 * @param {string} target - a directory or a single file.
 * @returns {string[]} absolute paths, sorted.
 * @throws if `target` itself cannot be stat'ed or listed.
 */
export function trackedFiles(target) {
  const st = fs.statSync(target);
  if (st.isFile()) return SRC.test(target) ? [path.resolve(target)] : [];
  const out = [];
  const walk = (dir, isRoot) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (err) {
      // A subdirectory can vanish between being listed by its parent and being read here (the
      // tree is being edited while we poll). That is not a failure of the walk: skip it.
      if (!isRoot && (err.code === "ENOENT" || err.code === "ENOTDIR")) return;
      throw err;
    }
    for (const ent of entries) {
      if (ent.name.startsWith(".") || SKIP.has(ent.name)) continue;
      const p = path.join(dir, ent.name);
      if (ent.isDirectory()) walk(p, false);
      else if (SRC.test(ent.name)) out.push(path.resolve(p));
    }
  };
  walk(target, true);
  return out.sort();
}
|
|
44
|
+
|
|
45
|
+
/**
 * Snapshot the given files as `{ path: sha1-hex-of-contents }`.
 *
 * A file that cannot be read is simply left out of the snapshot, so a deleted file shows up as a
 * difference when two snapshots are compared.
 *
 * @param {Iterable<string>} files
 * @returns {Record<string, string>}
 */
export function hashFiles(files) {
  const snapshot = {};
  for (const file of files) {
    let contents;
    try {
      contents = fs.readFileSync(file);
    } catch {
      continue; // gone
    }
    const hasher = crypto.createHash("sha1");
    hasher.update(contents);
    snapshot[file] = hasher.digest("hex");
  }
  return snapshot;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Names whose hash differs between two snapshots — present in only one of them (added or
 * removed) or present in both with different values (modified).
 *
 * @param {Record<string, string>} prev
 * @param {Record<string, string>} cur
 * @returns {string[]} the differing names, sorted, each listed once.
 */
export function changedFiles(prev, cur) {
  const differing = new Set();
  for (const name of [...Object.keys(prev), ...Object.keys(cur)]) {
    if (prev[name] !== cur[name]) differing.add(name);
  }
  return [...differing].sort();
}
|
|
59
|
+
|
|
60
|
+
/**
 * Run one full scan of `target` into `<out>` by spawning `scan.mjs` (from this module's directory)
 * as a child process, synchronously.
 *
 * The child is started with `process.execPath` — the interpreter running this watcher — rather
 * than whatever `node` happens to be first on PATH (which may be a different version, or absent).
 *
 * @param {string} target - the directory or file to scan.
 * @param {string} out - the report prefix passed as `--out`.
 * @returns {{ ok: boolean, ms: number, stderr: string }} `ok` is true only for exit status 0;
 *   `ms` is the wall-clock duration; `stderr` is the child's stderr, plus the spawn error's
 *   message when the child could not be started at all.
 */
export function scanOnce(target, out) {
  const t0 = Date.now();
  const r = spawnSync(process.execPath, [path.join(HERE, "scan.mjs"), target, "--out", out], { encoding: "utf8" });
  // When the spawn itself fails there is no child output; surface the reason instead of nothing.
  const stderr = r.error ? [r.stderr, r.error.message].filter(Boolean).join("\n") : r.stderr;
  return { ok: r.status === 0, ms: Date.now() - t0, stderr };
}
|
|
66
|
+
|
|
67
|
+
// Display name for `f`: its path relative to `target` (or to target's directory when target is a
// file). Falls back to `f` itself when that relative path is empty.
function rel(f, target) {
  const base = fs.statSync(target).isFile() ? path.dirname(target) : target;
  const relative = path.relative(base, f);
  return relative || f;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Load the report at prefix `out` via `Q.loadReport`, or yield an empty list when loading throws
 * for any reason (e.g. there is no report yet, so the first scan has nothing to diff against).
 *
 * @param {string} out - the report prefix.
 * @returns whatever `Q.loadReport(out)` returns, or `[]` on failure.
 */
export function readReportSafe(out) {
  let report = [];
  try {
    report = Q.loadReport(out);
  } catch {
    // no readable report — keep the empty default
  }
  return report;
}
|
|
73
|
+
|
|
74
|
+
/**
 * One-line summary of what an edit changed — e.g. "f +Net; g -Db".
 *
 * Each change is rendered as the function's leaf name (the text after the last `::` and then the
 * last `.`) followed by `+gained/...` and `-lost/...` where non-empty. At most four changes are
 * spelled out; the remainder is summarised as `+N more`. Returns "" for an empty list.
 *
 * @param {{ fn: string, gained: string[], lost: string[] }[]} changes
 * @returns {string}
 */
export function formatDelta(changes) {
  const LIMIT = 4;
  const shortName = (qualified) => qualified.split("::").pop().split(".").pop();
  const segments = [];
  for (const change of changes.slice(0, LIMIT)) {
    const moves = [];
    if (change.gained.length) moves.push(`+${change.gained.join("/")}`);
    if (change.lost.length) moves.push(`-${change.lost.join("/")}`);
    segments.push(`${shortName(change.fn)} ${moves.join(" ")}`.trim());
  }
  const hidden = changes.length - LIMIT;
  if (hidden > 0) segments.push(`+${hidden} more`);
  return segments.join("; ");
}
|
|
86
|
+
|
|
87
|
+
/**
 * CLI entry point: `candor-ts-watch <dir> [--out <prefix>] [--interval <ms>]`.
 *
 * Parses the arguments, runs one startup scan, then polls every `interval` ms: the tracked files
 * are re-hashed and a re-scan runs only when some hash changed. After a successful re-scan the
 * effect delta between the previous and the new report is printed to stderr.
 *
 * Exits with status 2 on an unknown flag, a missing target, or a target that cannot be stat'ed.
 */
async function main() {
  const args = process.argv.slice(2);
  let target = null, out = null, interval = 400;
  for (let i = 0; i < args.length; i++) {
    if (args[i] === "--out") out = args[++i];
    else if (args[i] === "--interval") {
      // Only a finite, positive number replaces the default: a negative value would be clamped
      // by setInterval to ~1ms and turn the poll into a busy loop.
      const ms = Number(args[++i]);
      if (Number.isFinite(ms) && ms > 0) interval = ms;
    }
    else if (!args[i].startsWith("--")) target = args[i];
    else { console.error(`candor-ts-watch: unknown flag ${args[i]}`); process.exit(2); }
  }
  if (!target) { console.error("usage: candor-ts-watch <dir> [--out <prefix>] [--interval <ms>]"); process.exit(2); }
  // Fail with a readable message (not an unhandled rejection and a stack trace) on a bad target.
  let targetIsFile;
  try {
    targetIsFile = fs.statSync(target).isFile();
  } catch (err) {
    console.error(`candor-ts-watch: cannot read ${target}: ${err.message}`);
    process.exit(2);
  }
  out ??= path.join(targetIsFile ? path.dirname(target) : target, ".candor", "report");

  // Hash BEFORE the startup scan so an edit made while it runs is picked up by the first poll.
  let prev = hashFiles(trackedFiles(target));
  const first = scanOnce(target, out);
  console.error(`candor-ts-watch: ${Object.keys(prev).length} source(s) → ${out}.json (${first.ms}ms). Watching… (Ctrl-C to stop)`);
  if (!first.ok) console.error(first.stderr?.trim());

  setInterval(() => {
    // An exception thrown from a timer callback is uncaught and would terminate the watcher.
    // A poll that fails (e.g. a file-system error while walking a tree that is being edited) is
    // reported and retried on the next tick instead.
    try {
      const cur = hashFiles(trackedFiles(target));
      const changed = changedFiles(prev, cur);
      if (!changed.length) return; // the freshness gate: nothing relevant changed, do nothing
      prev = cur;
      const before = readReportSafe(out); // the prior report, before this re-scan overwrites it
      const r = scanOnce(target, out);
      const names = changed.slice(0, 4).map((f) => rel(f, target)).join(", ") + (changed.length > 4 ? `, +${changed.length - 4}` : "");
      if (!r.ok) {
        console.error(`candor-ts-watch: scan FAILED after a change in ${names}: ${r.stderr?.trim()}`);
        return;
      }
      // The edit-delta: what the change DID to the effect surface (the agent-loop payoff).
      const delta = formatDelta(Q.diff(readReportSafe(out), before).changes);
      console.error(`candor-ts-watch: re-scanned (${changed.length} changed: ${names}) in ${r.ms}ms`
        + (delta ? ` — Δ ${delta}` : " — no effect change"));
    } catch (err) {
      console.error(`candor-ts-watch: poll failed, will retry: ${err?.message ?? err}`);
    }
  }, interval);
  // NO .unref() — the interval is the ONLY thing keeping the process alive; unref'ing it made Node
  // exit shortly after the startup scan, so the watcher did ONE scan and died while printing
  // "Watching…".
}
|
|
125
|
+
|
|
126
|
+
if (path.resolve(process.argv[1] || "") === path.resolve(fileURLToPath(import.meta.url))) main();
|