npm - @archrad/deterministic - Versions diffs - 0.1.5 → 0.1.6 - Mend

@archrad/deterministic 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/CHANGELOG.md +28 -4
package/dist/cli.js +1 -1
package/dist/mcp-server-tools-patch.d.ts +29 -0
package/dist/mcp-server-tools-patch.d.ts.map +1 -0
package/dist/mcp-server-tools-patch.js +71 -0
package/dist/mcp-server.js +69 -49
package/docs/CI.md +122 -0
package/docs/MCP.md +2 -2
package/package.json +3 -2
package/scripts/generate-corpus.mjs +667 -0

package/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.1.6] - 2026-04-10
+### Added
+- **`docs/CI.md`** — exit code semantics (`--fail-on-warning`, `--max-warnings`) and copy-paste snippets for GitHub Actions, GitLab CI, Bitbucket Pipelines, Jenkins, and Azure DevOps.
+- **`scripts/generate-corpus.mjs`** — validates hand-written `corpus/*.json` pairs; `--count` mode generates weighted synthetic JSONL training pairs via the deterministic engine. Run `npm run build` first.
+- **Integration tests** — `archrad validate` exit code assertions (`src/cli-exit.integration.test.ts`).
+### Changed
+- **MCP** — Rewrote all six `registerTool` title/description blocks for agent discoverability (`src/mcp-server-tools-patch.ts`).
+- **npm package** — `corpus/` excluded from published tarball (`.npmignore` + removed from `package.json` `files`).
+### Fixed
+- **`archrad_validate_drift`** MCP schema: `target` enum is `python` | `nodejs` only, aligned with `docs/MCP.md`.
+### Security
+- `npm audit fix` applied to dev/test transitive dependencies.
 ## [0.1.5] - 2026-04-07
 ### Added
@@ -114,8 +135,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Documented **codegen vs validation** for retry/timeout IR fields and **InkByte vs OSS** scope in README and structural/semantic doc.
 - README positioning: **deterministic compiler and linter for system architecture**; validation layers table (OSS vs Cloud); **`validate-drift`**, drift GIF / trust-loop recording docs, library **`runValidateDrift`** example.
-[Unreleased]: https://github.com/archradhq/arch-deterministic/compare/v0.1.3...HEAD
-[0.1.3]: https://github.com/archradhq/arch-deterministic/releases/tag/v0.1.3
-[0.1.2]: https://github.com/archradhq/arch-deterministic/releases/tag/v0.1.2
-[0.1.1]: https://github.com/archradhq/arch-deterministic/releases/tag/v0.1.1
+[Unreleased]: https://github.com/archradhq/arch-deterministic/compare/v0.1.6...HEAD
+[0.1.6]: https://github.com/archradhq/arch-deterministic/compare/v0.1.5...v0.1.6
+[0.1.5]: https://github.com/archradhq/arch-deterministic/compare/v0.1.4...v0.1.5
+[0.1.4]: https://github.com/archradhq/arch-deterministic/compare/v0.1.3...v0.1.4
+[0.1.3]: https://github.com/archradhq/arch-deterministic/compare/v0.1.2...v0.1.3
+[0.1.2]: https://github.com/archradhq/arch-deterministic/compare/v0.1.1...v0.1.2
+[0.1.1]: https://github.com/archradhq/arch-deterministic/compare/v0.1.0...v0.1.1
 [0.1.0]: https://github.com/archradhq/arch-deterministic/releases/tag/v0.1.0

package/dist/cli.js CHANGED Viewed

@@ -76,7 +76,7 @@ const program = new Command();
 program
     .name('archrad')
     .description('Validate your architecture before you write code. Deterministic compiler + linter — FastAPI / Express (no LLM, no server).')
-    .version('0.1.5');
+    .version('0.1.6');
 program
     .command('validate')
     .description('Validate your architecture before you write code — IR structural (IR-STRUCT-*) + architecture lint (IR-LINT-*)')

package/dist/mcp-server-tools-patch.d.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/**
+ * MCP tool catalog metadata (title + description) for archrad-mcp discoverability.
+ * Keep in sync with registerTool handlers in mcp-server.ts.
+ */
+export declare const MCP_TOOL_ARCHRAD_VALIDATE_IR: {
+    readonly title: "Validate IR — structural (IR-STRUCT-*) + architecture lint (IR-LINT-*) + PolicyPack";
+    readonly description: "Architecture-as-code validation: run this when you need to check whether an IR graph is valid or to list violations before export or drift checks.\n\nKeywords: validate IR, architecture lint, IR-STRUCT, IR-LINT, policy pack, blueprint graph, nodes and edges.\n\nRuns in one call:\n1) Structural validation — graph shape, references, IR-STRUCT-* errors.\n2) Architecture lint — design rules (auth, dead nodes, DB access, sync chains, etc.).\n3) Optional PolicyPack rules — pass policiesDirectory to load YAML/JSON packs from disk.\n\nReturns irStructuralFindings, irLintFindings, and combined (sorted by severity). ok is false when any finding has severity \"error\".\n\nAfter results: call archrad_suggest_fix with a finding code for remediation text; use archrad_lint_summary for a short human-readable digest.\n\nInput: provide exactly one of ir (inline JSON object) or irPath (path to .json). Large graphs: prefer irPath.";
+};
+export declare const MCP_TOOL_ARCHRAD_LINT_SUMMARY: {
+    readonly title: "Lint summary — plain-text counts and top findings";
+    readonly description: "Human-readable summary of validation results: error/warning counts and up to 20 top findings (plain text).\n\nKeywords: summary, PR comment, explain violations, readable lint output.\n\nUse when you need a short narrative or comment, not structured JSON. For machine-actionable findings, use archrad_validate_ir instead.\n\nSame inputs as archrad_validate_ir: ir or irPath, optional policiesDirectory. Provide only one of ir or irPath.";
+};
+export declare const MCP_TOOL_ARCHRAD_SUGGEST_FIX: {
+    readonly title: "Suggest fix — static remediation for a built-in finding code";
+    readonly description: "Look up curated remediation steps and documentation URL for one built-in rule code (e.g. IR-LINT-MISSING-AUTH-010, IR-STRUCT-*, DRIFT-*).\n\nKeywords: remediation, how to fix, rule code, docs link, IR-LINT, IR-STRUCT.\n\nDoes not return generated code patches or IR edits — only static guidance. PolicyPack and org-specific rule ids are not covered; see your YAML packs.\n\nCall archrad_list_rule_codes to list codes that have static guidance.";
+};
+export declare const MCP_TOOL_ARCHRAD_LIST_RULE_CODES: {
+    readonly title: "List rule codes — built-in codes with static guidance";
+    readonly description: "Returns the sorted list of built-in IR-STRUCT-*, IR-LINT-*, and DRIFT-* codes that archrad_suggest_fix can explain.\n\nKeywords: catalog, all rules, rule list, documentation index.\n\nUse before suggest_fix to confirm a code exists. Excludes PolicyPack custom ids. No arguments.";
+};
+export declare const MCP_TOOL_ARCHRAD_VALIDATE_DRIFT: {
+    readonly title: "Validate drift — IR blueprint vs on-disk export (python | nodejs)";
+    readonly description: "Compare the architecture IR to generated code under exportDir and report drift (files that no longer match deterministic export).\n\nKeywords: drift, CI, codegen diff, FastAPI, Express, Node, Python, validate export, architecture vs implementation.\n\nRequires: ir or irPath, exportDir (absolute path to the export tree), and target. target must be \"python\" or \"nodejs\" (use \"nodejs\" for Node/TypeScript; do not use \"node\").\n\nOptional: policiesDirectory, skipIrLint (true to skip IR-LINT and only check drift).\n\nReturns driftFindings plus IR structural and lint findings from the same engine as CLI validate-drift.";
+};
+export declare const MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD: {
+    readonly title: "Load PolicyPack — compile and validate packs (dry run, no IR)";
+    readonly description: "Validate PolicyPack YAML/JSON without running against a graph: syntax, rule ids, and compilation.\n\nKeywords: policy pack, YAML rules, validate policies, org rules, offline check.\n\nYou usually do not need this before archrad_validate_ir or archrad_validate_drift — those accept policiesDirectory and load packs internally. Use this tool to debug pack files in isolation.\n\nProvide either directory (folder path) or files (array of { name, content }), not both.";
+};
+//# sourceMappingURL=mcp-server-tools-patch.d.ts.map

package/dist/mcp-server-tools-patch.d.ts.map ADDED Viewed

@@ -0,0 +1 @@
+{"version":3,"file":"mcp-server-tools-patch.d.ts","sourceRoot":"","sources":["../src/mcp-server-tools-patch.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,eAAO,MAAM,4BAA4B;;;CAgB/B,CAAC;AAEX,eAAO,MAAM,6BAA6B;;;CAShC,CAAC;AAEX,eAAO,MAAM,4BAA4B;;;CAS/B,CAAC;AAEX,eAAO,MAAM,gCAAgC;;;CAOnC,CAAC;AAEX,eAAO,MAAM,+BAA+B;;;CAWlC,CAAC;AAEX,eAAO,MAAM,kCAAkC;;;CASrC,CAAC"}

package/dist/mcp-server-tools-patch.js ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * MCP tool catalog metadata (title + description) for archrad-mcp discoverability.
+ * Keep in sync with registerTool handlers in mcp-server.ts.
+ */
+export const MCP_TOOL_ARCHRAD_VALIDATE_IR = {
+    title: 'Validate IR — structural (IR-STRUCT-*) + architecture lint (IR-LINT-*) + PolicyPack',
+    description: `Architecture-as-code validation: run this when you need to check whether an IR graph is valid or to list violations before export or drift checks.
+
+Keywords: validate IR, architecture lint, IR-STRUCT, IR-LINT, policy pack, blueprint graph, nodes and edges.
+
+Runs in one call:
+1) Structural validation — graph shape, references, IR-STRUCT-* errors.
+2) Architecture lint — design rules (auth, dead nodes, DB access, sync chains, etc.).
+3) Optional PolicyPack rules — pass policiesDirectory to load YAML/JSON packs from disk.
+
+Returns irStructuralFindings, irLintFindings, and combined (sorted by severity). ok is false when any finding has severity "error".
+
+After results: call archrad_suggest_fix with a finding code for remediation text; use archrad_lint_summary for a short human-readable digest.
+
+Input: provide exactly one of ir (inline JSON object) or irPath (path to .json). Large graphs: prefer irPath.`,
+};
+export const MCP_TOOL_ARCHRAD_LINT_SUMMARY = {
+    title: 'Lint summary — plain-text counts and top findings',
+    description: `Human-readable summary of validation results: error/warning counts and up to 20 top findings (plain text).
+
+Keywords: summary, PR comment, explain violations, readable lint output.
+
+Use when you need a short narrative or comment, not structured JSON. For machine-actionable findings, use archrad_validate_ir instead.
+
+Same inputs as archrad_validate_ir: ir or irPath, optional policiesDirectory. Provide only one of ir or irPath.`,
+};
+export const MCP_TOOL_ARCHRAD_SUGGEST_FIX = {
+    title: 'Suggest fix — static remediation for a built-in finding code',
+    description: `Look up curated remediation steps and documentation URL for one built-in rule code (e.g. IR-LINT-MISSING-AUTH-010, IR-STRUCT-*, DRIFT-*).
+
+Keywords: remediation, how to fix, rule code, docs link, IR-LINT, IR-STRUCT.
+
+Does not return generated code patches or IR edits — only static guidance. PolicyPack and org-specific rule ids are not covered; see your YAML packs.
+
+Call archrad_list_rule_codes to list codes that have static guidance.`,
+};
+export const MCP_TOOL_ARCHRAD_LIST_RULE_CODES = {
+    title: 'List rule codes — built-in codes with static guidance',
+    description: `Returns the sorted list of built-in IR-STRUCT-*, IR-LINT-*, and DRIFT-* codes that archrad_suggest_fix can explain.
+
+Keywords: catalog, all rules, rule list, documentation index.
+
+Use before suggest_fix to confirm a code exists. Excludes PolicyPack custom ids. No arguments.`,
+};
+export const MCP_TOOL_ARCHRAD_VALIDATE_DRIFT = {
+    title: 'Validate drift — IR blueprint vs on-disk export (python | nodejs)',
+    description: `Compare the architecture IR to generated code under exportDir and report drift (files that no longer match deterministic export).
+
+Keywords: drift, CI, codegen diff, FastAPI, Express, Node, Python, validate export, architecture vs implementation.
+
+Requires: ir or irPath, exportDir (absolute path to the export tree), and target. target must be "python" or "nodejs" (use "nodejs" for Node/TypeScript; do not use "node").
+
+Optional: policiesDirectory, skipIrLint (true to skip IR-LINT and only check drift).
+
+Returns driftFindings plus IR structural and lint findings from the same engine as CLI validate-drift.`,
+};
+export const MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD = {
+    title: 'Load PolicyPack — compile and validate packs (dry run, no IR)',
+    description: `Validate PolicyPack YAML/JSON without running against a graph: syntax, rule ids, and compilation.
+
+Keywords: policy pack, YAML rules, validate policies, org rules, offline check.
+
+You usually do not need this before archrad_validate_ir or archrad_validate_drift — those accept policiesDirectory and load packs internally. Use this tool to debug pack files in isolation.
+
+Provide either directory (folder path) or files (array of { name, content }), not both.`,
+};

package/dist/mcp-server.js CHANGED Viewed

@@ -10,13 +10,10 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
 import { normalizeIrGraph, validateIrStructural, validateIrLint, runValidateDrift, sortFindings, loadPolicyPacksFromDirectory, loadPolicyPacksFromFiles, } from './index.js';
 import { getStaticRuleGuidance, listStaticRuleCodes } from './static-rule-guidance.js';
-const VERSION = '0.1.5';
+import { MCP_TOOL_ARCHRAD_LIST_RULE_CODES, MCP_TOOL_ARCHRAD_LINT_SUMMARY, MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD, MCP_TOOL_ARCHRAD_SUGGEST_FIX, MCP_TOOL_ARCHRAD_VALIDATE_DRIFT, MCP_TOOL_ARCHRAD_VALIDATE_IR, } from './mcp-server-tools-patch.js';
+const VERSION = '0.1.6';
 /** Hard cap for `irPath` reads (see docs/MCP.md). */
 const MAX_IR_FILE_BYTES = 25 * 1024 * 1024;
-const irSourceSchema = {
-    ir: z.unknown().optional(),
-    irPath: z.string().optional(),
-};
 function jsonResult(payload) {
     return {
         content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }],
@@ -64,34 +61,16 @@ async function main() {
         name: 'archrad-deterministic',
         version: VERSION,
     });
-    server.registerTool('archrad_suggest_fix', {
-        title: 'Static remediation for a finding code',
-        description: 'Deterministic title, remediation text, and canonical docs URL for a built-in IR-STRUCT / IR-LINT / DRIFT code. Does not generate patches or IR edits.',
-        inputSchema: {
-            findingCode: z.string().min(1),
-        },
-    }, async (args) => {
-        const g = getStaticRuleGuidance(args.findingCode);
-        if (!g) {
-            return jsonResult({
-                ok: false,
-                findingCode: args.findingCode,
-                error: 'Unknown built-in code. PolicyPack and org rules use custom rule ids in YAML — see your pack. Use archrad_list_rule_codes for built-in codes.',
-            });
-        }
-        return jsonResult({ ok: true, ...g });
-    });
-    server.registerTool('archrad_list_rule_codes', {
-        title: 'List built-in rule codes',
-        description: 'Sorted list of IR-STRUCT-*, IR-LINT-*, and DRIFT-* codes that have static guidance via archrad_suggest_fix.',
-        inputSchema: {},
-    }, async () => jsonResult({ codes: listStaticRuleCodes() }));
     server.registerTool('archrad_validate_ir', {
-        title: 'Validate IR (structural + IR-LINT)',
-        description: 'Run deterministic structural validation (IR-STRUCT-*) and architecture lint (IR-LINT-*). Pass `ir` inline or `irPath` to a JSON file (recommended for large graphs). Optional local PolicyPack directory.',
+        title: MCP_TOOL_ARCHRAD_VALIDATE_IR.title,
+        description: MCP_TOOL_ARCHRAD_VALIDATE_IR.description,
         inputSchema: {
-            ...irSourceSchema,
-            policiesDirectory: z.string().optional(),
+            ir: z.unknown().optional().describe('Inline IR graph as a JSON object. Use for small graphs only.'),
+            irPath: z.string().optional().describe('Absolute or relative path to an IR JSON file. Preferred for large graphs.'),
+            policiesDirectory: z
+                .string()
+                .optional()
+                .describe('Path to a directory of PolicyPack YAML/JSON files. Optional — omit if you have no custom rules.'),
         },
     }, async (args) => {
         const loaded = await loadIrFromArgs(args);
@@ -122,11 +101,15 @@ async function main() {
         });
     });
     server.registerTool('archrad_lint_summary', {
-        title: 'Lint summary',
-        description: 'Short text summary of IR structural + lint findings. Use `ir` or `irPath` (see archrad_validate_ir).',
+        title: MCP_TOOL_ARCHRAD_LINT_SUMMARY.title,
+        description: MCP_TOOL_ARCHRAD_LINT_SUMMARY.description,
         inputSchema: {
-            ...irSourceSchema,
-            policiesDirectory: z.string().optional(),
+            ir: z.unknown().optional().describe('Inline IR graph as a JSON object.'),
+            irPath: z.string().optional().describe('Absolute or relative path to an IR JSON file.'),
+            policiesDirectory: z
+                .string()
+                .optional()
+                .describe('Path to a directory of PolicyPack YAML/JSON files. Optional.'),
         },
     }, async (args) => {
         const loaded = await loadIrFromArgs(args);
@@ -156,17 +139,47 @@ async function main() {
         ];
         if (combined.length > 20)
             lines.push(`… and ${combined.length - 20} more.`);
-        return jsonResult({ summary: lines.join('\n'), counts: { total: combined.length, errors: errors.length, warnings: warnings.length } });
+        return jsonResult({
+            summary: lines.join('\n'),
+            counts: { total: combined.length, errors: errors.length, warnings: warnings.length },
+        });
     });
+    server.registerTool('archrad_suggest_fix', {
+        title: MCP_TOOL_ARCHRAD_SUGGEST_FIX.title,
+        description: MCP_TOOL_ARCHRAD_SUGGEST_FIX.description,
+        inputSchema: {
+            findingCode: z.string().min(1).describe('The finding code to look up, e.g. "IR-LINT-MISSING-AUTH-010".'),
+        },
+    }, async (args) => {
+        const g = getStaticRuleGuidance(args.findingCode);
+        if (!g) {
+            return jsonResult({
+                ok: false,
+                findingCode: args.findingCode,
+                error: 'Unknown built-in code. PolicyPack and org rules use custom rule ids in YAML — see your pack. Call archrad_list_rule_codes to see all built-in codes with static guidance.',
+            });
+        }
+        return jsonResult({ ok: true, ...g });
+    });
+    server.registerTool('archrad_list_rule_codes', {
+        title: MCP_TOOL_ARCHRAD_LIST_RULE_CODES.title,
+        description: MCP_TOOL_ARCHRAD_LIST_RULE_CODES.description,
+        inputSchema: {},
+    }, async () => jsonResult({ codes: listStaticRuleCodes() }));
     server.registerTool('archrad_validate_drift', {
-        title: 'Validate drift',
-        description: 'Compare on-disk export to a fresh deterministic export. Pass `ir` or `irPath` (JSON file).',
+        title: MCP_TOOL_ARCHRAD_VALIDATE_DRIFT.title,
+        description: MCP_TOOL_ARCHRAD_VALIDATE_DRIFT.description,
         inputSchema: {
-            ...irSourceSchema,
-            target: z.enum(['python', 'node', 'nodejs']),
-            exportDir: z.string(),
-            policiesDirectory: z.string().optional(),
-            skipIrLint: z.boolean().optional(),
+            ir: z.unknown().optional().describe('Inline IR graph as a JSON object.'),
+            irPath: z.string().optional().describe('Absolute or relative path to an IR JSON file.'),
+            target: z
+                .enum(['python', 'nodejs'])
+                .describe('Export target language. Use "nodejs" for Node.js/TypeScript, "python" for Python.'),
+            exportDir: z
+                .string()
+                .describe('Absolute path to the on-disk export directory to compare against the IR.'),
+            policiesDirectory: z.string().optional().describe('Path to a PolicyPack directory. Optional.'),
+            skipIrLint: z.boolean().optional().describe('Set to true to skip IR-LINT checks and only check for drift. Default: false.'),
         },
     }, async (args) => {
         const loaded = await loadIrFromArgs(args);
@@ -202,13 +215,17 @@ async function main() {
         });
     });
     server.registerTool('archrad_policy_packs_load', {
-        title: 'Load policy packs',
-        description: 'Compile PolicyPack YAML/JSON from a directory or from in-memory file list.',
+        title: MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD.title,
+        description: MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD.description,
         inputSchema: {
-            directory: z.string().optional(),
+            directory: z.string().optional().describe('Path to a directory of PolicyPack YAML/JSON files.'),
             files: z
-                .array(z.object({ name: z.string(), content: z.string() }))
-                .optional(),
+                .array(z.object({
+                name: z.string().describe('Filename, e.g. "auth-rules.yaml".'),
+                content: z.string().describe('Raw file content as a string.'),
+            }))
+                .optional()
+                .describe('In-memory file list. Use when you have policy content as strings rather than on-disk files.'),
         },
     }, async (args) => {
         if (args.files && args.files.length > 0) {
@@ -225,7 +242,10 @@ async function main() {
             }
             return jsonResult({ ok: true, ruleCount: loaded.ruleCount });
         }
-        return jsonResult({ ok: false, error: 'Provide `directory` or `files`.' });
+        return jsonResult({
+            ok: false,
+            error: 'Provide either directory (path string) or files (array of {name, content}).',
+        });
     });
     const transport = new StdioServerTransport();
     await server.connect(transport);

package/docs/CI.md ADDED Viewed

@@ -0,0 +1,122 @@
+# CI integration — `archrad validate`
+`archrad validate` is the usual gate for **architecture-as-code** in pipelines. It reads an IR JSON file and runs structural validation (IR-STRUCT-*) plus architecture lint (IR-LINT-*).
+## Exit codes
+| Situation | Default exit code |
+|-----------|-------------------|
+| No findings | **0** |
+| Any finding with severity **`error`** (structural / blocking) | **1** |
+| **Warnings only** (e.g. many IR-LINT-* rules) | **0** |
+Optional stricter gates:
+- **`--fail-on-warning`** — exit **1** if any warning exists.
+- **`--max-warnings <n>`** — exit **1** if the warning count is **greater than** `n` (e.g. **`--max-warnings 0`** allows no warnings).
+JSON output: add **`--json`** (findings array on stdout).
+Policy packs: **`--policies <dir>`** (directory of PolicyPack YAML/JSON), merged after built-in IR-LINT-* (omit **`--skip-lint`** if you want lint + policies).
+Example:
+```bash
+npx archrad validate --ir ./graph.json
+npx archrad validate --ir ./graph.json --fail-on-warning
+npx archrad validate --ir ./graph.json --max-warnings 0 --json
+```
+Install **`@archrad/deterministic`** as a dev dependency so `npx archrad` resolves locally, or invoke **`node node_modules/@archrad/deterministic/dist/cli.js`** explicitly.
+---
+## GitHub Actions
+```yaml
+jobs:
+  archrad:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          cache: 'npm'
+      - run: npm ci
+      - run: npx archrad validate --ir ./path/to/graph.json
+```
+With warnings as failures:
+```yaml
+      - run: npx archrad validate --ir ./path/to/graph.json --fail-on-warning
+```
+---
+## GitLab CI
+```yaml
+archrad-validate:
+  image: node:20-bookworm
+  script:
+    - npm ci
+    - npx archrad validate --ir ./path/to/graph.json
+```
+---
+## Bitbucket Pipelines
+```yaml
+pipelines:
+  default:
+    - step:
+        name: ArchRad validate
+        image: node:20
+        script:
+          - npm ci
+          - npx archrad validate --ir ./path/to/graph.json
+```
+---
+## Jenkins (Declarative)
+```groovy
+pipeline {
+  agent any
+  stages {
+    stage('ArchRad') {
+      steps {
+        sh 'npm ci'
+        sh 'npx archrad validate --ir ./path/to/graph.json'
+      }
+    }
+  }
+}
+```
+---
+## Azure DevOps
+```yaml
+steps:
+  - task: NodeTool@0
+    inputs:
+      versionSpec: '20.x'
+  - script: npm ci
+    displayName: npm ci
+  - script: npx archrad validate --ir ./path/to/graph.json
+    displayName: archrad validate
+```
+---
+## Notes
+- Replace **`./path/to/graph.json`** with your IR path (repo-relative in CI).
+- Ensure the job installs the same **`@archrad/deterministic`** version you use locally (`package.json` / lockfile).
+- Drift checks use **`archrad validate-drift`** (separate command); see **`docs/DRIFT.md`**.

package/docs/MCP.md CHANGED Viewed

@@ -91,7 +91,7 @@ Open the URL Inspector prints (often **http://localhost:6274**). Under **Tools**
 - **`archrad_list_rule_codes`:** JSON with a **`codes`** array.
 - **`archrad_validate_ir`:** JSON with **`irStructuralFindings`**, **`irLintFindings`**, **`combined`**, **`ok`** — not a connection or file error.
-## 6. Tools (0.1.5)
+## 6. Tools (0.1.6)
 Tools are **idempotent** and **deterministic** where stated.
@@ -101,7 +101,7 @@ Tools are **idempotent** and **deterministic** where stated.
 |------|--------|--------|-------|
 | **`archrad_validate_ir`** | `ir` **or** `irPath`; optional `policiesDirectory` | `{ ok, irStructuralFindings, irLintFindings, combined }` | Same as CLI validate. |
 | **`archrad_lint_summary`** | `ir` **or** `irPath`; optional `policiesDirectory` | Short summary + counts | Agent-friendly. |
-| **`archrad_validate_drift`** | `ir` **or** `irPath`; `target`; `exportDir`; optional policies, `skipIrLint` | Drift + export findings | Same as CLI `validate-drift`. |
+| **`archrad_validate_drift`** | `ir` **or** `irPath`; `target`; `exportDir`; optional policies, `skipIrLint` | Drift + export findings | Same engine as CLI `validate-drift`. **MCP `target` values:** `python` or `nodejs` only (not `node`). The CLI `validate-drift` / `export` may still accept `node` as an alias for Node exports. |
 | **`archrad_policy_packs_load`** | `directory` or `files[]` | `{ ok, ruleCount }` or errors | Compiles packs; does not return visitor functions over MCP. |
 ### 6.2 Static guidance (no generated architecture)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@archrad/deterministic",
-  "version": "0.1.5",
+  "version": "0.1.6",
   "description": "A deterministic compiler and linter for system architecture. Validate your architecture before you write code. OSS: structural validation + basic architecture lint (rule-based); FastAPI/Express export; OpenAPI document-shape; golden Docker/Makefile — no LLM.",
   "keywords": [
     "archrad",
@@ -64,9 +64,10 @@
   "scripts": {
     "build": "tsc -p tsconfig.build.json",
     "prepublishOnly": "npm run build",
-    "test": "tsc -p tsconfig.build.json --noEmit && vitest run",
+    "test": "tsc -p tsconfig.build.json --noEmit && npm run build && vitest run",
     "lint": "biome check ./src",
     "typecheck": "tsc -p tsconfig.build.json --noEmit",
+    "generate-corpus": "node scripts/generate-corpus.mjs",
     "smoke:mcp": "node scripts/smoke-mcp.mjs",
     "record:demo:payment-retry": "vhs scripts/record-demo-payment-retry.tape",
     "record:demo:drift": "vhs scripts/record-demo-drift.tape"

package/scripts/generate-corpus.mjs ADDED Viewed

@@ -0,0 +1,667 @@
+#!/usr/bin/env node
+/**
+ * generate-corpus.mjs
+ *
+ * • Default (no --count / --generate): validate hand-written corpus/*.json
+ * • Generate: synthetic IR graphs → ArchRad engine → JSONL training pairs
+ *
+ * Usage:
+ *   npm run build
+ *   npm run generate-corpus
+ *   node scripts/generate-corpus.mjs --count 1000 --out corpus/auto-generated.jsonl
+ */
+import { mkdirSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { readdir, readFile } from 'node:fs/promises';
+import { dirname, join, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { normalizeIrGraph, validateIrLint, validateIrStructural } from '../dist/index.js';
+// ES modules have no built-in __dirname; derive this script's directory from import.meta.url.
+const __dirname = dirname(fileURLToPath(import.meta.url));
+// Parent of this script's directory; all relative paths below are resolved against it.
+const ROOT = resolve(__dirname, '..');
+// Directory scanned for hand-written corpus files in validation mode.
+const corpusDir = join(ROOT, 'corpus');
+// ─── CLI ──────────────────────────────────────────────────────────────────────
+const argv = process.argv.slice(2);
+const wantsGenerate = argv.includes('--generate') || argv.includes('--count');
+const getArg = (flag, def) => {
+  const i = argv.indexOf(flag);
+  return i !== -1 && argv[i + 1] ? argv[i + 1] : def;
+};
+// ─── Validate hand-written corpus (default) ───────────────────────────────────
+function isPairRecord(x) {
+  return (
+    x != null &&
+    typeof x === 'object' &&
+    typeof x.id === 'string' &&
+    'input' in x &&
+    'output' in x
+  );
+}
+async function validateHandwritten() {
+  let names;
+  try {
+    names = await readdir(corpusDir);
+  } catch (e) {
+    console.error('generate-corpus: cannot read corpus dir:', corpusDir, e);
+    process.exitCode = 1;
+    return;
+  }
+  const jsonFiles = names.filter((n) => n.endsWith('.json') && !n.startsWith('auto-')).sort();
+  if (!jsonFiles.length) {
+    console.error('generate-corpus: no hand-written .json files in', corpusDir);
+    process.exitCode = 1;
+    return;
+  }
+  let totalPairs = 0;
+  for (const name of jsonFiles) {
+    const path = join(corpusDir, name);
+    let data;
+    try {
+      data = JSON.parse(await readFile(path, 'utf8'));
+    } catch (e) {
+      console.error(`generate-corpus: ${path}:`, e);
+      process.exitCode = 1;
+      return;
+    }
+    if (!Array.isArray(data)) {
+      console.error(`generate-corpus: ${name} must be a JSON array`);
+      process.exitCode = 1;
+      return;
+    }
+    for (let i = 0; i < data.length; i++) {
+      if (!isPairRecord(data[i])) {
+        console.error(`generate-corpus: ${name}[${i}] missing id/input/output`);
+        process.exitCode = 1;
+        return;
+      }
+    }
+    totalPairs += data.length;
+    console.log(`${name}: ${data.length} pair(s)`);
+  }
+  console.log(`generate-corpus: OK — ${jsonFiles.length} file(s), ${totalPairs} pair(s) total`);
+}
+// ─── Name pools (generation) ──────────────────────────────────────────────────
+// HTTP-like entry-point pools. Each entry is [node id, display name]; the node
+// type is derived from which pool the entry came from (see pickHttpLike).
+const GATEWAY_NAMES = [
+  ['api-gateway', 'API Gateway'],
+  ['web-gateway', 'Web Gateway'],
+  ['mobile-gateway', 'Mobile Gateway'],
+  ['public-gateway', 'Public Gateway'],
+  ['edge-gateway', 'Edge Gateway'],
+  ['payment-gateway', 'Payment Gateway'],
+  ['admin-gateway', 'Admin Gateway'],
+  ['partner-gateway', 'Partner Gateway'],
+];
+const API_NAMES = [
+  ['rest-api', 'REST API'],
+  ['public-api', 'Public API'],
+  ['partner-api', 'Partner API'],
+  ['internal-api', 'Internal API'],
+  ['checkout-api', 'Checkout API'],
+  ['reporting-api', 'Reporting API'],
+];
+const BFF_NAMES = [
+  ['web-bff', 'Web BFF'],
+  ['mobile-bff', 'Mobile BFF'],
+  ['dashboard-bff', 'Dashboard BFF'],
+];
+const GRPC_NAMES = [
+  ['grpc-gateway', 'gRPC Gateway'],
+  ['grpc-api', 'gRPC API'],
+];
+const GRAPHQL_NAMES = [
+  ['graphql-api', 'GraphQL API'],
+  ['graph-api', 'Graph API'],
+];
+// Service pool: [node id, display name]; generators always emit these with type 'service'.
+const SERVICE_NAMES = [
+  ['user-service', 'User Service'],
+  ['order-service', 'Order Service'],
+  ['payment-service', 'Payment Service'],
+  ['inventory-service', 'Inventory Service'],
+  ['notification-service', 'Notification Service'],
+  ['billing-service', 'Billing Service'],
+  ['shipping-service', 'Shipping Service'],
+  ['catalog-service', 'Catalog Service'],
+  ['search-service', 'Search Service'],
+  ['auth-proxy', 'Auth Proxy'],
+  ['profile-service', 'Profile Service'],
+  ['report-service', 'Report Service'],
+  ['analytics-service', 'Analytics Service'],
+  ['fraud-service', 'Fraud Detection'],
+  ['compliance-service', 'Compliance Service'],
+  ['fulfillment-service', 'Fulfillment Service'],
+  ['recommendation-service', 'Recommendation Service'],
+  ['pricing-service', 'Pricing Service'],
+  ['tax-service', 'Tax Service'],
+  ['review-service', 'Review Service'],
+];
+// Datastore pool: [node id, display name, node type].
+const DB_NAMES = [
+  ['user-db', 'User DB', 'database'],
+  ['order-db', 'Order DB', 'database'],
+  ['payment-db', 'Payment DB', 'database'],
+  ['inventory-db', 'Inventory DB', 'database'],
+  ['main-postgres', 'Main Postgres', 'postgres'],
+  ['analytics-db', 'Analytics DB', 'database'],
+  ['audit-db', 'Audit DB', 'database'],
+  ['session-cache', 'Session Cache', 'redis'],
+  ['content-db', 'Content DB', 'mongodb'],
+  ['ledger-db', 'Ledger DB', 'database'],
+  ['archive-db', 'Archive DB', 'database'],
+  ['events-table', 'Events Table', 'dynamo'],
+  ['media-bucket', 'Media Bucket', 's3'],
+];
+// Queue / event-stream pool: [node id, display name, node type].
+const QUEUE_NAMES = [
+  ['email-queue', 'Email Queue', 'queue'],
+  ['order-events', 'Order Events', 'kafka'],
+  ['notification-queue', 'Notification Queue', 'queue'],
+  ['payment-events', 'Payment Events', 'kafka'],
+  ['job-queue', 'Job Queue', 'queue'],
+];
+// Auth component pool: [node id, display name, node type].
+const AUTH_NAMES = [
+  ['jwt-middleware', 'JWT Middleware', 'auth'],
+  ['oauth-provider', 'OAuth Provider', 'oauth'],
+  ['keycloak', 'Keycloak', 'keycloak'],
+  ['okta', 'Okta IdP', 'okta'],
+  ['auth-middleware', 'Auth Middleware', 'middleware'],
+  ['iam-service', 'IAM Service', 'iam'],
+];
+function pick(arr) {
+  return arr[Math.floor(Math.random() * arr.length)];
+}
+function pickN(arr, n) {
+  const shuffled = [...arr].sort(() => Math.random() - 0.5);
+  return shuffled.slice(0, Math.min(n, arr.length));
+}
+function pickHttpLike() {
+  const pools = [GATEWAY_NAMES, API_NAMES, BFF_NAMES, GRPC_NAMES, GRAPHQL_NAMES];
+  const pool = pick(pools);
+  const [id, name] = pick(pool);
+  let type = 'gateway';
+  if (pool === API_NAMES) type = 'api';
+  else if (pool === BFF_NAMES) type = 'bff';
+  else if (pool === GRPC_NAMES) type = 'grpc';
+  else if (pool === GRAPHQL_NAMES) type = 'graphql';
+  return { id, name, type };
+}
+function pickDefaultHealthUrl() {
+  return pick(['/health', '/healthz', '/ping']);
+}
+/** Use on HTTP-like nodes except generators that intentionally test IR-LINT-NO-HEALTHCHECK-003. */
+function httpCleanConfig(extra = {}) {
+  return { authRequired: true, url: pickDefaultHealthUrl(), ...extra };
+}
+function makeEdge(from, to, protocol = 'https') {
+  return { from, to, metadata: { protocol } };
+}
+/** Keeps IR-LINT-SYNC-CHAIN-001 off clean layered graphs (async auth→service breaks sync depth). */
+function makeAsyncEdge(from, to) {
+  return { from, to, metadata: { protocol: 'async' } };
+}
+function runEngine(graph) {
+  const ir = { graph };
+  const norm = normalizeIrGraph(ir);
+  if ('findings' in norm) {
+    return { ok: false, structuralFindings: norm.findings, lintFindings: [] };
+  }
+  const structural = validateIrStructural(ir);
+  const lint = validateIrLint(ir);
+  const combined = [...structural, ...lint];
+  return {
+    ok: combined.every((f) => f.severity !== 'error'),
+    structuralFindings: structural,
+    lintFindings: lint,
+    combined,
+  };
+}
+function toPair(id, graph, result, variant) {
+  return {
+    id,
+    instruction: 'Given this IR graph, what architecture violations exist?',
+    variant,
+    input: { graph },
+    output: {
+      ok: result.ok,
+      violations: result.lintFindings.map((f) => ({
+        code: f.code,
+        severity: f.severity,
+        nodeId: f.nodeId ?? null,
+        message: f.message,
+        fix: f.fixHint ?? null,
+      })),
+    },
+  };
+}
+// ─── Graph generators ─────────────────────────────────────────────────────────
+function genDirectDbAccess() {
+  const http = pickHttpLike();
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig() },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [makeEdge(http.id, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'direct-db-access' };
+}
+function genCleanServiceLayer() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig() },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [makeEdge(http.id, svcId), makeEdge(svcId, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'clean-service-layer' };
+}
+function genMissingAuth() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [makeEdge(http.id, svcId), makeEdge(svcId, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'missing-auth' };
+}
+function genCleanWithAuth() {
+  const http = pickHttpLike();
+  const [authId, authName, authType] = pick(AUTH_NAMES);
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig() },
+    { id: authId, type: authType, name: authName },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [
+    makeEdge(http.id, authId),
+    makeAsyncEdge(authId, svcId),
+    makeEdge(svcId, dbId, 'tcp'),
+  ];
+  return { graph: { nodes, edges }, variant: 'clean-with-auth' };
+}
+function genCleanAuthConfig() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const authKey = pick(['authRequired', 'auth', 'security', 'authentication']);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: { [authKey]: true, url: '/health' } },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [makeEdge(http.id, svcId), makeEdge(svcId, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'clean-auth-config' };
+}
+function genHighFanout() {
+  const http = pickHttpLike();
+  const count = 5 + Math.floor(Math.random() * 4);
+  const services = pickN(SERVICE_NAMES, count);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig() },
+    ...services.map(([id, name]) => ({ id, type: 'service', name })),
+  ];
+  const edges = [];
+  for (const [svcId] of services) {
+    const [baseDbId, dbName, dbType] = pick(DB_NAMES);
+    const dbId = `${baseDbId}__${svcId}`;
+    nodes.push({ id: dbId, type: dbType, name: dbName });
+    edges.push(makeEdge(http.id, svcId));
+    edges.push(makeEdge(svcId, dbId, 'tcp'));
+  }
+  return { graph: { nodes, edges }, variant: 'high-fanout' };
+}
+function genSyncChain() {
+  const http = pickHttpLike();
+  const depth = 3 + Math.floor(Math.random() * 3);
+  const services = pickN(SERVICE_NAMES, depth);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig() },
+    ...services.map(([id, name]) => ({ id, type: 'service', name })),
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [];
+  edges.push(makeEdge(http.id, services[0][0]));
+  for (let i = 0; i < services.length - 1; i++) {
+    edges.push(makeEdge(services[i][0], services[i + 1][0]));
+  }
+  edges.push(makeEdge(services[services.length - 1][0], dbId, 'tcp'));
+  return { graph: { nodes, edges }, variant: 'sync-chain' };
+}
+function genCleanAsyncBreak() {
+  const http = pickHttpLike();
+  const [svc1Id, svc1Name] = pick(SERVICE_NAMES);
+  const [qId, qName, qType] = pick(QUEUE_NAMES);
+  const [svc2Id, svc2Name] = pick(SERVICE_NAMES.filter(([id]) => id !== svc1Id));
+  const [svc3Id, svc3Name] = pick(SERVICE_NAMES.filter(([id]) => id !== svc1Id && id !== svc2Id));
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig() },
+    { id: svc1Id, type: 'service', name: svc1Name },
+    { id: qId, type: qType, name: qName },
+    { id: svc2Id, type: 'service', name: svc2Name },
+    { id: svc3Id, type: 'service', name: svc3Name },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [
+    makeEdge(http.id, svc1Id),
+    { from: svc1Id, to: qId, metadata: { protocol: 'amqp' } },
+    { from: qId, to: svc2Id, metadata: { protocol: 'amqp' } },
+    makeEdge(svc2Id, svc3Id),
+    makeEdge(svc3Id, dbId, 'tcp'),
+  ];
+  return { graph: { nodes, edges }, variant: 'clean-async-break' };
+}
+function genNoHealthcheck() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    // Intentionally no health-like url — only this generator should omit it for 003.
+    { id: http.id, type: http.type, name: http.name, config: { authRequired: true } },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [makeEdge(http.id, svcId), makeEdge(svcId, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'no-healthcheck' };
+}
+function genCleanHealthcheck() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const healthPath = pick(['/health', '/healthz', '/ping', '/status', '/ready', '/live']);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: { authRequired: true, url: healthPath } },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [makeEdge(http.id, svcId), makeEdge(svcId, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'clean-healthcheck' };
+}
+function genIsolatedNode() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const [orphanId, orphanName] = pick(SERVICE_NAMES.filter(([id]) => id !== svcId));
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig() },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+    { id: orphanId, type: 'service', name: orphanName },
+  ];
+  const edges = [makeEdge(http.id, svcId), makeEdge(svcId, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'isolated-node' };
+}
+function genDuplicateEdge() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig() },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [makeEdge(http.id, svcId), makeEdge(http.id, svcId), makeEdge(svcId, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'duplicate-edge' };
+}
+function genMissingName() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http.id, type: http.type, config: { authRequired: true, url: pickDefaultHealthUrl() } },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [makeEdge(http.id, svcId), makeEdge(svcId, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'missing-name' };
+}
+function genDatastoreNoIncoming() {
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [db1Id, db1Name, db1Type] = pick(DB_NAMES);
+  const [db2Id, db2Name, db2Type] = pick(DB_NAMES.filter(([id]) => id !== db1Id));
+  const nodes = [
+    { id: svcId, type: 'service', name: svcName },
+    { id: db1Id, type: db1Type, name: db1Name },
+    { id: db2Id, type: db2Type, name: db2Name },
+  ];
+  // db2 has no incoming edges (008) but outgoing to db1 so it is not IR-LINT-ISOLATED-NODE-005.
+  const edges = [
+    makeEdge(svcId, db1Id, 'tcp'),
+    makeEdge(db2Id, db1Id, 'tcp'),
+  ];
+  return { graph: { nodes, edges }, variant: 'datastore-no-incoming' };
+}
+function genMultipleHttpEntries() {
+  const http1 = pickHttpLike();
+  let http2 = pickHttpLike();
+  while (http2.id === http1.id) http2 = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http1.id, type: http1.type, name: http1.name, config: httpCleanConfig() },
+    { id: http2.id, type: http2.type, name: http2.name, config: httpCleanConfig() },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [makeEdge(http1.id, svcId), makeEdge(http2.id, svcId), makeEdge(svcId, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'multiple-http-entries' };
+}
+function genDeadNode() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [deadId, deadName] = pick(SERVICE_NAMES.filter(([id]) => id !== svcId));
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig() },
+    { id: svcId, type: 'service', name: svcName },
+    { id: deadId, type: 'service', name: deadName },
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [
+    makeEdge(http.id, svcId),
+    makeEdge(http.id, deadId),
+    makeEdge(svcId, dbId, 'tcp'),
+  ];
+  return { graph: { nodes, edges }, variant: 'dead-node' };
+}
+function genMultiViolation() {
+  const http = pickHttpLike();
+  const [svcId, svcName] = pick(SERVICE_NAMES);
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const [orphanId, orphanName] = pick(SERVICE_NAMES.filter(([id]) => id !== svcId));
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: { url: pickDefaultHealthUrl() } },
+    { id: svcId, type: 'service', name: svcName },
+    { id: dbId, type: dbType, name: dbName },
+    { id: orphanId, type: 'service', name: orphanName },
+  ];
+  const edges = [makeEdge(http.id, svcId), makeEdge(http.id, dbId, 'tcp')];
+  return { graph: { nodes, edges }, variant: 'multi-violation' };
+}
+function genCleanGraph() {
+  const http = pickHttpLike();
+  const [authId, authName, authType] = pick(AUTH_NAMES);
+  const services = pickN(SERVICE_NAMES, 2 + Math.floor(Math.random() * 3));
+  const [dbId, dbName, dbType] = pick(DB_NAMES);
+  const healthPath = pick(['/health', '/healthz', '/ping', '/status']);
+  const nodes = [
+    { id: http.id, type: http.type, name: http.name, config: httpCleanConfig({ url: healthPath }) },
+    { id: authId, type: authType, name: authName },
+    ...services.map(([id, name]) => ({ id, type: 'service', name })),
+    { id: dbId, type: dbType, name: dbName },
+  ];
+  const edges = [
+    makeEdge(http.id, authId),
+    ...services.map(([id]) => makeAsyncEdge(authId, id)),
+    ...services.map(([id]) => makeEdge(id, dbId, 'tcp')),
+  ];
+  return { graph: { nodes, edges }, variant: 'clean-graph' };
+}
+const GENERATORS = [
+  { fn: genHighFanout, weight: 12 },
+  { fn: genMissingName, weight: 10 },
+  { fn: genDuplicateEdge, weight: 8 },
+  { fn: genDirectDbAccess, weight: 8 },
+  { fn: genMissingAuth, weight: 8 },
+  { fn: genDatastoreNoIncoming, weight: 8 },
+  { fn: genMultiViolation, weight: 6 },
+  { fn: genDeadNode, weight: 5 },
+  { fn: genMultipleHttpEntries, weight: 5 },
+  { fn: genNoHealthcheck, weight: 2 },
+  { fn: genIsolatedNode, weight: 2 },
+  { fn: genSyncChain, weight: 3 },
+  { fn: genCleanServiceLayer, weight: 5 },
+  { fn: genCleanWithAuth, weight: 5 },
+  { fn: genCleanAuthConfig, weight: 4 },
+  { fn: genCleanAsyncBreak, weight: 3 },
+  { fn: genCleanHealthcheck, weight: 3 },
+  { fn: genCleanGraph, weight: 8 },
+];
+const POOL = GENERATORS.flatMap(({ fn, weight }) => Array(weight).fill(fn));
+function generateCorpus() {
+  const TARGET_COUNT = parseInt(getArg('--count', '500'), 10);
+  const OUT_PATH = resolve(ROOT, getArg('--out', 'corpus/auto-generated.jsonl'));
+  console.log(`Generating ${TARGET_COUNT} corpus pairs...`);
+  mkdirSync(resolve(ROOT, 'corpus'), { recursive: true });
+  const lines = [];
+  let skipped = 0;
+  let attempts = 0;
+  const seenGraphs = new Set();
+  while (lines.length < TARGET_COUNT) {
+    attempts++;
+    if (attempts > TARGET_COUNT * 10) {
+      console.warn(`Stopping after ${attempts} attempts — possible infinite loop.`);
+      break;
+    }
+    const gen = pick(POOL);
+    let graphDef;
+    try {
+      graphDef = gen();
+    } catch {
+      skipped++;
+      continue;
+    }
+    const key = JSON.stringify(graphDef.graph);
+    if (seenGraphs.has(key)) {
+      skipped++;
+      continue;
+    }
+    seenGraphs.add(key);
+    let result;
+    try {
+      result = runEngine(graphDef.graph);
+    } catch {
+      skipped++;
+      continue;
+    }
+    if (result.structuralFindings?.some((f) => f.severity === 'error')) {
+      skipped++;
+      continue;
+    }
+    const id = `gen-${lines.length}-${graphDef.variant}`;
+    lines.push(JSON.stringify(toPair(id, graphDef.graph, result, graphDef.variant)));
+  }
+  writeFileSync(OUT_PATH, lines.join('\n') + '\n', 'utf8');
+  const pairs = lines.map((l) => JSON.parse(l));
+  const withViolations = pairs.filter((p) => p.output.violations.length > 0).length;
+  const withoutViolations = pairs.length - withViolations;
+  const ruleCounts = {};
+  for (const p of pairs) {
+    for (const v of p.output.violations) {
+      ruleCounts[v.code] = (ruleCounts[v.code] ?? 0) + 1;
+    }
+  }
+  console.log(`\nDone.`);
+  console.log(`  Written to: ${OUT_PATH}`);
+  console.log(`  Total pairs: ${lines.length}`);
+  console.log(`  With violations: ${withViolations}`);
+  console.log(`  Clean (no lint violations): ${withoutViolations}`);
+  console.log(`  Skipped (duplicates/errors): ${skipped}`);
+  console.log(`\nViolation distribution:`);
+  for (const [code, count] of Object.entries(ruleCounts).sort((a, b) => b[1] - a[1])) {
+    console.log(`  ${code}: ${count}`);
+  }
+}
+async function main() {
+  if (!wantsGenerate) {
+    await validateHandwritten();
+    return;
+  }
+  try {
+    readFileSync(join(ROOT, 'dist', 'index.js'), 'utf8');
+  } catch {
+    console.error('generate-corpus: run `npm run build` first (dist/index.js missing).');
+    process.exitCode = 1;
+    return;
+  }
+  generateCorpus();
+}
+main();