@archrad/deterministic 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.1.6] - 2026-04-10
11
+
12
+ ### Added
13
+
14
+ - **`docs/CI.md`** — exit code semantics (`--fail-on-warning`, `--max-warnings`) and copy-paste snippets for GitHub Actions, GitLab CI, Bitbucket Pipelines, Jenkins, and Azure DevOps.
15
+ - **`scripts/generate-corpus.mjs`** — validates hand-written `corpus/*.json` pairs; `--count` mode generates weighted synthetic JSONL training pairs via the deterministic engine. Run `npm run build` first.
16
+ - **Integration tests** — `archrad validate` exit code assertions (`src/cli-exit.integration.test.ts`).
17
+
18
+ ### Changed
19
+
20
+ - **MCP** — Rewrote all six `registerTool` title/description blocks for agent discoverability (`src/mcp-server-tools-patch.ts`).
21
+ - **npm package** — `corpus/` excluded from published tarball (`.npmignore` + removed from `package.json` `files`).
22
+
23
+ ### Fixed
24
+
25
+ - **`archrad_validate_drift`** MCP schema: `target` enum is `python` | `nodejs` only, aligned with `docs/MCP.md`.
26
+
27
+ ### Security
28
+
29
+ - `npm audit fix` applied to dev/test transitive dependencies.
30
+
10
31
  ## [0.1.5] - 2026-04-07
11
32
 
12
33
  ### Added
@@ -114,8 +135,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
114
135
  - Documented **codegen vs validation** for retry/timeout IR fields and **InkByte vs OSS** scope in README and structural/semantic doc.
115
136
  - README positioning: **deterministic compiler and linter for system architecture**; validation layers table (OSS vs Cloud); **`validate-drift`**, drift GIF / trust-loop recording docs, library **`runValidateDrift`** example.
116
137
 
117
- [Unreleased]: https://github.com/archradhq/arch-deterministic/compare/v0.1.3...HEAD
118
- [0.1.3]: https://github.com/archradhq/arch-deterministic/releases/tag/v0.1.3
119
- [0.1.2]: https://github.com/archradhq/arch-deterministic/releases/tag/v0.1.2
120
- [0.1.1]: https://github.com/archradhq/arch-deterministic/releases/tag/v0.1.1
138
+ [Unreleased]: https://github.com/archradhq/arch-deterministic/compare/v0.1.6...HEAD
139
+ [0.1.6]: https://github.com/archradhq/arch-deterministic/compare/v0.1.5...v0.1.6
140
+ [0.1.5]: https://github.com/archradhq/arch-deterministic/compare/v0.1.4...v0.1.5
141
+ [0.1.4]: https://github.com/archradhq/arch-deterministic/compare/v0.1.3...v0.1.4
142
+ [0.1.3]: https://github.com/archradhq/arch-deterministic/compare/v0.1.2...v0.1.3
143
+ [0.1.2]: https://github.com/archradhq/arch-deterministic/compare/v0.1.1...v0.1.2
144
+ [0.1.1]: https://github.com/archradhq/arch-deterministic/compare/v0.1.0...v0.1.1
121
145
  [0.1.0]: https://github.com/archradhq/arch-deterministic/releases/tag/v0.1.0
package/dist/cli.js CHANGED
@@ -76,7 +76,7 @@ const program = new Command();
76
76
  program
77
77
  .name('archrad')
78
78
  .description('Validate your architecture before you write code. Deterministic compiler + linter — FastAPI / Express (no LLM, no server).')
79
- .version('0.1.5');
79
+ .version('0.1.6');
80
80
  program
81
81
  .command('validate')
82
82
  .description('Validate your architecture before you write code — IR structural (IR-STRUCT-*) + architecture lint (IR-LINT-*)')
@@ -0,0 +1,29 @@
1
+ /**
2
+ * MCP tool catalog metadata (title + description) for archrad-mcp discoverability.
3
+ * Keep in sync with registerTool handlers in mcp-server.ts.
4
+ */
5
+ export declare const MCP_TOOL_ARCHRAD_VALIDATE_IR: {
6
+ readonly title: "Validate IR — structural (IR-STRUCT-*) + architecture lint (IR-LINT-*) + PolicyPack";
7
+ readonly description: "Architecture-as-code validation: run this when you need to check whether an IR graph is valid or to list violations before export or drift checks.\n\nKeywords: validate IR, architecture lint, IR-STRUCT, IR-LINT, policy pack, blueprint graph, nodes and edges.\n\nRuns in one call:\n1) Structural validation — graph shape, references, IR-STRUCT-* errors.\n2) Architecture lint — design rules (auth, dead nodes, DB access, sync chains, etc.).\n3) Optional PolicyPack rules — pass policiesDirectory to load YAML/JSON packs from disk.\n\nReturns irStructuralFindings, irLintFindings, and combined (sorted by severity). ok is false when any finding has severity \"error\".\n\nAfter results: call archrad_suggest_fix with a finding code for remediation text; use archrad_lint_summary for a short human-readable digest.\n\nInput: provide exactly one of ir (inline JSON object) or irPath (path to .json). Large graphs: prefer irPath.";
8
+ };
9
+ export declare const MCP_TOOL_ARCHRAD_LINT_SUMMARY: {
10
+ readonly title: "Lint summary — plain-text counts and top findings";
11
+ readonly description: "Human-readable summary of validation results: error/warning counts and up to 20 top findings (plain text).\n\nKeywords: summary, PR comment, explain violations, readable lint output.\n\nUse when you need a short narrative or comment, not structured JSON. For machine-actionable findings, use archrad_validate_ir instead.\n\nSame inputs as archrad_validate_ir: ir or irPath, optional policiesDirectory. Provide only one of ir or irPath.";
12
+ };
13
+ export declare const MCP_TOOL_ARCHRAD_SUGGEST_FIX: {
14
+ readonly title: "Suggest fix — static remediation for a built-in finding code";
15
+ readonly description: "Look up curated remediation steps and documentation URL for one built-in rule code (e.g. IR-LINT-MISSING-AUTH-010, IR-STRUCT-*, DRIFT-*).\n\nKeywords: remediation, how to fix, rule code, docs link, IR-LINT, IR-STRUCT.\n\nDoes not return generated code patches or IR edits — only static guidance. PolicyPack and org-specific rule ids are not covered; see your YAML packs.\n\nCall archrad_list_rule_codes to list codes that have static guidance.";
16
+ };
17
+ export declare const MCP_TOOL_ARCHRAD_LIST_RULE_CODES: {
18
+ readonly title: "List rule codes — built-in codes with static guidance";
19
+ readonly description: "Returns the sorted list of built-in IR-STRUCT-*, IR-LINT-*, and DRIFT-* codes that archrad_suggest_fix can explain.\n\nKeywords: catalog, all rules, rule list, documentation index.\n\nUse before suggest_fix to confirm a code exists. Excludes PolicyPack custom ids. No arguments.";
20
+ };
21
+ export declare const MCP_TOOL_ARCHRAD_VALIDATE_DRIFT: {
22
+ readonly title: "Validate drift — IR blueprint vs on-disk export (python | nodejs)";
23
+ readonly description: "Compare the architecture IR to generated code under exportDir and report drift (files that no longer match deterministic export).\n\nKeywords: drift, CI, codegen diff, FastAPI, Express, Node, Python, validate export, architecture vs implementation.\n\nRequires: ir or irPath, exportDir (absolute path to the export tree), and target. target must be \"python\" or \"nodejs\" (use \"nodejs\" for Node/TypeScript; do not use \"node\").\n\nOptional: policiesDirectory, skipIrLint (true to skip IR-LINT and only check drift).\n\nReturns driftFindings plus IR structural and lint findings from the same engine as CLI validate-drift.";
24
+ };
25
+ export declare const MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD: {
26
+ readonly title: "Load PolicyPack — compile and validate packs (dry run, no IR)";
27
+ readonly description: "Validate PolicyPack YAML/JSON without running against a graph: syntax, rule ids, and compilation.\n\nKeywords: policy pack, YAML rules, validate policies, org rules, offline check.\n\nYou usually do not need this before archrad_validate_ir or archrad_validate_drift — those accept policiesDirectory and load packs internally. Use this tool to debug pack files in isolation.\n\nProvide either directory (folder path) or files (array of { name, content }), not both.";
28
+ };
29
+ //# sourceMappingURL=mcp-server-tools-patch.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mcp-server-tools-patch.d.ts","sourceRoot":"","sources":["../src/mcp-server-tools-patch.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,eAAO,MAAM,4BAA4B;;;CAgB/B,CAAC;AAEX,eAAO,MAAM,6BAA6B;;;CAShC,CAAC;AAEX,eAAO,MAAM,4BAA4B;;;CAS/B,CAAC;AAEX,eAAO,MAAM,gCAAgC;;;CAOnC,CAAC;AAEX,eAAO,MAAM,+BAA+B;;;CAWlC,CAAC;AAEX,eAAO,MAAM,kCAAkC;;;CASrC,CAAC"}
@@ -0,0 +1,71 @@
1
+ /**
2
+ * MCP tool catalog metadata (title + description) for archrad-mcp discoverability.
3
+ * Keep in sync with registerTool handlers in mcp-server.ts.
4
+ */
5
+ export const MCP_TOOL_ARCHRAD_VALIDATE_IR = {
6
+ title: 'Validate IR — structural (IR-STRUCT-*) + architecture lint (IR-LINT-*) + PolicyPack',
7
+ description: `Architecture-as-code validation: run this when you need to check whether an IR graph is valid or to list violations before export or drift checks.
8
+
9
+ Keywords: validate IR, architecture lint, IR-STRUCT, IR-LINT, policy pack, blueprint graph, nodes and edges.
10
+
11
+ Runs in one call:
12
+ 1) Structural validation — graph shape, references, IR-STRUCT-* errors.
13
+ 2) Architecture lint — design rules (auth, dead nodes, DB access, sync chains, etc.).
14
+ 3) Optional PolicyPack rules — pass policiesDirectory to load YAML/JSON packs from disk.
15
+
16
+ Returns irStructuralFindings, irLintFindings, and combined (sorted by severity). ok is false when any finding has severity "error".
17
+
18
+ After results: call archrad_suggest_fix with a finding code for remediation text; use archrad_lint_summary for a short human-readable digest.
19
+
20
+ Input: provide exactly one of ir (inline JSON object) or irPath (path to .json). Large graphs: prefer irPath.`,
21
+ };
22
+ export const MCP_TOOL_ARCHRAD_LINT_SUMMARY = {
23
+ title: 'Lint summary — plain-text counts and top findings',
24
+ description: `Human-readable summary of validation results: error/warning counts and up to 20 top findings (plain text).
25
+
26
+ Keywords: summary, PR comment, explain violations, readable lint output.
27
+
28
+ Use when you need a short narrative or comment, not structured JSON. For machine-actionable findings, use archrad_validate_ir instead.
29
+
30
+ Same inputs as archrad_validate_ir: ir or irPath, optional policiesDirectory. Provide only one of ir or irPath.`,
31
+ };
32
+ export const MCP_TOOL_ARCHRAD_SUGGEST_FIX = {
33
+ title: 'Suggest fix — static remediation for a built-in finding code',
34
+ description: `Look up curated remediation steps and documentation URL for one built-in rule code (e.g. IR-LINT-MISSING-AUTH-010, IR-STRUCT-*, DRIFT-*).
35
+
36
+ Keywords: remediation, how to fix, rule code, docs link, IR-LINT, IR-STRUCT.
37
+
38
+ Does not return generated code patches or IR edits — only static guidance. PolicyPack and org-specific rule ids are not covered; see your YAML packs.
39
+
40
+ Call archrad_list_rule_codes to list codes that have static guidance.`,
41
+ };
42
+ export const MCP_TOOL_ARCHRAD_LIST_RULE_CODES = {
43
+ title: 'List rule codes — built-in codes with static guidance',
44
+ description: `Returns the sorted list of built-in IR-STRUCT-*, IR-LINT-*, and DRIFT-* codes that archrad_suggest_fix can explain.
45
+
46
+ Keywords: catalog, all rules, rule list, documentation index.
47
+
48
+ Use before suggest_fix to confirm a code exists. Excludes PolicyPack custom ids. No arguments.`,
49
+ };
50
+ export const MCP_TOOL_ARCHRAD_VALIDATE_DRIFT = {
51
+ title: 'Validate drift — IR blueprint vs on-disk export (python | nodejs)',
52
+ description: `Compare the architecture IR to generated code under exportDir and report drift (files that no longer match deterministic export).
53
+
54
+ Keywords: drift, CI, codegen diff, FastAPI, Express, Node, Python, validate export, architecture vs implementation.
55
+
56
+ Requires: ir or irPath, exportDir (absolute path to the export tree), and target. target must be "python" or "nodejs" (use "nodejs" for Node/TypeScript; do not use "node").
57
+
58
+ Optional: policiesDirectory, skipIrLint (true to skip IR-LINT and only check drift).
59
+
60
+ Returns driftFindings plus IR structural and lint findings from the same engine as CLI validate-drift.`,
61
+ };
62
+ export const MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD = {
63
+ title: 'Load PolicyPack — compile and validate packs (dry run, no IR)',
64
+ description: `Validate PolicyPack YAML/JSON without running against a graph: syntax, rule ids, and compilation.
65
+
66
+ Keywords: policy pack, YAML rules, validate policies, org rules, offline check.
67
+
68
+ You usually do not need this before archrad_validate_ir or archrad_validate_drift — those accept policiesDirectory and load packs internally. Use this tool to debug pack files in isolation.
69
+
70
+ Provide either directory (folder path) or files (array of { name, content }), not both.`,
71
+ };
@@ -10,13 +10,10 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
10
10
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
11
11
  import { normalizeIrGraph, validateIrStructural, validateIrLint, runValidateDrift, sortFindings, loadPolicyPacksFromDirectory, loadPolicyPacksFromFiles, } from './index.js';
12
12
  import { getStaticRuleGuidance, listStaticRuleCodes } from './static-rule-guidance.js';
13
- const VERSION = '0.1.5';
13
+ import { MCP_TOOL_ARCHRAD_LIST_RULE_CODES, MCP_TOOL_ARCHRAD_LINT_SUMMARY, MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD, MCP_TOOL_ARCHRAD_SUGGEST_FIX, MCP_TOOL_ARCHRAD_VALIDATE_DRIFT, MCP_TOOL_ARCHRAD_VALIDATE_IR, } from './mcp-server-tools-patch.js';
14
+ const VERSION = '0.1.6';
14
15
  /** Hard cap for `irPath` reads (see docs/MCP.md). */
15
16
  const MAX_IR_FILE_BYTES = 25 * 1024 * 1024;
16
- const irSourceSchema = {
17
- ir: z.unknown().optional(),
18
- irPath: z.string().optional(),
19
- };
20
17
  function jsonResult(payload) {
21
18
  return {
22
19
  content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }],
@@ -64,34 +61,16 @@ async function main() {
64
61
  name: 'archrad-deterministic',
65
62
  version: VERSION,
66
63
  });
67
- server.registerTool('archrad_suggest_fix', {
68
- title: 'Static remediation for a finding code',
69
- description: 'Deterministic title, remediation text, and canonical docs URL for a built-in IR-STRUCT / IR-LINT / DRIFT code. Does not generate patches or IR edits.',
70
- inputSchema: {
71
- findingCode: z.string().min(1),
72
- },
73
- }, async (args) => {
74
- const g = getStaticRuleGuidance(args.findingCode);
75
- if (!g) {
76
- return jsonResult({
77
- ok: false,
78
- findingCode: args.findingCode,
79
- error: 'Unknown built-in code. PolicyPack and org rules use custom rule ids in YAML — see your pack. Use archrad_list_rule_codes for built-in codes.',
80
- });
81
- }
82
- return jsonResult({ ok: true, ...g });
83
- });
84
- server.registerTool('archrad_list_rule_codes', {
85
- title: 'List built-in rule codes',
86
- description: 'Sorted list of IR-STRUCT-*, IR-LINT-*, and DRIFT-* codes that have static guidance via archrad_suggest_fix.',
87
- inputSchema: {},
88
- }, async () => jsonResult({ codes: listStaticRuleCodes() }));
89
64
  server.registerTool('archrad_validate_ir', {
90
- title: 'Validate IR (structural + IR-LINT)',
91
- description: 'Run deterministic structural validation (IR-STRUCT-*) and architecture lint (IR-LINT-*). Pass `ir` inline or `irPath` to a JSON file (recommended for large graphs). Optional local PolicyPack directory.',
65
+ title: MCP_TOOL_ARCHRAD_VALIDATE_IR.title,
66
+ description: MCP_TOOL_ARCHRAD_VALIDATE_IR.description,
92
67
  inputSchema: {
93
- ...irSourceSchema,
94
- policiesDirectory: z.string().optional(),
68
+ ir: z.unknown().optional().describe('Inline IR graph as a JSON object. Use for small graphs only.'),
69
+ irPath: z.string().optional().describe('Absolute or relative path to an IR JSON file. Preferred for large graphs.'),
70
+ policiesDirectory: z
71
+ .string()
72
+ .optional()
73
+ .describe('Path to a directory of PolicyPack YAML/JSON files. Optional — omit if you have no custom rules.'),
95
74
  },
96
75
  }, async (args) => {
97
76
  const loaded = await loadIrFromArgs(args);
@@ -122,11 +101,15 @@ async function main() {
122
101
  });
123
102
  });
124
103
  server.registerTool('archrad_lint_summary', {
125
- title: 'Lint summary',
126
- description: 'Short text summary of IR structural + lint findings. Use `ir` or `irPath` (see archrad_validate_ir).',
104
+ title: MCP_TOOL_ARCHRAD_LINT_SUMMARY.title,
105
+ description: MCP_TOOL_ARCHRAD_LINT_SUMMARY.description,
127
106
  inputSchema: {
128
- ...irSourceSchema,
129
- policiesDirectory: z.string().optional(),
107
+ ir: z.unknown().optional().describe('Inline IR graph as a JSON object.'),
108
+ irPath: z.string().optional().describe('Absolute or relative path to an IR JSON file.'),
109
+ policiesDirectory: z
110
+ .string()
111
+ .optional()
112
+ .describe('Path to a directory of PolicyPack YAML/JSON files. Optional.'),
130
113
  },
131
114
  }, async (args) => {
132
115
  const loaded = await loadIrFromArgs(args);
@@ -156,17 +139,47 @@ async function main() {
156
139
  ];
157
140
  if (combined.length > 20)
158
141
  lines.push(`… and ${combined.length - 20} more.`);
159
- return jsonResult({ summary: lines.join('\n'), counts: { total: combined.length, errors: errors.length, warnings: warnings.length } });
142
+ return jsonResult({
143
+ summary: lines.join('\n'),
144
+ counts: { total: combined.length, errors: errors.length, warnings: warnings.length },
145
+ });
160
146
  });
147
+ server.registerTool('archrad_suggest_fix', {
148
+ title: MCP_TOOL_ARCHRAD_SUGGEST_FIX.title,
149
+ description: MCP_TOOL_ARCHRAD_SUGGEST_FIX.description,
150
+ inputSchema: {
151
+ findingCode: z.string().min(1).describe('The finding code to look up, e.g. "IR-LINT-MISSING-AUTH-010".'),
152
+ },
153
+ }, async (args) => {
154
+ const g = getStaticRuleGuidance(args.findingCode);
155
+ if (!g) {
156
+ return jsonResult({
157
+ ok: false,
158
+ findingCode: args.findingCode,
159
+ error: 'Unknown built-in code. PolicyPack and org rules use custom rule ids in YAML — see your pack. Call archrad_list_rule_codes to see all built-in codes with static guidance.',
160
+ });
161
+ }
162
+ return jsonResult({ ok: true, ...g });
163
+ });
164
+ server.registerTool('archrad_list_rule_codes', {
165
+ title: MCP_TOOL_ARCHRAD_LIST_RULE_CODES.title,
166
+ description: MCP_TOOL_ARCHRAD_LIST_RULE_CODES.description,
167
+ inputSchema: {},
168
+ }, async () => jsonResult({ codes: listStaticRuleCodes() }));
161
169
  server.registerTool('archrad_validate_drift', {
162
- title: 'Validate drift',
163
- description: 'Compare on-disk export to a fresh deterministic export. Pass `ir` or `irPath` (JSON file).',
170
+ title: MCP_TOOL_ARCHRAD_VALIDATE_DRIFT.title,
171
+ description: MCP_TOOL_ARCHRAD_VALIDATE_DRIFT.description,
164
172
  inputSchema: {
165
- ...irSourceSchema,
166
- target: z.enum(['python', 'node', 'nodejs']),
167
- exportDir: z.string(),
168
- policiesDirectory: z.string().optional(),
169
- skipIrLint: z.boolean().optional(),
173
+ ir: z.unknown().optional().describe('Inline IR graph as a JSON object.'),
174
+ irPath: z.string().optional().describe('Absolute or relative path to an IR JSON file.'),
175
+ target: z
176
+ .enum(['python', 'nodejs'])
177
+ .describe('Export target language. Use "nodejs" for Node.js/TypeScript, "python" for Python.'),
178
+ exportDir: z
179
+ .string()
180
+ .describe('Absolute path to the on-disk export directory to compare against the IR.'),
181
+ policiesDirectory: z.string().optional().describe('Path to a PolicyPack directory. Optional.'),
182
+ skipIrLint: z.boolean().optional().describe('Set to true to skip IR-LINT checks and only check for drift. Default: false.'),
170
183
  },
171
184
  }, async (args) => {
172
185
  const loaded = await loadIrFromArgs(args);
@@ -202,13 +215,17 @@ async function main() {
202
215
  });
203
216
  });
204
217
  server.registerTool('archrad_policy_packs_load', {
205
- title: 'Load policy packs',
206
- description: 'Compile PolicyPack YAML/JSON from a directory or from in-memory file list.',
218
+ title: MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD.title,
219
+ description: MCP_TOOL_ARCHRAD_POLICY_PACKS_LOAD.description,
207
220
  inputSchema: {
208
- directory: z.string().optional(),
221
+ directory: z.string().optional().describe('Path to a directory of PolicyPack YAML/JSON files.'),
209
222
  files: z
210
- .array(z.object({ name: z.string(), content: z.string() }))
211
- .optional(),
223
+ .array(z.object({
224
+ name: z.string().describe('Filename, e.g. "auth-rules.yaml".'),
225
+ content: z.string().describe('Raw file content as a string.'),
226
+ }))
227
+ .optional()
228
+ .describe('In-memory file list. Use when you have policy content as strings rather than on-disk files.'),
212
229
  },
213
230
  }, async (args) => {
214
231
  if (args.files && args.files.length > 0) {
@@ -225,7 +242,10 @@ async function main() {
225
242
  }
226
243
  return jsonResult({ ok: true, ruleCount: loaded.ruleCount });
227
244
  }
228
- return jsonResult({ ok: false, error: 'Provide `directory` or `files`.' });
245
+ return jsonResult({
246
+ ok: false,
247
+ error: 'Provide either directory (path string) or files (array of {name, content}).',
248
+ });
229
249
  });
230
250
  const transport = new StdioServerTransport();
231
251
  await server.connect(transport);
package/docs/CI.md ADDED
@@ -0,0 +1,122 @@
1
+ # CI integration — `archrad validate`
2
+
3
+ `archrad validate` is the usual gate for **architecture-as-code** in pipelines. It reads an IR JSON file and runs structural validation (IR-STRUCT-*) plus architecture lint (IR-LINT-*).
4
+
5
+ ## Exit codes
6
+
7
+ | Situation | Default exit code |
8
+ |-----------|-------------------|
9
+ | No findings | **0** |
10
+ | Any finding with severity **`error`** (structural / blocking) | **1** |
11
+ | **Warnings only** (e.g. many IR-LINT-* rules) | **0** |
12
+
13
+ Optional stricter gates:
14
+
15
+ - **`--fail-on-warning`** — exit **1** if any warning exists.
16
+ - **`--max-warnings <n>`** — exit **1** if the warning count is **greater than** `n` (e.g. **`--max-warnings 0`** allows no warnings).
17
+
18
+ JSON output: add **`--json`** (findings array on stdout).
19
+
20
+ Policy packs: **`--policies <dir>`** (directory of PolicyPack YAML/JSON), merged after built-in IR-LINT-* (omit **`--skip-lint`** if you want lint + policies).
21
+
22
+ Example:
23
+
24
+ ```bash
25
+ npx archrad validate --ir ./graph.json
26
+ npx archrad validate --ir ./graph.json --fail-on-warning
27
+ npx archrad validate --ir ./graph.json --max-warnings 0 --json
28
+ ```
29
+
30
+ Install **`@archrad/deterministic`** as a dev dependency so `npx archrad` resolves locally, or invoke **`node node_modules/@archrad/deterministic/dist/cli.js`** explicitly.
31
+
32
+ ---
33
+
34
+ ## GitHub Actions
35
+
36
+ ```yaml
37
+ jobs:
38
+ archrad:
39
+ runs-on: ubuntu-latest
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+ - uses: actions/setup-node@v4
43
+ with:
44
+ node-version: '20'
45
+ cache: 'npm'
46
+ - run: npm ci
47
+ - run: npx archrad validate --ir ./path/to/graph.json
48
+ ```
49
+
50
+ With warnings as failures:
51
+
52
+ ```yaml
53
+ - run: npx archrad validate --ir ./path/to/graph.json --fail-on-warning
54
+ ```
55
+
56
+ ---
57
+
58
+ ## GitLab CI
59
+
60
+ ```yaml
61
+ archrad-validate:
62
+ image: node:20-bookworm
63
+ script:
64
+ - npm ci
65
+ - npx archrad validate --ir ./path/to/graph.json
66
+ ```
67
+
68
+ ---
69
+
70
+ ## Bitbucket Pipelines
71
+
72
+ ```yaml
73
+ pipelines:
74
+ default:
75
+ - step:
76
+ name: ArchRad validate
77
+ image: node:20
78
+ script:
79
+ - npm ci
80
+ - npx archrad validate --ir ./path/to/graph.json
81
+ ```
82
+
83
+ ---
84
+
85
+ ## Jenkins (Declarative)
86
+
87
+ ```groovy
88
+ pipeline {
89
+ agent any
90
+ stages {
91
+ stage('ArchRad') {
92
+ steps {
93
+ sh 'npm ci'
94
+ sh 'npx archrad validate --ir ./path/to/graph.json'
95
+ }
96
+ }
97
+ }
98
+ }
99
+ ```
100
+
101
+ ---
102
+
103
+ ## Azure DevOps
104
+
105
+ ```yaml
106
+ steps:
107
+ - task: NodeTool@0
108
+ inputs:
109
+ versionSpec: '20.x'
110
+ - script: npm ci
111
+ displayName: npm ci
112
+ - script: npx archrad validate --ir ./path/to/graph.json
113
+ displayName: archrad validate
114
+ ```
115
+
116
+ ---
117
+
118
+ ## Notes
119
+
120
+ - Replace **`./path/to/graph.json`** with your IR path (repo-relative in CI).
121
+ - Ensure the job installs the same **`@archrad/deterministic`** version you use locally (`package.json` / lockfile).
122
+ - Drift checks use **`archrad validate-drift`** (separate command); see **`docs/DRIFT.md`**.
package/docs/MCP.md CHANGED
@@ -91,7 +91,7 @@ Open the URL Inspector prints (often **http://localhost:6274**). Under **Tools**
91
91
  - **`archrad_list_rule_codes`:** JSON with a **`codes`** array.
92
92
  - **`archrad_validate_ir`:** JSON with **`irStructuralFindings`**, **`irLintFindings`**, **`combined`**, **`ok`** — not a connection or file error.
93
93
 
94
- ## 6. Tools (0.1.5)
94
+ ## 6. Tools (0.1.6)
95
95
 
96
96
  Tools are **idempotent** and **deterministic** where stated.
97
97
 
@@ -101,7 +101,7 @@ Tools are **idempotent** and **deterministic** where stated.
101
101
  |------|--------|--------|-------|
102
102
  | **`archrad_validate_ir`** | `ir` **or** `irPath`; optional `policiesDirectory` | `{ ok, irStructuralFindings, irLintFindings, combined }` | Same as CLI validate. |
103
103
  | **`archrad_lint_summary`** | `ir` **or** `irPath`; optional `policiesDirectory` | Short summary + counts | Agent-friendly. |
104
- | **`archrad_validate_drift`** | `ir` **or** `irPath`; `target`; `exportDir`; optional policies, `skipIrLint` | Drift + export findings | Same as CLI `validate-drift`. |
104
+ | **`archrad_validate_drift`** | `ir` **or** `irPath`; `target`; `exportDir`; optional policies, `skipIrLint` | Drift + export findings | Same engine as CLI `validate-drift`. **MCP `target` values:** `python` or `nodejs` only (not `node`). The CLI `validate-drift` / `export` may still accept `node` as an alias for Node exports. |
105
105
  | **`archrad_policy_packs_load`** | `directory` or `files[]` | `{ ok, ruleCount }` or errors | Compiles packs; does not return visitor functions over MCP. |
106
106
 
107
107
  ### 6.2 Static guidance (no generated architecture)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@archrad/deterministic",
3
- "version": "0.1.5",
3
+ "version": "0.1.6",
4
4
  "description": "A deterministic compiler and linter for system architecture. Validate your architecture before you write code. OSS: structural validation + basic architecture lint (rule-based); FastAPI/Express export; OpenAPI document-shape; golden Docker/Makefile — no LLM.",
5
5
  "keywords": [
6
6
  "archrad",
@@ -64,9 +64,10 @@
64
64
  "scripts": {
65
65
  "build": "tsc -p tsconfig.build.json",
66
66
  "prepublishOnly": "npm run build",
67
- "test": "tsc -p tsconfig.build.json --noEmit && vitest run",
67
+ "test": "tsc -p tsconfig.build.json --noEmit && npm run build && vitest run",
68
68
  "lint": "biome check ./src",
69
69
  "typecheck": "tsc -p tsconfig.build.json --noEmit",
70
+ "generate-corpus": "node scripts/generate-corpus.mjs",
70
71
  "smoke:mcp": "node scripts/smoke-mcp.mjs",
71
72
  "record:demo:payment-retry": "vhs scripts/record-demo-payment-retry.tape",
72
73
  "record:demo:drift": "vhs scripts/record-demo-drift.tape"
@@ -0,0 +1,667 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * generate-corpus.mjs
4
+ *
5
+ * • Default (no --count / --generate): validate hand-written corpus/*.json
6
+ * • Generate: synthetic IR graphs → ArchRad engine → JSONL training pairs
7
+ *
8
+ * Usage:
9
+ * npm run build
10
+ * npm run generate-corpus
11
+ * node scripts/generate-corpus.mjs --count 1000 --out corpus/auto-generated.jsonl
12
+ */
13
+
14
+ import { mkdirSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
15
+ import { readdir, readFile } from 'node:fs/promises';
16
+ import { dirname, join, resolve } from 'node:path';
17
+ import { fileURLToPath } from 'node:url';
18
+ import { normalizeIrGraph, validateIrLint, validateIrStructural } from '../dist/index.js';
19
+
20
+ const __dirname = dirname(fileURLToPath(import.meta.url));
21
+ const ROOT = resolve(__dirname, '..');
22
+ const corpusDir = join(ROOT, 'corpus');
23
+
24
+ // ─── CLI ──────────────────────────────────────────────────────────────────────
25
+
26
+ const argv = process.argv.slice(2);
27
+ const wantsGenerate = argv.includes('--generate') || argv.includes('--count');
28
+
29
/**
 * Looks up the token that follows `flag` in the CLI arguments.
 *
 * @param flag Option name to search for (e.g. `--count`).
 * @param def  Value returned when the flag is absent or is not followed by a
 *             non-empty token.
 * @returns The token after `flag`, or `def`.
 */
const getArg = (flag, def) => {
  const flagIndex = argv.indexOf(flag);
  if (flagIndex === -1) return def;
  const value = argv[flagIndex + 1];
  return value ? value : def;
};
33
+
34
+ // ─── Validate hand-written corpus (default) ───────────────────────────────────
35
+
36
/**
 * Shape check for one hand-written corpus record: a non-null object with a
 * string `id` and both an `input` and an `output` property (values unchecked).
 */
function isPairRecord(x) {
  if (x == null || typeof x !== 'object') return false;
  if (typeof x.id !== 'string') return false;
  return 'input' in x && 'output' in x;
}
45
+
46
/**
 * Default mode: checks every hand-written `corpus/*.json` file (names starting
 * with `auto-` are ignored). Each file must parse as a JSON array whose items
 * all pass `isPairRecord`.
 *
 * On the first problem an error is printed, `process.exitCode` is set to 1 and
 * the function returns; otherwise per-file and total pair counts are logged.
 */
async function validateHandwritten() {
  // Print the failure and mark the process as failed without exiting abruptly.
  const fail = (...parts) => {
    console.error(...parts);
    process.exitCode = 1;
  };

  let entries;
  try {
    entries = await readdir(corpusDir);
  } catch (err) {
    fail('generate-corpus: cannot read corpus dir:', corpusDir, err);
    return;
  }

  const handwritten = entries
    .filter((entry) => entry.endsWith('.json') && !entry.startsWith('auto-'))
    .sort();
  if (handwritten.length === 0) {
    fail('generate-corpus: no hand-written .json files in', corpusDir);
    return;
  }

  let pairTotal = 0;
  for (const fileName of handwritten) {
    const filePath = join(corpusDir, fileName);

    let records;
    try {
      records = JSON.parse(await readFile(filePath, 'utf8'));
    } catch (err) {
      fail(`generate-corpus: ${filePath}:`, err);
      return;
    }

    if (!Array.isArray(records)) {
      fail(`generate-corpus: ${fileName} must be a JSON array`);
      return;
    }

    // Report the first malformed record, by index.
    const badIndex = records.findIndex((record) => !isPairRecord(record));
    if (badIndex !== -1) {
      fail(`generate-corpus: ${fileName}[${badIndex}] missing id/input/output`);
      return;
    }

    pairTotal += records.length;
    console.log(`${fileName}: ${records.length} pair(s)`);
  }

  console.log(`generate-corpus: OK — ${handwritten.length} file(s), ${pairTotal} pair(s) total`);
}
91
+
92
+ // ─── Name pools (generation) ──────────────────────────────────────────────────
93
+
94
+ const GATEWAY_NAMES = [
95
+ ['api-gateway', 'API Gateway'],
96
+ ['web-gateway', 'Web Gateway'],
97
+ ['mobile-gateway', 'Mobile Gateway'],
98
+ ['public-gateway', 'Public Gateway'],
99
+ ['edge-gateway', 'Edge Gateway'],
100
+ ['payment-gateway', 'Payment Gateway'],
101
+ ['admin-gateway', 'Admin Gateway'],
102
+ ['partner-gateway', 'Partner Gateway'],
103
+ ];
104
+
105
+ const API_NAMES = [
106
+ ['rest-api', 'REST API'],
107
+ ['public-api', 'Public API'],
108
+ ['partner-api', 'Partner API'],
109
+ ['internal-api', 'Internal API'],
110
+ ['checkout-api', 'Checkout API'],
111
+ ['reporting-api', 'Reporting API'],
112
+ ];
113
+
114
+ const BFF_NAMES = [
115
+ ['web-bff', 'Web BFF'],
116
+ ['mobile-bff', 'Mobile BFF'],
117
+ ['dashboard-bff', 'Dashboard BFF'],
118
+ ];
119
+
120
+ const GRPC_NAMES = [
121
+ ['grpc-gateway', 'gRPC Gateway'],
122
+ ['grpc-api', 'gRPC API'],
123
+ ];
124
+
125
+ const GRAPHQL_NAMES = [
126
+ ['graphql-api', 'GraphQL API'],
127
+ ['graph-api', 'Graph API'],
128
+ ];
129
+
130
+ const SERVICE_NAMES = [
131
+ ['user-service', 'User Service'],
132
+ ['order-service', 'Order Service'],
133
+ ['payment-service', 'Payment Service'],
134
+ ['inventory-service', 'Inventory Service'],
135
+ ['notification-service', 'Notification Service'],
136
+ ['billing-service', 'Billing Service'],
137
+ ['shipping-service', 'Shipping Service'],
138
+ ['catalog-service', 'Catalog Service'],
139
+ ['search-service', 'Search Service'],
140
+ ['auth-proxy', 'Auth Proxy'],
141
+ ['profile-service', 'Profile Service'],
142
+ ['report-service', 'Report Service'],
143
+ ['analytics-service', 'Analytics Service'],
144
+ ['fraud-service', 'Fraud Detection'],
145
+ ['compliance-service', 'Compliance Service'],
146
+ ['fulfillment-service', 'Fulfillment Service'],
147
+ ['recommendation-service', 'Recommendation Service'],
148
+ ['pricing-service', 'Pricing Service'],
149
+ ['tax-service', 'Tax Service'],
150
+ ['review-service', 'Review Service'],
151
+ ];
152
+
153
+ const DB_NAMES = [
154
+ ['user-db', 'User DB', 'database'],
155
+ ['order-db', 'Order DB', 'database'],
156
+ ['payment-db', 'Payment DB', 'database'],
157
+ ['inventory-db', 'Inventory DB', 'database'],
158
+ ['main-postgres', 'Main Postgres', 'postgres'],
159
+ ['analytics-db', 'Analytics DB', 'database'],
160
+ ['audit-db', 'Audit DB', 'database'],
161
+ ['session-cache', 'Session Cache', 'redis'],
162
+ ['content-db', 'Content DB', 'mongodb'],
163
+ ['ledger-db', 'Ledger DB', 'database'],
164
+ ['archive-db', 'Archive DB', 'database'],
165
+ ['events-table', 'Events Table', 'dynamo'],
166
+ ['media-bucket', 'Media Bucket', 's3'],
167
+ ];
168
+
169
+ const QUEUE_NAMES = [
170
+ ['email-queue', 'Email Queue', 'queue'],
171
+ ['order-events', 'Order Events', 'kafka'],
172
+ ['notification-queue', 'Notification Queue', 'queue'],
173
+ ['payment-events', 'Payment Events', 'kafka'],
174
+ ['job-queue', 'Job Queue', 'queue'],
175
+ ];
176
+
177
+ const AUTH_NAMES = [
178
+ ['jwt-middleware', 'JWT Middleware', 'auth'],
179
+ ['oauth-provider', 'OAuth Provider', 'oauth'],
180
+ ['keycloak', 'Keycloak', 'keycloak'],
181
+ ['okta', 'Okta IdP', 'okta'],
182
+ ['auth-middleware', 'Auth Middleware', 'middleware'],
183
+ ['iam-service', 'IAM Service', 'iam'],
184
+ ];
185
+
186
/** Returns one element of `arr` chosen at random (`undefined` for an empty array). */
function pick(arr) {
  const index = Math.floor(Math.random() * arr.length);
  return arr[index];
}
189
+
190
/**
 * Returns up to `n` distinct elements of `arr` in random order, without
 * mutating `arr`.
 *
 * Uses a partial Fisher–Yates shuffle on a copy. The previous
 * `sort(() => Math.random() - 0.5)` approach hands `sort` an inconsistent
 * comparator, which produces a biased ordering.
 *
 * @param arr Source array.
 * @param n   Number of elements wanted; clamped to `[0, arr.length]`.
 * @returns A new array holding the sampled elements.
 */
function pickN(arr, n) {
  const pool = [...arr];
  const take = Math.max(0, Math.min(Math.trunc(n), pool.length));
  for (let i = 0; i < take; i++) {
    // Swap a uniformly chosen element from the not-yet-fixed tail into slot i.
    const j = i + Math.floor(Math.random() * (pool.length - i));
    const tmp = pool[i];
    pool[i] = pool[j];
    pool[j] = tmp;
  }
  return pool.slice(0, take);
}
194
+
195
/**
 * Picks a random HTTP-like entry node: first one of the five name pools,
 * then an `[id, name]` entry from it. The node `type` follows the pool
 * (`gateway`, `api`, `bff`, `grpc` or `graphql`).
 *
 * @returns `{ id, name, type }`
 */
function pickHttpLike() {
  const typedPools = [
    [GATEWAY_NAMES, 'gateway'],
    [API_NAMES, 'api'],
    [BFF_NAMES, 'bff'],
    [GRPC_NAMES, 'grpc'],
    [GRAPHQL_NAMES, 'graphql'],
  ];
  const [pool, type] = pick(typedPools);
  const [id, name] = pick(pool);
  return { id, name, type };
}
206
+
207
/** Returns one of the default health-style paths: `/health`, `/healthz` or `/ping`. */
function pickDefaultHealthUrl() {
  const healthPaths = ['/health', '/healthz', '/ping'];
  return pick(healthPaths);
}
210
+
211
/**
 * Config for an HTTP-like node with `authRequired: true` and a random default
 * health URL; properties in `extra` are applied on top and may override both.
 * Use on HTTP-like nodes except generators that intentionally test
 * IR-LINT-NO-HEALTHCHECK-003.
 */
function httpCleanConfig(extra = {}) {
  const base = { authRequired: true, url: pickDefaultHealthUrl() };
  return Object.assign(base, extra);
}
215
+
216
/**
 * Builds an edge record `{ from, to, metadata: { protocol } }`.
 * `protocol` defaults to `'https'`.
 */
function makeEdge(from, to, protocol = 'https') {
  const metadata = { protocol };
  return { from, to, metadata };
}
219
+
220
/**
 * Builds an edge whose protocol is `'async'`.
 * Keeps IR-LINT-SYNC-CHAIN-001 off clean layered graphs (async auth→service breaks sync depth).
 */
function makeAsyncEdge(from, to) {
  const metadata = { protocol: 'async' };
  return { from, to, metadata };
}
224
+
225
/**
 * Runs the engine on `graph`, wrapped as `{ graph }`.
 *
 * If `normalizeIrGraph` returns an object carrying `findings`, those are
 * reported as the structural findings with `ok: false` and no lint findings.
 * Otherwise structural and lint validation both run, and `ok` is true when no
 * finding in the combined list has severity `'error'`.
 *
 * @returns `{ ok, structuralFindings, lintFindings }`, plus `combined` on the
 *          non-failing normalization path.
 */
function runEngine(graph) {
  const ir = { graph };

  const normalized = normalizeIrGraph(ir);
  if ('findings' in normalized) {
    return { ok: false, structuralFindings: normalized.findings, lintFindings: [] };
  }

  const structuralFindings = validateIrStructural(ir);
  const lintFindings = validateIrLint(ir);
  const combined = [...structuralFindings, ...lintFindings];
  const ok = !combined.some((finding) => finding.severity === 'error');

  return { ok, structuralFindings, lintFindings, combined };
}
241
+
242
/**
 * Turns one generated graph and its engine result into a training pair.
 *
 * Only lint findings are emitted as `violations`; a missing `nodeId` or
 * `fixHint` becomes `null`.
 *
 * @param id      Pair identifier.
 * @param graph   Graph used as the pair's input.
 * @param result  Engine result; `ok` and `lintFindings` are read.
 * @param variant Label of the generator that produced the graph.
 */
function toPair(id, graph, result, variant) {
  const ok = result.ok;

  const violations = [];
  for (const finding of result.lintFindings) {
    violations.push({
      code: finding.code,
      severity: finding.severity,
      nodeId: finding.nodeId ?? null,
      message: finding.message,
      fix: finding.fixHint ?? null,
    });
  }

  return {
    id,
    instruction: 'Given this IR graph, what architecture violations exist?',
    variant,
    input: { graph },
    output: { ok, violations },
  };
}
260
+
261
+ // ─── Graph generators ─────────────────────────────────────────────────────────
262
+
263
/**
 * Variant `direct-db-access`: an HTTP-like entry node connected straight to a
 * datastore over `tcp`, with no service node in between.
 */
function genDirectDbAccess() {
  const entry = pickHttpLike();
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const entryNode = { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() };
  const storeNode = { id: storeId, type: storeType, name: storeName };

  return {
    graph: {
      nodes: [entryNode, storeNode],
      edges: [makeEdge(entry.id, storeId, 'tcp')],
    },
    variant: 'direct-db-access',
  };
}
273
+
274
/**
 * Variant `clean-service-layer`: entry → service → datastore, where the entry
 * node carries the clean HTTP config and the datastore hop uses `tcp`.
 */
function genCleanServiceLayer() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const entryNode = { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() };
  const serviceNode = { id: serviceId, type: 'service', name: serviceName };
  const storeNode = { id: storeId, type: storeType, name: storeName };

  return {
    graph: {
      nodes: [entryNode, serviceNode, storeNode],
      edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
    },
    variant: 'clean-service-layer',
  };
}
286
+
287
/**
 * Variant `missing-auth`: entry → service → datastore where the entry node
 * declares no auth flag.
 *
 * The entry node still gets a default health URL (same shape as the entry
 * config in `genMultiViolation`). Elsewhere in this file only
 * `genNoHealthcheck` is meant to omit the health URL for
 * IR-LINT-NO-HEALTHCHECK-003, so dropping the whole config here would mix a
 * second violation into this variant.
 * NOTE(review): assumes the engine treats a config without any auth key as
 * unauthenticated — confirm against the lint rule.
 */
function genMissingAuth() {
  const http = pickHttpLike();
  const [svcId, svcName] = pick(SERVICE_NAMES);
  const [dbId, dbName, dbType] = pick(DB_NAMES);
  const nodes = [
    // Health URL only: no authRequired/auth/security/authentication key.
    { id: http.id, type: http.type, name: http.name, config: { url: pickDefaultHealthUrl() } },
    { id: svcId, type: 'service', name: svcName },
    { id: dbId, type: dbType, name: dbName },
  ];
  const edges = [makeEdge(http.id, svcId), makeEdge(svcId, dbId, 'tcp')];
  return { graph: { nodes, edges }, variant: 'missing-auth' };
}
299
+
300
/**
 * Variant `clean-with-auth`: entry → auth node → service → datastore. The
 * auth→service hop is an async edge and the datastore hop uses `tcp`.
 */
function genCleanWithAuth() {
  const entry = pickHttpLike();
  const [authId, authName, authType] = pick(AUTH_NAMES);
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const nodes = [];
  nodes.push({ id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() });
  nodes.push({ id: authId, type: authType, name: authName });
  nodes.push({ id: serviceId, type: 'service', name: serviceName });
  nodes.push({ id: storeId, type: storeType, name: storeName });

  const edges = [];
  edges.push(makeEdge(entry.id, authId));
  edges.push(makeAsyncEdge(authId, serviceId));
  edges.push(makeEdge(serviceId, storeId, 'tcp'));

  return { graph: { nodes, edges }, variant: 'clean-with-auth' };
}
318
+
319
/**
 * Variant `clean-auth-config`: entry → service → datastore where the entry
 * config sets one randomly chosen auth key (`authRequired`, `auth`,
 * `security` or `authentication`) to `true`, with a fixed `/health` URL.
 */
function genCleanAuthConfig() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  const authKey = pick(['authRequired', 'auth', 'security', 'authentication']);

  // Auth key first, then url, to keep the serialized key order stable.
  const entryConfig = {};
  entryConfig[authKey] = true;
  entryConfig.url = '/health';

  const nodes = [
    { id: entry.id, type: entry.type, name: entry.name, config: entryConfig },
    { id: serviceId, type: 'service', name: serviceName },
    { id: storeId, type: storeType, name: storeName },
  ];
  const edges = [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')];

  return { graph: { nodes, edges }, variant: 'clean-auth-config' };
}
332
+
333
/**
 * Variant `high-fanout`: one entry node calling 5–8 distinct services, each
 * with its own datastore node. Datastore ids are suffixed with the owning
 * service id so a datastore name picked twice still yields unique node ids.
 */
function genHighFanout() {
  const entry = pickHttpLike();
  const fanout = 5 + Math.floor(Math.random() * 4);
  const chosen = pickN(SERVICE_NAMES, fanout);

  const nodes = [{ id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() }];
  for (const [id, name] of chosen) {
    nodes.push({ id, type: 'service', name });
  }

  const edges = [];
  chosen.forEach(([serviceId]) => {
    const [baseStoreId, storeName, storeType] = pick(DB_NAMES);
    const storeId = `${baseStoreId}__${serviceId}`;
    nodes.push({ id: storeId, type: storeType, name: storeName });
    edges.push(makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp'));
  });

  return { graph: { nodes, edges }, variant: 'high-fanout' };
}
351
+
352
/**
 * Variant `sync-chain`: entry → 3–5 services chained one after another with
 * default (`https`) edges, ending in a datastore reached over `tcp`.
 */
function genSyncChain() {
  const entry = pickHttpLike();
  const depth = 3 + Math.floor(Math.random() * 3);
  const chain = pickN(SERVICE_NAMES, depth);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const nodes = [{ id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() }];
  for (const [id, name] of chain) {
    nodes.push({ id, type: 'service', name });
  }
  nodes.push({ id: storeId, type: storeType, name: storeName });

  // Walk consecutive hops: entry → s0 → s1 → … → last service.
  const hopIds = [entry.id, ...chain.map(([id]) => id)];
  const edges = [];
  for (let i = 1; i < hopIds.length; i++) {
    edges.push(makeEdge(hopIds[i - 1], hopIds[i]));
  }
  edges.push(makeEdge(hopIds[hopIds.length - 1], storeId, 'tcp'));

  return { graph: { nodes, edges }, variant: 'sync-chain' };
}
370
+
371
/**
 * Variant `clean-async-break`: entry → service A → queue → service B →
 * service C → datastore. Both hops touching the queue use `amqp`; the three
 * services are distinct.
 */
function genCleanAsyncBreak() {
  const entry = pickHttpLike();
  const [firstId, firstName] = pick(SERVICE_NAMES);
  const [queueId, queueName, queueType] = pick(QUEUE_NAMES);
  const [secondId, secondName] = pick(SERVICE_NAMES.filter(([id]) => id !== firstId));
  const [thirdId, thirdName] = pick(
    SERVICE_NAMES.filter(([id]) => id !== firstId && id !== secondId),
  );
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const service = (id, name) => ({ id, type: 'service', name });
  const viaQueue = (from, to) => ({ from, to, metadata: { protocol: 'amqp' } });

  const nodes = [
    { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
    service(firstId, firstName),
    { id: queueId, type: queueType, name: queueName },
    service(secondId, secondName),
    service(thirdId, thirdName),
    { id: storeId, type: storeType, name: storeName },
  ];
  const edges = [
    makeEdge(entry.id, firstId),
    viaQueue(firstId, queueId),
    viaQueue(queueId, secondId),
    makeEdge(secondId, thirdId),
    makeEdge(thirdId, storeId, 'tcp'),
  ];

  return { graph: { nodes, edges }, variant: 'clean-async-break' };
}
395
+
396
/**
 * Variant `no-healthcheck`: entry → service → datastore where the entry
 * config has `authRequired: true` but no `url`.
 */
function genNoHealthcheck() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  // Intentionally no health-like url — only this generator should omit it for 003.
  const entryNode = {
    id: entry.id,
    type: entry.type,
    name: entry.name,
    config: { authRequired: true },
  };
  const serviceNode = { id: serviceId, type: 'service', name: serviceName };
  const storeNode = { id: storeId, type: storeType, name: storeName };

  return {
    graph: {
      nodes: [entryNode, serviceNode, storeNode],
      edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
    },
    variant: 'no-healthcheck',
  };
}
409
+
410
/**
 * Variant `clean-healthcheck`: entry → service → datastore where the entry
 * config has `authRequired: true` and a `url` drawn from six health-style
 * paths (`/health`, `/healthz`, `/ping`, `/status`, `/ready`, `/live`).
 */
function genCleanHealthcheck() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  const url = pick(['/health', '/healthz', '/ping', '/status', '/ready', '/live']);

  const nodes = [
    { id: entry.id, type: entry.type, name: entry.name, config: { authRequired: true, url } },
    { id: serviceId, type: 'service', name: serviceName },
    { id: storeId, type: storeType, name: storeName },
  ];
  const edges = [];
  edges.push(makeEdge(entry.id, serviceId));
  edges.push(makeEdge(serviceId, storeId, 'tcp'));

  return { graph: { nodes, edges }, variant: 'clean-healthcheck' };
}
423
+
424
/**
 * Variant `isolated-node`: a clean entry → service → datastore path plus one
 * extra service node that no edge touches.
 */
function genIsolatedNode() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  const [strayId, strayName] = pick(SERVICE_NAMES.filter(([id]) => id !== serviceId));

  const connected = [
    { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
    { id: serviceId, type: 'service', name: serviceName },
    { id: storeId, type: storeType, name: storeName },
  ];
  // The stray service is listed as a node but never referenced by an edge.
  const nodes = [...connected, { id: strayId, type: 'service', name: strayName }];
  const edges = [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')];

  return { graph: { nodes, edges }, variant: 'isolated-node' };
}
438
+
439
/**
 * Variant `duplicate-edge`: entry → service → datastore where the
 * entry→service edge appears twice.
 */
function genDuplicateEdge() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const nodes = [
    { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
    { id: serviceId, type: 'service', name: serviceName },
    { id: storeId, type: storeType, name: storeName },
  ];

  const edges = [];
  // Two separate but identical entry→service edges.
  for (let copy = 0; copy < 2; copy++) {
    edges.push(makeEdge(entry.id, serviceId));
  }
  edges.push(makeEdge(serviceId, storeId, 'tcp'));

  return { graph: { nodes, edges }, variant: 'duplicate-edge' };
}
451
+
452
/**
 * Variant `missing-name`: entry → service → datastore where the entry node
 * has no `name` property; its config is otherwise the clean one
 * (`authRequired: true` plus a default health URL).
 */
function genMissingName() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  // Deliberately leaves out `name` on the entry node.
  const namelessEntry = {
    id: entry.id,
    type: entry.type,
    config: { authRequired: true, url: pickDefaultHealthUrl() },
  };

  return {
    graph: {
      nodes: [
        namelessEntry,
        { id: serviceId, type: 'service', name: serviceName },
        { id: storeId, type: storeType, name: storeName },
      ],
      edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
    },
    variant: 'missing-name',
  };
}
464
+
465
/**
 * Variant `datastore-no-incoming`: one service and two distinct datastores,
 * with no HTTP-like entry node. The service writes to the first datastore;
 * the second datastore only has an outgoing edge to the first.
 */
function genDatastoreNoIncoming() {
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [primaryId, primaryName, primaryType] = pick(DB_NAMES);
  const [secondaryId, secondaryName, secondaryType] = pick(
    DB_NAMES.filter(([id]) => id !== primaryId),
  );

  const nodes = [
    { id: serviceId, type: 'service', name: serviceName },
    { id: primaryId, type: primaryType, name: primaryName },
    { id: secondaryId, type: secondaryType, name: secondaryName },
  ];

  // db2 has no incoming edges (008) but outgoing to db1 so it is not IR-LINT-ISOLATED-NODE-005.
  const edges = [serviceId, secondaryId].map((sourceId) => makeEdge(sourceId, primaryId, 'tcp'));

  return { graph: { nodes, edges }, variant: 'datastore-no-incoming' };
}
481
+
482
/**
 * Variant `multiple-http-entries`: two distinct HTTP-like entry nodes that
 * both call the same service, which in turn talks to one datastore.
 */
function genMultipleHttpEntries() {
  const first = pickHttpLike();
  // Redraw until the second entry has a different id from the first.
  let second;
  do {
    second = pickHttpLike();
  } while (second.id === first.id);

  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const entryNodes = [first, second].map((entry) => ({
    id: entry.id,
    type: entry.type,
    name: entry.name,
    config: httpCleanConfig(),
  }));
  const nodes = [
    ...entryNodes,
    { id: serviceId, type: 'service', name: serviceName },
    { id: storeId, type: storeType, name: storeName },
  ];
  const edges = [
    makeEdge(first.id, serviceId),
    makeEdge(second.id, serviceId),
    makeEdge(serviceId, storeId, 'tcp'),
  ];

  return { graph: { nodes, edges }, variant: 'multiple-http-entries' };
}
497
+
498
/**
 * Variant `dead-node`: the entry node calls two distinct services; only one
 * of them continues to the datastore, the other has no outgoing edge.
 */
function genDeadNode() {
  const entry = pickHttpLike();
  const [liveId, liveName] = pick(SERVICE_NAMES);
  const [deadEndId, deadEndName] = pick(SERVICE_NAMES.filter(([id]) => id !== liveId));
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const nodes = [];
  nodes.push({ id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() });
  nodes.push({ id: liveId, type: 'service', name: liveName });
  nodes.push({ id: deadEndId, type: 'service', name: deadEndName });
  nodes.push({ id: storeId, type: storeType, name: storeName });

  const edges = [liveId, deadEndId].map((serviceId) => makeEdge(entry.id, serviceId));
  edges.push(makeEdge(liveId, storeId, 'tcp'));

  return { graph: { nodes, edges }, variant: 'dead-node' };
}
516
+
517
/**
 * Variant `multi-violation`: combines several irregularities in one graph —
 * the entry config has a health URL but no auth flag, the entry node is wired
 * directly to the datastore, and one extra service has no edges at all.
 */
function genMultiViolation() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  const [strayId, strayName] = pick(SERVICE_NAMES.filter(([id]) => id !== serviceId));

  const entryNode = {
    id: entry.id,
    type: entry.type,
    name: entry.name,
    config: { url: pickDefaultHealthUrl() },
  };
  const service = (id, name) => ({ id, type: 'service', name });

  return {
    graph: {
      nodes: [
        entryNode,
        service(serviceId, serviceName),
        { id: storeId, type: storeType, name: storeName },
        service(strayId, strayName),
      ],
      edges: [makeEdge(entry.id, serviceId), makeEdge(entry.id, storeId, 'tcp')],
    },
    variant: 'multi-violation',
  };
}
531
+
532
/**
 * Variant `clean-graph`: entry → auth node, async edges from the auth node to
 * each of 2–4 services, and a `tcp` edge from every service to one shared
 * datastore. The entry config overrides the default health URL with one of
 * `/health`, `/healthz`, `/ping`, `/status`.
 */
function genCleanGraph() {
  const entry = pickHttpLike();
  const [authId, authName, authType] = pick(AUTH_NAMES);
  const serviceCount = 2 + Math.floor(Math.random() * 3);
  const chosen = pickN(SERVICE_NAMES, serviceCount);
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  const url = pick(['/health', '/healthz', '/ping', '/status']);

  const nodes = [
    { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig({ url }) },
    { id: authId, type: authType, name: authName },
  ];
  for (const [id, name] of chosen) {
    nodes.push({ id, type: 'service', name });
  }
  nodes.push({ id: storeId, type: storeType, name: storeName });

  const serviceIds = chosen.map(([id]) => id);
  const edges = [makeEdge(entry.id, authId)];
  for (const id of serviceIds) edges.push(makeAsyncEdge(authId, id));
  for (const id of serviceIds) edges.push(makeEdge(id, storeId, 'tcp'));

  return { graph: { nodes, edges }, variant: 'clean-graph' };
}
551
+
552
+ const GENERATORS = [
553
+ { fn: genHighFanout, weight: 12 },
554
+ { fn: genMissingName, weight: 10 },
555
+ { fn: genDuplicateEdge, weight: 8 },
556
+ { fn: genDirectDbAccess, weight: 8 },
557
+ { fn: genMissingAuth, weight: 8 },
558
+ { fn: genDatastoreNoIncoming, weight: 8 },
559
+ { fn: genMultiViolation, weight: 6 },
560
+ { fn: genDeadNode, weight: 5 },
561
+ { fn: genMultipleHttpEntries, weight: 5 },
562
+ { fn: genNoHealthcheck, weight: 2 },
563
+ { fn: genIsolatedNode, weight: 2 },
564
+ { fn: genSyncChain, weight: 3 },
565
+ { fn: genCleanServiceLayer, weight: 5 },
566
+ { fn: genCleanWithAuth, weight: 5 },
567
+ { fn: genCleanAuthConfig, weight: 4 },
568
+ { fn: genCleanAsyncBreak, weight: 3 },
569
+ { fn: genCleanHealthcheck, weight: 3 },
570
+ { fn: genCleanGraph, weight: 8 },
571
+ ];
572
+
573
+ const POOL = GENERATORS.flatMap(({ fn, weight }) => Array(weight).fill(fn));
574
+
575
/**
 * Generate mode: draws graphs from the weighted generator pool, runs each one
 * through the engine and writes the resulting training pairs as JSONL.
 *
 * CLI options (read through `getArg`):
 *   --count <n>   number of pairs to produce (default 500; positive integer)
 *   --out <path>  output file, resolved against the repo root
 *                 (default `corpus/auto-generated.jsonl`)
 *
 * Graphs are skipped when they duplicate an earlier graph, when the generator
 * or engine throws, or when the engine reports a structural error. The loop
 * gives up after `count * 10` attempts. An invalid `--count` prints an error,
 * sets `process.exitCode = 1` and writes nothing.
 */
function generateCorpus() {
  const rawCount = getArg('--count', '500');
  const TARGET_COUNT = Number.parseInt(rawCount, 10);
  // A NaN target would skip the loop entirely and silently write an empty corpus.
  if (!Number.isInteger(TARGET_COUNT) || TARGET_COUNT <= 0) {
    console.error(`generate-corpus: --count must be a positive integer (got "${rawCount}")`);
    process.exitCode = 1;
    return;
  }
  const OUT_PATH = resolve(ROOT, getArg('--out', 'corpus/auto-generated.jsonl'));

  console.log(`Generating ${TARGET_COUNT} corpus pairs...`);

  // Create the directory that actually receives the file; --out may point
  // outside corpus/.
  mkdirSync(dirname(OUT_PATH), { recursive: true });

  const pairs = [];
  const seenGraphs = new Set();
  const maxAttempts = TARGET_COUNT * 10;
  let skipped = 0;
  let errored = 0;
  let firstError;
  let attempts = 0;

  // Count a thrown generator/engine error as a skip, but remember the first
  // one so systematic failures are visible in the summary.
  const noteError = (err) => {
    skipped++;
    errored++;
    if (firstError === undefined) firstError = err;
  };

  while (pairs.length < TARGET_COUNT) {
    attempts++;
    if (attempts > maxAttempts) {
      console.warn(`Stopping after ${attempts} attempts — possible infinite loop.`);
      break;
    }

    const gen = pick(POOL);
    let graphDef;
    try {
      graphDef = gen();
    } catch (err) {
      noteError(err);
      continue;
    }

    // De-duplicate on the serialized graph.
    const key = JSON.stringify(graphDef.graph);
    if (seenGraphs.has(key)) {
      skipped++;
      continue;
    }
    seenGraphs.add(key);

    let result;
    try {
      result = runEngine(graphDef.graph);
    } catch (err) {
      noteError(err);
      continue;
    }

    // Structurally invalid graphs are not useful lint examples.
    if (result.structuralFindings?.some((f) => f.severity === 'error')) {
      skipped++;
      continue;
    }

    const id = `gen-${pairs.length}-${graphDef.variant}`;
    pairs.push(toPair(id, graphDef.graph, result, graphDef.variant));
  }

  // One JSON document per line; no stray blank line when nothing was produced.
  const body = pairs.map((pair) => JSON.stringify(pair) + '\n').join('');
  writeFileSync(OUT_PATH, body, 'utf8');

  let withViolations = 0;
  const ruleCounts = {};
  for (const pair of pairs) {
    const { violations } = pair.output;
    if (violations.length > 0) withViolations++;
    for (const v of violations) {
      ruleCounts[v.code] = (ruleCounts[v.code] ?? 0) + 1;
    }
  }
  const withoutViolations = pairs.length - withViolations;

  console.log(`\nDone.`);
  console.log(`  Written to: ${OUT_PATH}`);
  console.log(`  Total pairs: ${pairs.length}`);
  console.log(`  With violations: ${withViolations}`);
  console.log(`  Clean (no lint violations): ${withoutViolations}`);
  console.log(`  Skipped (duplicates/errors): ${skipped}`);
  if (errored > 0) {
    console.warn(`  Of which thrown errors: ${errored} (first: ${firstError})`);
  }
  console.log(`\nViolation distribution:`);
  for (const [code, count] of Object.entries(ruleCounts).sort((a, b) => b[1] - a[1])) {
    console.log(`  ${code}: ${count}`);
  }
}
651
+
652
/**
 * Entry point. With `--generate` or `--count` on the command line it checks
 * that `dist/index.js` is readable and then generates the synthetic corpus;
 * otherwise it validates the hand-written corpus files.
 *
 * NOTE(review): this module also imports `../dist/index.js` statically at the
 * top of the file, so a missing build probably fails at load time before this
 * readability check can print its hint — confirm.
 */
async function main() {
  if (wantsGenerate) {
    try {
      // Read purely as an existence/readability probe; the content is unused.
      readFileSync(join(ROOT, 'dist', 'index.js'), 'utf8');
    } catch {
      console.error('generate-corpus: run `npm run build` first (dist/index.js missing).');
      process.exitCode = 1;
      return;
    }
    generateCorpus();
    return;
  }

  await validateHandwritten();
}
666
+
667
+ main();