@archrad/deterministic 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,667 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * generate-corpus.mjs
4
+ *
5
+ * • Default (no --count / --generate): validate hand-written corpus/*.json
6
+ * • Generate: synthetic IR graphs → ArchRad engine → JSONL training pairs
7
+ *
8
+ * Usage:
9
+ * npm run build
10
+ * npm run generate-corpus
11
+ * node scripts/generate-corpus.mjs --count 1000 --out corpus/auto-generated.jsonl
12
+ */
13
+
14
+ import { mkdirSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
15
+ import { readdir, readFile } from 'node:fs/promises';
16
+ import { dirname, join, resolve } from 'node:path';
17
+ import { fileURLToPath } from 'node:url';
18
+ import { normalizeIrGraph, validateIrLint, validateIrStructural } from '../dist/index.js';
19
+
20
+ const __dirname = dirname(fileURLToPath(import.meta.url));
21
+ const ROOT = resolve(__dirname, '..');
22
+ const corpusDir = join(ROOT, 'corpus');
23
+
24
+ // ─── CLI ──────────────────────────────────────────────────────────────────────
25
+
26
// Raw CLI arguments (everything after `node <script>`).
const argv = process.argv.slice(2);
// Generation mode is opt-in; without either flag the script only validates the hand-written corpus.
const wantsGenerate = argv.includes('--generate') || argv.includes('--count');

/**
 * Read the value that follows `flag` on the command line.
 *
 * @param {string} flag - Flag to look up, e.g. `--count`.
 * @param {string} def - Fallback when the flag is absent or has no usable value.
 * @returns {string} The token after `flag`, or `def`.
 */
const getArg = (flag, def) => {
  const i = argv.indexOf(flag);
  if (i === -1) return def;
  const value = argv[i + 1];
  // A following `--other-flag` is the next option, not this flag's value
  // (otherwise `--count --out x` would yield the count "--out").
  return value && !value.startsWith('--') ? value : def;
};
33
+
34
+ // ─── Validate hand-written corpus (default) ───────────────────────────────────
35
+
36
/**
 * Shape check for one corpus record: a non-null object with a string `id`
 * and both `input` and `output` properties present (values are not inspected).
 *
 * @param {unknown} x - Candidate record.
 * @returns {boolean} True when `x` has the required fields.
 */
function isPairRecord(x) {
  if (x == null || typeof x !== 'object') {
    return false;
  }
  if (typeof x.id !== 'string') {
    return false;
  }
  return 'input' in x && 'output' in x;
}
45
+
46
/**
 * Validate every hand-written corpus file in `corpusDir`.
 *
 * Considers `*.json` files whose name does not start with `auto-`, requires each
 * to parse as a JSON array of pair records, and prints a per-file and total count.
 * On the first problem it logs an error, sets `process.exitCode = 1`, and stops.
 *
 * @returns {Promise<void>}
 */
async function validateHandwritten() {
  // Shared failure path: report and flag a non-zero exit; the caller then returns.
  const fail = (...message) => {
    console.error(...message);
    process.exitCode = 1;
  };

  let entries;
  try {
    entries = await readdir(corpusDir);
  } catch (e) {
    fail('generate-corpus: cannot read corpus dir:', corpusDir, e);
    return;
  }

  const jsonFiles = entries
    .filter((entry) => entry.endsWith('.json') && !entry.startsWith('auto-'))
    .sort();
  if (jsonFiles.length === 0) {
    fail('generate-corpus: no hand-written .json files in', corpusDir);
    return;
  }

  let totalPairs = 0;
  for (const name of jsonFiles) {
    const filePath = join(corpusDir, name);

    let records;
    try {
      const text = await readFile(filePath, 'utf8');
      records = JSON.parse(text);
    } catch (e) {
      fail(`generate-corpus: ${filePath}:`, e);
      return;
    }

    if (!Array.isArray(records)) {
      fail(`generate-corpus: ${name} must be a JSON array`);
      return;
    }

    // Report the first malformed record by index.
    const badIndex = records.findIndex((record) => !isPairRecord(record));
    if (badIndex !== -1) {
      fail(`generate-corpus: ${name}[${badIndex}] missing id/input/output`);
      return;
    }

    totalPairs += records.length;
    console.log(`${name}: ${records.length} pair(s)`);
  }

  console.log(`generate-corpus: OK — ${jsonFiles.length} file(s), ${totalPairs} pair(s) total`);
}
91
+
92
+ // ─── Name pools (generation) ──────────────────────────────────────────────────
93
+
94
// Pools of HTTP-like entry nodes. Each entry is an `[id, displayName]` tuple;
// the node type is derived from which pool the entry came from.

/** Gateway entry nodes. */
const GATEWAY_NAMES = [
  ['api-gateway', 'API Gateway'],
  ['web-gateway', 'Web Gateway'],
  ['mobile-gateway', 'Mobile Gateway'],
  ['public-gateway', 'Public Gateway'],
  ['edge-gateway', 'Edge Gateway'],
  ['payment-gateway', 'Payment Gateway'],
  ['admin-gateway', 'Admin Gateway'],
  ['partner-gateway', 'Partner Gateway'],
];

/** API entry nodes. */
const API_NAMES = [
  ['rest-api', 'REST API'],
  ['public-api', 'Public API'],
  ['partner-api', 'Partner API'],
  ['internal-api', 'Internal API'],
  ['checkout-api', 'Checkout API'],
  ['reporting-api', 'Reporting API'],
];

/** Backend-for-frontend entry nodes. */
const BFF_NAMES = [
  ['web-bff', 'Web BFF'],
  ['mobile-bff', 'Mobile BFF'],
  ['dashboard-bff', 'Dashboard BFF'],
];

/** gRPC entry nodes. */
const GRPC_NAMES = [
  ['grpc-gateway', 'gRPC Gateway'],
  ['grpc-api', 'gRPC API'],
];

/** GraphQL entry nodes. */
const GRAPHQL_NAMES = [
  ['graphql-api', 'GraphQL API'],
  ['graph-api', 'Graph API'],
];

/** Service nodes as `[id, displayName]`; generators always give them type 'service'. */
const SERVICE_NAMES = [
  ['user-service', 'User Service'],
  ['order-service', 'Order Service'],
  ['payment-service', 'Payment Service'],
  ['inventory-service', 'Inventory Service'],
  ['notification-service', 'Notification Service'],
  ['billing-service', 'Billing Service'],
  ['shipping-service', 'Shipping Service'],
  ['catalog-service', 'Catalog Service'],
  ['search-service', 'Search Service'],
  ['auth-proxy', 'Auth Proxy'],
  ['profile-service', 'Profile Service'],
  ['report-service', 'Report Service'],
  ['analytics-service', 'Analytics Service'],
  ['fraud-service', 'Fraud Detection'],
  ['compliance-service', 'Compliance Service'],
  ['fulfillment-service', 'Fulfillment Service'],
  ['recommendation-service', 'Recommendation Service'],
  ['pricing-service', 'Pricing Service'],
  ['tax-service', 'Tax Service'],
  ['review-service', 'Review Service'],
];

/** Datastore nodes as `[id, displayName, nodeType]`. */
const DB_NAMES = [
  ['user-db', 'User DB', 'database'],
  ['order-db', 'Order DB', 'database'],
  ['payment-db', 'Payment DB', 'database'],
  ['inventory-db', 'Inventory DB', 'database'],
  ['main-postgres', 'Main Postgres', 'postgres'],
  ['analytics-db', 'Analytics DB', 'database'],
  ['audit-db', 'Audit DB', 'database'],
  ['session-cache', 'Session Cache', 'redis'],
  ['content-db', 'Content DB', 'mongodb'],
  ['ledger-db', 'Ledger DB', 'database'],
  ['archive-db', 'Archive DB', 'database'],
  ['events-table', 'Events Table', 'dynamo'],
  ['media-bucket', 'Media Bucket', 's3'],
];

/** Queue / stream nodes as `[id, displayName, nodeType]`. */
const QUEUE_NAMES = [
  ['email-queue', 'Email Queue', 'queue'],
  ['order-events', 'Order Events', 'kafka'],
  ['notification-queue', 'Notification Queue', 'queue'],
  ['payment-events', 'Payment Events', 'kafka'],
  ['job-queue', 'Job Queue', 'queue'],
];

/** Auth nodes as `[id, displayName, nodeType]`. */
const AUTH_NAMES = [
  ['jwt-middleware', 'JWT Middleware', 'auth'],
  ['oauth-provider', 'OAuth Provider', 'oauth'],
  ['keycloak', 'Keycloak', 'keycloak'],
  ['okta', 'Okta IdP', 'okta'],
  ['auth-middleware', 'Auth Middleware', 'middleware'],
  ['iam-service', 'IAM Service', 'iam'],
];
185
+
186
/**
 * Return one element of `arr` chosen with `Math.random()`.
 *
 * @param {Array} arr - Candidates.
 * @returns {*} The chosen element (`undefined` for an empty array).
 */
function pick(arr) {
  const index = Math.floor(Math.random() * arr.length);
  return arr[index];
}
189
+
190
/**
 * Return up to `n` distinct elements of `arr` in random order, without mutating `arr`.
 *
 * Uses a partial Fisher–Yates shuffle. Sorting with a random comparator is
 * biased and engine-dependent because the comparator is not consistent.
 *
 * @param {Array} arr - Candidates.
 * @param {number} n - Number of elements wanted; clamped to `[0, arr.length]`.
 * @returns {Array} A new array of `min(n, arr.length)` elements.
 */
function pickN(arr, n) {
  const pool = [...arr];
  // NaN and negative requests yield an empty result instead of a surprising slice.
  const count = Math.max(0, Math.min(Math.trunc(n) || 0, pool.length));
  for (let i = 0; i < count; i++) {
    // Choose uniformly from the not-yet-selected tail and swap it into slot i.
    const j = i + Math.floor(Math.random() * (pool.length - i));
    [pool[i], pool[j]] = [pool[j], pool[i]];
  }
  return pool.slice(0, count);
}
194
+
195
/**
 * Pick a random HTTP-like entry node: first a pool, then an entry inside it.
 * The node type follows from the pool that was chosen.
 *
 * @returns {{id: string, name: string, type: string}}
 */
function pickHttpLike() {
  // Pool → node type; same pool order as the selection list has always used.
  const choices = [
    { pool: GATEWAY_NAMES, type: 'gateway' },
    { pool: API_NAMES, type: 'api' },
    { pool: BFF_NAMES, type: 'bff' },
    { pool: GRPC_NAMES, type: 'grpc' },
    { pool: GRAPHQL_NAMES, type: 'graphql' },
  ];
  const { pool, type } = pick(choices);
  const [id, name] = pick(pool);
  return { id, name, type };
}
206
+
207
/**
 * Pick one of the default health-style URL paths.
 *
 * @returns {string} '/health', '/healthz', or '/ping'.
 */
function pickDefaultHealthUrl() {
  const healthPaths = ['/health', '/healthz', '/ping'];
  return pick(healthPaths);
}
210
+
211
/**
 * Config for an HTTP-like node with `authRequired: true` and a health-style `url`.
 * Use on HTTP-like nodes except generators that intentionally test IR-LINT-NO-HEALTHCHECK-003.
 *
 * @param {object} [extra] - Properties layered on top; they override the defaults.
 * @returns {object} A fresh config object.
 */
function httpCleanConfig(extra = {}) {
  const config = { authRequired: true, url: pickDefaultHealthUrl() };
  return Object.assign(config, extra);
}
215
+
216
/**
 * Build an edge record between two node ids.
 *
 * @param {string} from - Source node id.
 * @param {string} to - Target node id.
 * @param {string} [protocol='https'] - Stored under `metadata.protocol`.
 * @returns {{from: string, to: string, metadata: {protocol: string}}}
 */
function makeEdge(from, to, protocol = 'https') {
  const metadata = { protocol };
  return { from, to, metadata };
}
219
+
220
/**
 * Build an edge whose protocol is 'async'.
 * Keeps IR-LINT-SYNC-CHAIN-001 off clean layered graphs (async auth→service breaks sync depth).
 *
 * @param {string} from - Source node id.
 * @param {string} to - Target node id.
 * @returns {{from: string, to: string, metadata: {protocol: string}}}
 */
function makeAsyncEdge(from, to) {
  const metadata = { protocol: 'async' };
  return { from, to, metadata };
}
224
+
225
/**
 * Run the engine over one graph: normalize, then structural validation and lint.
 *
 * If normalization returns an object with `findings`, those are reported as the
 * structural findings, `ok` is false, and validation/lint are not run.
 *
 * @param {object} graph - Graph with `nodes` and `edges`.
 * @returns {{ok: boolean, structuralFindings: Array, lintFindings: Array, combined?: Array}}
 */
function runEngine(graph) {
  const ir = { graph };

  const normalized = normalizeIrGraph(ir);
  if ('findings' in normalized) {
    return { ok: false, structuralFindings: normalized.findings, lintFindings: [] };
  }

  const structuralFindings = validateIrStructural(ir);
  const lintFindings = validateIrLint(ir);
  const combined = [...structuralFindings, ...lintFindings];
  // ok means no finding of severity 'error' in either list.
  const hasError = combined.some((finding) => finding.severity === 'error');

  return { ok: !hasError, structuralFindings, lintFindings, combined };
}
241
+
242
/**
 * Shape one training pair from a graph and its engine result.
 * Only lint findings become `violations`; `ok` is copied from the result as-is.
 *
 * @param {string} id - Pair identifier.
 * @param {object} graph - Graph that was evaluated.
 * @param {{ok: boolean, lintFindings: Array}} result - Engine result.
 * @param {string} variant - Name of the generator variant.
 * @returns {object} The pair record.
 */
function toPair(id, graph, result, variant) {
  const violations = [];
  for (const finding of result.lintFindings) {
    const { code, severity, message } = finding;
    violations.push({
      code,
      severity,
      nodeId: finding.nodeId ?? null,
      message,
      fix: finding.fixHint ?? null,
    });
  }

  const output = { ok: result.ok, violations };
  return {
    id,
    instruction: 'Given this IR graph, what architecture violations exist?',
    variant,
    input: { graph },
    output,
  };
}
260
+
261
+ // ─── Graph generators ─────────────────────────────────────────────────────────
262
+
263
/**
 * Two-node graph: an HTTP-like entry node wired straight to a datastore over 'tcp'.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genDirectDbAccess() {
  const entry = pickHttpLike();
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const entryNode = { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() };
  const storeNode = { id: storeId, type: storeType, name: storeName };

  const graph = {
    nodes: [entryNode, storeNode],
    edges: [makeEdge(entry.id, storeId, 'tcp')],
  };
  return { graph, variant: 'direct-db-access' };
}
273
+
274
/**
 * Layered graph: HTTP-like entry → service (https) → datastore (tcp),
 * with the clean entry config.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genCleanServiceLayer() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
  };
  return { graph, variant: 'clean-service-layer' };
}
286
+
287
/**
 * Layered graph (entry → service → datastore) whose entry node has no `config`
 * at all, so it carries neither an auth flag nor a url.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genMissingAuth() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name },
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
  };
  return { graph, variant: 'missing-auth' };
}
299
+
300
/**
 * Graph with a dedicated auth node: entry → auth (https), auth → service (async),
 * service → datastore (tcp).
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genCleanWithAuth() {
  const entry = pickHttpLike();
  const [guardId, guardName, guardType] = pick(AUTH_NAMES);
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
      { id: guardId, type: guardType, name: guardName },
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [
      makeEdge(entry.id, guardId),
      makeAsyncEdge(guardId, serviceId),
      makeEdge(serviceId, storeId, 'tcp'),
    ],
  };
  return { graph, variant: 'clean-with-auth' };
}
318
+
319
/**
 * Layered graph whose entry config enables auth under a randomly chosen key
 * ('authRequired', 'auth', 'security' or 'authentication') and uses url '/health'.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genCleanAuthConfig() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  const authKey = pick(['authRequired', 'auth', 'security', 'authentication']);

  const entryConfig = { [authKey]: true, url: '/health' };
  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name, config: entryConfig },
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
  };
  return { graph, variant: 'clean-auth-config' };
}
332
+
333
/**
 * Fan-out graph: one entry node calling 5–8 services, each service with its own
 * datastore node (datastore id is suffixed with the service id to keep it unique).
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genHighFanout() {
  const entry = pickHttpLike();
  const fanout = 5 + Math.floor(Math.random() * 4);
  const services = pickN(SERVICE_NAMES, fanout);

  const entryNode = { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() };
  const serviceNodes = services.map(([id, name]) => ({ id, type: 'service', name }));

  // One datastore per service, drawn in service order.
  const stores = services.map(([serviceId]) => {
    const [baseId, storeName, storeType] = pick(DB_NAMES);
    return { serviceId, node: { id: `${baseId}__${serviceId}`, type: storeType, name: storeName } };
  });

  const nodes = [entryNode, ...serviceNodes, ...stores.map((store) => store.node)];
  const edges = stores.flatMap(({ serviceId, node }) => [
    makeEdge(entry.id, serviceId),
    makeEdge(serviceId, node.id, 'tcp'),
  ]);
  return { graph: { nodes, edges }, variant: 'high-fanout' };
}
351
+
352
/**
 * Chain graph: entry → service → … → service (3–5 services, all https hops),
 * with the last service writing to a datastore over 'tcp'.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genSyncChain() {
  const entry = pickHttpLike();
  const depth = 3 + Math.floor(Math.random() * 3);
  const chain = pickN(SERVICE_NAMES, depth);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const nodes = [
    { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
    ...chain.map(([id, name]) => ({ id, type: 'service', name })),
    { id: storeId, type: storeType, name: storeName },
  ];

  // Consecutive ids along the call path: entry first, then each service.
  const hopIds = [entry.id, ...chain.map(([id]) => id)];
  const edges = hopIds.slice(1).map((to, i) => makeEdge(hopIds[i], to));
  edges.push(makeEdge(hopIds[hopIds.length - 1], storeId, 'tcp'));

  return { graph: { nodes, edges }, variant: 'sync-chain' };
}
370
+
371
/**
 * Graph where a queue sits between services: entry → svc1 → queue → svc2 → svc3 → datastore.
 * The two queue hops use protocol 'amqp'; the three services are distinct.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genCleanAsyncBreak() {
  const entry = pickHttpLike();
  const [producerId, producerName] = pick(SERVICE_NAMES);
  const [queueId, queueName, queueType] = pick(QUEUE_NAMES);
  const [consumerId, consumerName] = pick(SERVICE_NAMES.filter(([id]) => id !== producerId));
  const [workerId, workerName] = pick(
    SERVICE_NAMES.filter(([id]) => id !== producerId && id !== consumerId),
  );
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
      { id: producerId, type: 'service', name: producerName },
      { id: queueId, type: queueType, name: queueName },
      { id: consumerId, type: 'service', name: consumerName },
      { id: workerId, type: 'service', name: workerName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [
      makeEdge(entry.id, producerId),
      makeEdge(producerId, queueId, 'amqp'),
      makeEdge(queueId, consumerId, 'amqp'),
      makeEdge(consumerId, workerId),
      makeEdge(workerId, storeId, 'tcp'),
    ],
  };
  return { graph, variant: 'clean-async-break' };
}
395
+
396
/**
 * Layered graph whose entry config has `authRequired: true` but no url.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genNoHealthcheck() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  // Intentionally no health-like url — only this generator should omit it for 003.
  const entryNode = { id: entry.id, type: entry.type, name: entry.name, config: { authRequired: true } };

  const graph = {
    nodes: [
      entryNode,
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
  };
  return { graph, variant: 'no-healthcheck' };
}
409
+
410
/**
 * Layered graph whose entry config has `authRequired: true` and a url drawn from
 * a wider set of health-style paths (/health, /healthz, /ping, /status, /ready, /live).
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genCleanHealthcheck() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  const url = pick(['/health', '/healthz', '/ping', '/status', '/ready', '/live']);

  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name, config: { authRequired: true, url } },
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
  };
  return { graph, variant: 'clean-healthcheck' };
}
423
+
424
/**
 * Layered graph plus one extra service node that has no edges at all.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genIsolatedNode() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  // The unconnected service must differ from the connected one.
  const [strayId, strayName] = pick(SERVICE_NAMES.filter(([id]) => id !== serviceId));

  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
      { id: strayId, type: 'service', name: strayName },
    ],
    edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
  };
  return { graph, variant: 'isolated-node' };
}
438
+
439
/**
 * Layered graph in which the entry → service edge appears twice.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genDuplicateEdge() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [
      makeEdge(entry.id, serviceId),
      makeEdge(entry.id, serviceId), // deliberate repeat of the edge above
      makeEdge(serviceId, storeId, 'tcp'),
    ],
  };
  return { graph, variant: 'duplicate-edge' };
}
451
+
452
/**
 * Layered graph whose entry node has no `name` property (its config is the clean one).
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genMissingName() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  // `name` is left off on purpose; everything else matches a clean entry node.
  const unnamedEntry = { id: entry.id, type: entry.type, config: httpCleanConfig() };

  const graph = {
    nodes: [
      unnamedEntry,
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [makeEdge(entry.id, serviceId), makeEdge(serviceId, storeId, 'tcp')],
  };
  return { graph, variant: 'missing-name' };
}
464
+
465
/**
 * Graph with one service and two distinct datastores and no HTTP-like entry node:
 * the service writes to the first datastore, and the second datastore only has an
 * outgoing edge to the first.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genDatastoreNoIncoming() {
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [targetId, targetName, targetType] = pick(DB_NAMES);
  const [sourceId, sourceName, sourceType] = pick(DB_NAMES.filter(([id]) => id !== targetId));

  const nodes = [
    { id: serviceId, type: 'service', name: serviceName },
    { id: targetId, type: targetType, name: targetName },
    { id: sourceId, type: sourceType, name: sourceName },
  ];
  // db2 has no incoming edges (008) but outgoing to db1 so it is not IR-LINT-ISOLATED-NODE-005.
  const edges = [makeEdge(serviceId, targetId, 'tcp'), makeEdge(sourceId, targetId, 'tcp')];

  return { graph: { nodes, edges }, variant: 'datastore-no-incoming' };
}
481
+
482
/**
 * Graph with two distinct HTTP-like entry nodes that both call the same service,
 * which in turn talks to a datastore over 'tcp'.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genMultipleHttpEntries() {
  const first = pickHttpLike();
  // Redraw until the second entry has a different id from the first.
  let second;
  do {
    second = pickHttpLike();
  } while (second.id === first.id);
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const graph = {
    nodes: [
      { id: first.id, type: first.type, name: first.name, config: httpCleanConfig() },
      { id: second.id, type: second.type, name: second.name, config: httpCleanConfig() },
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [
      makeEdge(first.id, serviceId),
      makeEdge(second.id, serviceId),
      makeEdge(serviceId, storeId, 'tcp'),
    ],
  };
  return { graph, variant: 'multiple-http-entries' };
}
497
+
498
/**
 * Graph where the entry calls two distinct services but only one of them
 * continues on to the datastore; the other has no outgoing edges.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genDeadNode() {
  const entry = pickHttpLike();
  const [liveId, liveName] = pick(SERVICE_NAMES);
  const [deadEndId, deadEndName] = pick(SERVICE_NAMES.filter(([id]) => id !== liveId));
  const [storeId, storeName, storeType] = pick(DB_NAMES);

  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig() },
      { id: liveId, type: 'service', name: liveName },
      { id: deadEndId, type: 'service', name: deadEndName },
      { id: storeId, type: storeType, name: storeName },
    ],
    edges: [
      makeEdge(entry.id, liveId),
      makeEdge(entry.id, deadEndId),
      makeEdge(liveId, storeId, 'tcp'),
    ],
  };
  return { graph, variant: 'dead-node' };
}
516
+
517
/**
 * Graph combining several irregularities: the entry config has a url but no auth
 * flag, the entry connects directly to the datastore over 'tcp', and one extra
 * service node has no edges.
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genMultiViolation() {
  const entry = pickHttpLike();
  const [serviceId, serviceName] = pick(SERVICE_NAMES);
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  const [strayId, strayName] = pick(SERVICE_NAMES.filter(([id]) => id !== serviceId));

  const entryConfig = { url: pickDefaultHealthUrl() };
  const graph = {
    nodes: [
      { id: entry.id, type: entry.type, name: entry.name, config: entryConfig },
      { id: serviceId, type: 'service', name: serviceName },
      { id: storeId, type: storeType, name: storeName },
      { id: strayId, type: 'service', name: strayName },
    ],
    edges: [makeEdge(entry.id, serviceId), makeEdge(entry.id, storeId, 'tcp')],
  };
  return { graph, variant: 'multi-violation' };
}
531
+
532
/**
 * Larger graph: entry → auth (https), auth → each of 2–4 services (async),
 * and every service → one shared datastore (tcp).
 *
 * @returns {{graph: {nodes: Array, edges: Array}, variant: string}}
 */
function genCleanGraph() {
  const entry = pickHttpLike();
  const [guardId, guardName, guardType] = pick(AUTH_NAMES);
  const services = pickN(SERVICE_NAMES, 2 + Math.floor(Math.random() * 3));
  const [storeId, storeName, storeType] = pick(DB_NAMES);
  const url = pick(['/health', '/healthz', '/ping', '/status']);

  const serviceIds = services.map(([id]) => id);
  const nodes = [
    { id: entry.id, type: entry.type, name: entry.name, config: httpCleanConfig({ url }) },
    { id: guardId, type: guardType, name: guardName },
    ...services.map(([id, name]) => ({ id, type: 'service', name })),
    { id: storeId, type: storeType, name: storeName },
  ];
  const edges = [
    makeEdge(entry.id, guardId),
    ...serviceIds.map((id) => makeAsyncEdge(guardId, id)),
    ...serviceIds.map((id) => makeEdge(id, storeId, 'tcp')),
  ];
  return { graph: { nodes, edges }, variant: 'clean-graph' };
}
551
+
552
/**
 * Generators with their relative sampling weights. A generator with weight w
 * is w times as likely to be drawn as one with weight 1.
 */
const GENERATORS = [
  { fn: genHighFanout, weight: 12 },
  { fn: genMissingName, weight: 10 },
  { fn: genDuplicateEdge, weight: 8 },
  { fn: genDirectDbAccess, weight: 8 },
  { fn: genMissingAuth, weight: 8 },
  { fn: genDatastoreNoIncoming, weight: 8 },
  { fn: genMultiViolation, weight: 6 },
  { fn: genDeadNode, weight: 5 },
  { fn: genMultipleHttpEntries, weight: 5 },
  { fn: genNoHealthcheck, weight: 2 },
  { fn: genIsolatedNode, weight: 2 },
  { fn: genSyncChain, weight: 3 },
  { fn: genCleanServiceLayer, weight: 5 },
  { fn: genCleanWithAuth, weight: 5 },
  { fn: genCleanAuthConfig, weight: 4 },
  { fn: genCleanAsyncBreak, weight: 3 },
  { fn: genCleanHealthcheck, weight: 3 },
  { fn: genCleanGraph, weight: 8 },
];

// Flat sampling pool: each generator repeated `weight` times, so a uniform pick is weighted.
const POOL = GENERATORS.flatMap((entry) => Array.from({ length: entry.weight }, () => entry.fn));
574
+
575
/**
 * Generate synthetic training pairs and write them as JSONL.
 *
 * Reads `--count` (default 500) and `--out` (default corpus/auto-generated.jsonl,
 * resolved against ROOT). Draws weighted generators, drops duplicate graphs and
 * graphs with structural errors, runs the engine on the rest, then writes one JSON
 * object per line and prints summary statistics.
 *
 * An invalid `--count` is reported and sets `process.exitCode = 1` without writing.
 *
 * @returns {void}
 */
function generateCorpus() {
  const rawCount = getArg('--count', '500');
  const TARGET_COUNT = Number.parseInt(rawCount, 10);
  // NaN or a negative count would otherwise fall through and silently write an empty file.
  if (!Number.isInteger(TARGET_COUNT) || TARGET_COUNT < 0) {
    console.error(`generate-corpus: --count must be a non-negative integer (got "${rawCount}")`);
    process.exitCode = 1;
    return;
  }
  const OUT_PATH = resolve(ROOT, getArg('--out', 'corpus/auto-generated.jsonl'));

  console.log(`Generating ${TARGET_COUNT} corpus pairs...`);

  // Create the directory the output actually lands in (honours a custom --out).
  mkdirSync(dirname(OUT_PATH), { recursive: true });

  const pairs = [];
  let skipped = 0;
  let attempts = 0;
  let failures = 0;
  let lastError = null;
  const seenGraphs = new Set();

  while (pairs.length < TARGET_COUNT) {
    attempts++;
    // Duplicates become frequent as the space of distinct graphs is exhausted; cap the retries.
    if (attempts > TARGET_COUNT * 10) {
      console.warn(`Stopping after ${attempts} attempts — possible infinite loop.`);
      break;
    }

    const gen = pick(POOL);
    let graphDef;
    try {
      graphDef = gen();
    } catch (err) {
      skipped++;
      failures++;
      lastError = err;
      continue;
    }

    // Structural dedupe: identical serialized graphs are emitted only once.
    const key = JSON.stringify(graphDef.graph);
    if (seenGraphs.has(key)) {
      skipped++;
      continue;
    }
    seenGraphs.add(key);

    let result;
    try {
      result = runEngine(graphDef.graph);
    } catch (err) {
      skipped++;
      failures++;
      lastError = err;
      continue;
    }

    if (result.structuralFindings?.some((f) => f.severity === 'error')) {
      skipped++;
      continue;
    }

    const id = `gen-${pairs.length}-${graphDef.variant}`;
    pairs.push(toPair(id, graphDef.graph, result, graphDef.variant));
  }

  const lines = pairs.map((pair) => JSON.stringify(pair));
  writeFileSync(OUT_PATH, lines.join('\n') + '\n', 'utf8');

  const withViolations = pairs.filter((p) => p.output.violations.length > 0).length;
  const withoutViolations = pairs.length - withViolations;
  const ruleCounts = {};
  for (const p of pairs) {
    for (const v of p.output.violations) {
      ruleCounts[v.code] = (ruleCounts[v.code] ?? 0) + 1;
    }
  }

  console.log(`\nDone.`);
  console.log(`  Written to: ${OUT_PATH}`);
  console.log(`  Total pairs: ${lines.length}`);
  console.log(`  With violations: ${withViolations}`);
  console.log(`  Clean (no lint violations): ${withoutViolations}`);
  console.log(`  Skipped (duplicates/errors): ${skipped}`);
  if (failures > 0) {
    // Surface swallowed exceptions so a broken generator or engine build is not mistaken for duplicates.
    console.warn(`  Generator/engine exceptions: ${failures} (last: ${lastError?.message ?? lastError})`);
  }
  console.log(`\nViolation distribution:`);
  for (const [code, count] of Object.entries(ruleCounts).sort((a, b) => b[1] - a[1])) {
    console.log(`  ${code}: ${count}`);
  }
}
651
+
652
/**
 * Entry point: validate the hand-written corpus by default, or generate a
 * synthetic corpus when `--generate` / `--count` was passed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (!wantsGenerate) {
    await validateHandwritten();
    return;
  }
  // Generation needs the compiled engine; fail with a hint instead of a stack trace.
  // NOTE(review): this module also imports ../dist/index.js statically, so a missing
  // build most likely fails at module load before this check runs — confirm.
  try {
    readFileSync(join(ROOT, 'dist', 'index.js'), 'utf8');
  } catch {
    console.error('generate-corpus: run `npm run build` first (dist/index.js missing).');
    process.exitCode = 1;
    return;
  }
  generateCorpus();
}

// Never leave the promise floating: report any unexpected failure and exit non-zero.
main().catch((err) => {
  console.error('generate-corpus: unexpected failure:', err);
  process.exitCode = 1;
});
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Optional install notice (not wired from package.json by default).
3
+ * Skipped in CI and silent installs. Enable only if you want a local console line on `npm install`.
4
+ */
5
+ import { readFileSync } from 'node:fs';
6
+ import { dirname, join } from 'node:path';
7
+ import { fileURLToPath } from 'node:url';
8
+
9
+ const __dirname = dirname(fileURLToPath(import.meta.url));
10
+ const root = join(__dirname, '..');
11
+
12
+ if (process.env.CI === 'true' || process.env.GITHUB_ACTIONS === 'true' || process.env.GITLAB_CI === 'true') {
13
+ process.exit(0);
14
+ }
15
+ if (process.env.npm_config_loglevel === 'silent') {
16
+ process.exit(0);
17
+ }
18
+
19
+ const pkg = JSON.parse(readFileSync(join(root, 'package.json'), 'utf8'));
20
+ const v = pkg.version;
21
+ const url = `https://archrad.com/docs/drift?ref=npm-${encodeURIComponent(v)}`;
22
+ console.log(`\n ${pkg.name}@${v} — Drift in CI (docs only; no package telemetry): ${url}\n`);