@maintainabilityai/research-runner 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +82 -0
  3. package/bin/research-runner.js +2 -0
  4. package/dist/cli.d.ts +1 -0
  5. package/dist/cli.js +209 -0
  6. package/dist/llm/anthropic-client.d.ts +39 -0
  7. package/dist/llm/anthropic-client.js +74 -0
  8. package/dist/llm/github-models-client.d.ts +46 -0
  9. package/dist/llm/github-models-client.js +78 -0
  10. package/dist/llm/llm-router.d.ts +46 -0
  11. package/dist/llm/llm-router.js +60 -0
  12. package/dist/mesh/get-mesh-sha.d.ts +1 -0
  13. package/dist/mesh/get-mesh-sha.js +27 -0
  14. package/dist/mesh/mesh-reader.d.ts +14 -0
  15. package/dist/mesh/mesh-reader.js +392 -0
  16. package/dist/mesh/prompt-loader.d.ts +22 -0
  17. package/dist/mesh/prompt-loader.js +119 -0
  18. package/dist/mesh/threat-model-reader.d.ts +33 -0
  19. package/dist/mesh/threat-model-reader.js +123 -0
  20. package/dist/runner/archeologist.d.ts +39 -0
  21. package/dist/runner/archeologist.js +620 -0
  22. package/dist/runner/audit-emitter.d.ts +62 -0
  23. package/dist/runner/audit-emitter.js +210 -0
  24. package/dist/runner/hatters-tag-builder.d.ts +52 -0
  25. package/dist/runner/hatters-tag-builder.js +40 -0
  26. package/dist/runner/nodes/analyze-architecture.d.ts +10 -0
  27. package/dist/runner/nodes/analyze-architecture.js +447 -0
  28. package/dist/runner/nodes/arxiv-search.d.ts +12 -0
  29. package/dist/runner/nodes/arxiv-search.js +52 -0
  30. package/dist/runner/nodes/clone-and-index.d.ts +32 -0
  31. package/dist/runner/nodes/clone-and-index.js +158 -0
  32. package/dist/runner/nodes/dedupe-and-rank.d.ts +27 -0
  33. package/dist/runner/nodes/dedupe-and-rank.js +98 -0
  34. package/dist/runner/nodes/deterministic-review.d.ts +55 -0
  35. package/dist/runner/nodes/deterministic-review.js +206 -0
  36. package/dist/runner/nodes/expert-review.d.ts +68 -0
  37. package/dist/runner/nodes/expert-review.js +197 -0
  38. package/dist/runner/nodes/gap-analysis.d.ts +48 -0
  39. package/dist/runner/nodes/gap-analysis.js +153 -0
  40. package/dist/runner/nodes/generate-prd-manifest.d.ts +53 -0
  41. package/dist/runner/nodes/generate-prd-manifest.js +209 -0
  42. package/dist/runner/nodes/hackernews-search.d.ts +12 -0
  43. package/dist/runner/nodes/hackernews-search.js +63 -0
  44. package/dist/runner/nodes/identify-gaps.d.ts +33 -0
  45. package/dist/runner/nodes/identify-gaps.js +185 -0
  46. package/dist/runner/nodes/plan-queries.d.ts +28 -0
  47. package/dist/runner/nodes/plan-queries.js +120 -0
  48. package/dist/runner/nodes/prd-validator.d.ts +51 -0
  49. package/dist/runner/nodes/prd-validator.js +203 -0
  50. package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +22 -0
  51. package/dist/runner/nodes/synthesis-archaeology-validator.js +131 -0
  52. package/dist/runner/nodes/synthesis-validator.d.ts +51 -0
  53. package/dist/runner/nodes/synthesis-validator.js +185 -0
  54. package/dist/runner/nodes/synthesize-prd.d.ts +84 -0
  55. package/dist/runner/nodes/synthesize-prd.js +202 -0
  56. package/dist/runner/nodes/synthesize-report.d.ts +53 -0
  57. package/dist/runner/nodes/synthesize-report.js +188 -0
  58. package/dist/runner/nodes/tavily-search.d.ts +21 -0
  59. package/dist/runner/nodes/tavily-search.js +57 -0
  60. package/dist/runner/nodes/uspto-search.d.ts +13 -0
  61. package/dist/runner/nodes/uspto-search.js +62 -0
  62. package/dist/runner/nodes/verify-grounding.d.ts +54 -0
  63. package/dist/runner/nodes/verify-grounding.js +134 -0
  64. package/dist/runner/prd.d.ts +28 -0
  65. package/dist/runner/prd.js +494 -0
  66. package/dist/schemas/audit-event.d.ts +1151 -0
  67. package/dist/schemas/audit-event.js +141 -0
  68. package/dist/schemas/index.d.ts +17 -0
  69. package/dist/schemas/index.js +33 -0
  70. package/dist/schemas/mesh-context.d.ts +415 -0
  71. package/dist/schemas/mesh-context.js +95 -0
  72. package/dist/schemas/observed-architecture.d.ts +262 -0
  73. package/dist/schemas/observed-architecture.js +90 -0
  74. package/dist/schemas/prd-brief.d.ts +111 -0
  75. package/dist/schemas/prd-brief.js +37 -0
  76. package/dist/schemas/prd-doc.d.ts +249 -0
  77. package/dist/schemas/prd-doc.js +42 -0
  78. package/dist/schemas/prd-manifest.d.ts +171 -0
  79. package/dist/schemas/prd-manifest.js +73 -0
  80. package/dist/schemas/primitives.d.ts +47 -0
  81. package/dist/schemas/primitives.js +41 -0
  82. package/dist/schemas/query-plan.d.ts +33 -0
  83. package/dist/schemas/query-plan.js +25 -0
  84. package/dist/schemas/ranked-source.d.ts +82 -0
  85. package/dist/schemas/ranked-source.js +29 -0
  86. package/dist/schemas/research-brief.d.ts +114 -0
  87. package/dist/schemas/research-brief.js +49 -0
  88. package/dist/schemas/research-doc.d.ts +104 -0
  89. package/dist/schemas/research-doc.js +37 -0
  90. package/dist/search/arxiv-client.d.ts +41 -0
  91. package/dist/search/arxiv-client.js +88 -0
  92. package/dist/search/hackernews-client.d.ts +33 -0
  93. package/dist/search/hackernews-client.js +44 -0
  94. package/dist/search/provider-result.d.ts +25 -0
  95. package/dist/search/provider-result.js +2 -0
  96. package/dist/search/tavily-client.d.ts +38 -0
  97. package/dist/search/tavily-client.js +53 -0
  98. package/dist/search/uspto-client.d.ts +50 -0
  99. package/dist/search/uspto-client.js +112 -0
  100. package/dist/utils/run-id.d.ts +2 -0
  101. package/dist/utils/run-id.js +22 -0
  102. package/package.json +53 -0
@@ -0,0 +1,158 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.cloneAndIndex = cloneAndIndex;
37
+ /**
38
+ * clone_and_index — pure node (archaeology path).
39
+ *
40
+ * Shallow-clones the target repo into a temp directory, walks the file
41
+ * tree, and returns an inventory + the clone SHA. We skip non-source
42
+ * directories (.git, node_modules, dist, build, .next, target,
43
+ * __pycache__, venv) to keep the inventory honest.
44
+ *
45
+ * Phase 3a doesn't parse any code yet — that's analyze_architecture's
46
+ * job. This node is small + pure + fast so the orchestrator can decide
47
+ * whether to even bother analyzing (e.g. an empty repo or a docs-only
48
+ * mirror short-circuits to a node_error before we burn LLM tokens).
49
+ */
50
+ const node_child_process_1 = require("node:child_process");
51
+ const fs = __importStar(require("node:fs"));
52
+ const os = __importStar(require("node:os"));
53
+ const path = __importStar(require("node:path"));
54
/** Entry names that are excluded from the inventory (members are lower-case). */
const SKIP_DIRS = new Set([
    // VCS + dependency/build output
    '.git',
    'node_modules',
    'dist',
    'build',
    '.next',
    'target',
    // Python environments + caches
    '__pycache__',
    '.venv',
    'venv',
    '.pytest_cache',
    '.cache',
    // Tooling / IDE state
    '.terraform',
    '.gradle',
    'out',
    '.vs',
    '.idea',
]);
/** File names recorded as root manifests when found at the top level (members are lower-case). */
const KNOWN_MANIFESTS = new Set([
    // JavaScript
    'package.json',
    'package-lock.json',
    'pnpm-lock.yaml',
    'yarn.lock',
    // Python
    'pyproject.toml',
    'requirements.txt',
    'setup.py',
    'pipfile',
    // Rust
    'cargo.toml',
    'cargo.lock',
    // Go
    'go.mod',
    'go.sum',
    // JVM
    'pom.xml',
    'build.gradle',
    'build.gradle.kts',
    // Ruby
    'gemfile',
    'gemfile.lock',
    // PHP
    'composer.json',
    'composer.lock',
    // Elixir
    'mix.exs',
]);
/** Upper bound on how many file paths are listed by name (keeps the audit payload small). */
const MAX_SOURCE_FILES = 200;
71
/**
 * Shallow-clone a repository into a fresh temp directory and inventory its files.
 *
 * @param opts.targetRepo  `owner/repo` slug; must match /^[\w.-]+\/[\w.-]+$/.
 * @param opts.parentDir   Directory in which the clone directory is created
 *                         (created if missing; defaults to `os.tmpdir()`).
 * @param opts.originUrl   Clone URL override; defaults to
 *                         `https://github.com/<targetRepo>.git`.
 * @param opts.ref         Optional ref handed to `git clone --branch`.
 * @returns `{ cloneDir, cloneSha, inventory }`. On success the clone directory
 *          is left on disk for the caller.
 * @throws Error when the slug is malformed, `git clone` fails or cannot be
 *         spawned, HEAD cannot be resolved, or the inventory walk fails. In
 *         every failure after the directory was created, it is removed first.
 */
function cloneAndIndex(opts) {
    if (!/^[\w.-]+\/[\w.-]+$/.test(opts.targetRepo)) {
        throw new Error(`clone_and_index: invalid targetRepo "${opts.targetRepo}"; expected owner/repo`);
    }
    const parentDir = opts.parentDir ?? os.tmpdir();
    fs.mkdirSync(parentDir, { recursive: true });
    const cloneDir = fs.mkdtempSync(path.join(parentDir, 'archeologist-clone-'));
    try {
        const originUrl = opts.originUrl ?? `https://github.com/${opts.targetRepo}.git`;
        const branchArgs = opts.ref ? ['--branch', opts.ref] : ['--single-branch'];
        // `--` ends option parsing so a caller-supplied originUrl that starts
        // with "-" can never be interpreted as a git option.
        const cloneArgs = ['clone', '--depth', '1', ...branchArgs, '--', originUrl, cloneDir];
        const cloneResult = node_child_process_1.spawnSync('git', cloneArgs, { stdio: ['ignore', 'pipe', 'pipe'], encoding: 'utf8' });
        if (cloneResult.error || cloneResult.status !== 0) {
            // When git could not be spawned at all, status is null and stderr is
            // empty; surface the spawn error instead of an empty detail string.
            const detail = cloneResult.error ? String(cloneResult.error.message) : (cloneResult.stderr || '');
            throw new Error(`clone_and_index: git clone failed (status=${cloneResult.status}): ${detail.slice(0, 400)}`);
        }
        const shaResult = node_child_process_1.spawnSync('git', ['rev-parse', 'HEAD'], { cwd: cloneDir, stdio: ['ignore', 'pipe', 'ignore'], encoding: 'utf8' });
        const cloneSha = (shaResult.stdout || '').trim();
        if (!/^[0-9a-f]{7,40}$/.test(cloneSha)) {
            throw new Error('clone_and_index: failed to resolve clone HEAD SHA after clone');
        }
        const inventory = walkInventory(cloneDir);
        return { cloneDir, cloneSha, inventory };
    }
    catch (err) {
        // Single cleanup point: covers clone failure, SHA failure, and a
        // throwing inventory walk, so no temp directory is left behind.
        fs.rmSync(cloneDir, { recursive: true, force: true });
        throw err;
    }
}
97
/**
 * Recursively inventory the files under `rootDir`.
 *
 * Entries whose lower-cased name is in SKIP_DIRS are ignored at every level.
 * Only regular files are counted; anything that is neither a directory nor a
 * regular file is skipped. Unreadable directories and files that cannot be
 * stat'ed are silently skipped (best-effort walk).
 *
 * Directory entries are processed in name order (UTF-16 code-unit order), so
 * `topLevelEntries`, the capped `sourceFiles` sample, and the order of
 * `byExtension` keys do not depend on the order the filesystem returns them.
 *
 * @param rootDir Directory to walk; reading it must succeed (a failure to
 *                list the root itself propagates).
 * @returns `{ totalFiles, totalBytes, byExtension, rootManifests,
 *          topLevelEntries, sourceFiles }`, where `sourceFiles` holds at most
 *          MAX_SOURCE_FILES root-relative paths and `byExtension` is ordered
 *          by descending count, then extension name.
 */
function walkInventory(rootDir) {
    const compareText = (a, b) => (a < b ? -1 : a > b ? 1 : 0);
    const listSorted = (dir) => fs
        .readdirSync(dir, { withFileTypes: true })
        .sort((a, b) => compareText(a.name, b.name));
    const isSkipped = (ent) => SKIP_DIRS.has(ent.name.toLowerCase());
    const inventory = {
        totalFiles: 0,
        totalBytes: 0,
        byExtension: {},
        rootManifests: [],
        topLevelEntries: [],
        sourceFiles: [],
    };
    // Top-level entries keep their original case so "Api" vs "api" stays visible.
    for (const ent of listSorted(rootDir)) {
        if (isSkipped(ent)) {
            continue;
        }
        inventory.topLevelEntries.push(ent.name);
        if (ent.isFile() && KNOWN_MANIFESTS.has(ent.name.toLowerCase())) {
            inventory.rootManifests.push(ent.name);
        }
    }
    const extensionCounts = new Map();
    const walk = (dir) => {
        let entries;
        try {
            entries = listSorted(dir);
        }
        catch {
            // Unreadable directory: skip it rather than abort the whole walk.
            return;
        }
        for (const ent of entries) {
            if (isSkipped(ent)) {
                continue;
            }
            const full = path.join(dir, ent.name);
            if (ent.isDirectory()) {
                walk(full);
                continue;
            }
            if (!ent.isFile()) {
                continue;
            }
            let stat;
            try {
                stat = fs.statSync(full);
            }
            catch {
                // File vanished or is unreadable: leave it out of the totals.
                continue;
            }
            inventory.totalFiles += 1;
            inventory.totalBytes += stat.size;
            const ext = (path.extname(ent.name) || '<noext>').toLowerCase();
            extensionCounts.set(ext, (extensionCounts.get(ext) ?? 0) + 1);
            if (inventory.sourceFiles.length < MAX_SOURCE_FILES) {
                inventory.sourceFiles.push(path.relative(rootDir, full));
            }
        }
    };
    walk(rootDir);
    // Descending by count; equal counts fall back to the extension name so the
    // key order is fully determined by the tree contents.
    inventory.byExtension = Object.fromEntries([...extensionCounts].sort(([extA, countA], [extB, countB]) => countB - countA || compareText(extA, extB)));
    return inventory;
}
@@ -0,0 +1,27 @@
1
+ /**
2
+ * dedupe_and_rank — pure node.
3
+ *
4
+ * Inputs: the flat list of ProviderResult from every search node
5
+ * (tavily, arxiv, uspto, hackernews — any combination).
6
+ *
7
+ * Behaviour:
8
+ * 1. Canonicalize URLs (lowercase host, strip default port, drop fragment,
9
+ * drop trailing slash, drop common tracking query params).
10
+ * 2. Collapse duplicates by canonical URL — keep the highest-scoring
11
+ * occurrence's title/excerpt, average the scores, multiply by a small recall
12
+ * boost (1 + 0.15 × extra queries that surfaced the same source), cap at 1.
13
+ * 3. Sort desc by composite score, take top N (default 20).
14
+ * 4. Assign sequential S1, S2, … ids — the canonical citation tokens the
15
+ * synthesis prompt references. Preserves provider, authors, and
16
+ * publication date through to the published doc.
17
+ */
18
+ import type { ProviderResult } from '../../search/provider-result';
19
+ import type { RankedSource } from '../../schemas';
20
+ export interface DedupeAndRankOpts {
21
+ /** Flat ProviderResult list across all providers. */
22
+ results: ProviderResult[];
23
+ topN?: number;
24
+ retrievedAt?: string;
25
+ }
26
+ export declare function canonicalizeUrl(rawUrl: string): string;
27
+ export declare function dedupeAndRank(opts: DedupeAndRankOpts): RankedSource[];
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.canonicalizeUrl = canonicalizeUrl;
4
+ exports.dedupeAndRank = dedupeAndRank;
5
/** Query-parameter names (lower-case) that are dropped during canonicalization. */
const TRACKING_PARAMS = new Set([
    'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
    'gclid', 'fbclid', 'mc_cid', 'mc_eid', 'ref', 'ref_src', 'ref_url',
]);
/**
 * Reduce a URL to the canonical string used as the dedupe key.
 *
 * Steps: trim, lower-case the host, drop an explicit default port (80 for
 * http, 443 for https), delete every query parameter whose lower-cased name
 * is in TRACKING_PARAMS, drop the fragment, strip trailing slashes from the
 * path (an empty path becomes "/"), and drop a dangling "?" left by an
 * empty query.
 *
 * @param rawUrl URL text as returned by a search provider.
 * @returns The serialized canonical URL, or the trimmed, lower-cased input
 *          when it cannot be parsed as a URL.
 */
function canonicalizeUrl(rawUrl) {
    try {
        const u = new URL(rawUrl.trim());
        u.hostname = u.hostname.toLowerCase();
        const isDefaultPort = (u.protocol === 'http:' && u.port === '80') || (u.protocol === 'https:' && u.port === '443');
        if (isDefaultPort) {
            u.port = '';
        }
        // Snapshot the keys first: deleting while iterating the live view skips entries.
        for (const key of [...u.searchParams.keys()]) {
            if (TRACKING_PARAMS.has(key.toLowerCase())) {
                u.searchParams.delete(key);
            }
        }
        // "https://host/a?" parses with an empty (not absent) query, which still
        // serializes as a trailing "?". Assigning '' removes the query entirely so
        // it collapses with "https://host/a".
        if (u.search === '') {
            u.search = '';
        }
        u.hash = '';
        const trimmedPath = u.pathname.replace(/\/+$/, '');
        u.pathname = trimmedPath === '' ? '/' : trimmedPath;
        return u.toString();
    }
    catch {
        // Not parseable as a URL: fall back to a case-insensitive text key.
        return rawUrl.trim().toLowerCase();
    }
}
33
/**
 * Collapse provider results that share a canonical URL and rank the survivors.
 *
 * Per canonical URL the aggregate keeps: the provider of the first occurrence,
 * the title/excerpt of the highest-scoring occurrence (first one wins a tie),
 * the first non-empty published date and authors list, the score sum, the
 * occurrence count, and the set of queries that surfaced it.
 *
 * Composite score = min(1, mean score × (1 + 0.15 × (distinct queries − 1))).
 * Results are sorted by descending composite, truncated to `topN`, and given
 * sequential ids S1, S2, ….
 *
 * @param opts.results     Flat provider results; entries without a `url` are ignored.
 * @param opts.topN        Maximum number of sources returned (default 20).
 * @param opts.retrievedAt Timestamp stamped on every source (default: now, ISO-8601).
 * @returns Ranked sources with `published_at` / `authors` present only when known.
 */
function dedupeAndRank(opts) {
    const topN = opts.topN ?? 20;
    const retrievedAt = opts.retrievedAt ?? new Date().toISOString();
    const bucket = new Map();
    for (const r of opts.results) {
        if (!r.url) {
            continue;
        }
        // A missing or non-numeric score would turn the sum into NaN and make
        // the sort order meaningless; count such a hit with score 0 instead.
        const score = Number.isFinite(r.score) ? r.score : 0;
        const canonical = canonicalizeUrl(r.url);
        const existing = bucket.get(canonical);
        if (!existing) {
            bucket.set(canonical, {
                canonicalUrl: canonical,
                provider: r.provider,
                title: r.title || canonical,
                excerpt: (r.content || '').slice(0, 500),
                publishedAt: r.publishedDate,
                authors: r.authors,
                scoreSum: score,
                bestScore: score,
                occurrences: 1,
                queries: new Set([r.fromQuery]),
            });
            continue;
        }
        existing.scoreSum += score;
        existing.occurrences += 1;
        existing.queries.add(r.fromQuery);
        // Compare against the best score seen so far — not the running mean,
        // which would let a mediocre hit replace the best one whenever
        // low-scoring duplicates had dragged the mean down.
        if (score > existing.bestScore) {
            existing.bestScore = score;
            existing.title = r.title || existing.title;
            if (r.content) {
                existing.excerpt = r.content.slice(0, 500);
            }
        }
        if (!existing.publishedAt && r.publishedDate) {
            existing.publishedAt = r.publishedDate;
        }
        if (!existing.authors && r.authors && r.authors.length > 0) {
            existing.authors = r.authors;
        }
    }
    const ranked = [...bucket.values()]
        .map(aggregated => {
        const recall = 1 + 0.15 * (aggregated.queries.size - 1);
        const composite = Math.min(1, aggregated.scoreSum * recall / Math.max(1, aggregated.occurrences));
        return { aggregated, composite };
    })
        .sort((a, b) => b.composite - a.composite)
        .slice(0, topN);
    return ranked.map(({ aggregated, composite }, i) => ({
        id: `S${i + 1}`,
        provider: aggregated.provider,
        title: aggregated.title.slice(0, 300),
        url: aggregated.canonicalUrl,
        retrieved_at: retrievedAt,
        salience_score: roundTo(composite, 4),
        excerpt: aggregated.excerpt.slice(0, 500),
        ...(aggregated.publishedAt ? { published_at: aggregated.publishedAt } : {}),
        ...(aggregated.authors && aggregated.authors.length > 0 ? { authors: aggregated.authors } : {}),
    }));
}
95
/** Round `n` to `digits` decimal places using Math.round on the scaled value. */
function roundTo(n, digits) {
    const scale = Math.pow(10, digits);
    const scaled = Math.round(n * scale);
    return scaled / scale;
}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * deterministic_architecture_review + deterministic_security_review
3
+ *
4
+ * Companion to the LLM expert reviewers — these are PURE (no LLM call) and
5
+ * grep-based. They produce the deterministic counter-signal verify_grounding
6
+ * needs to enforce the "both-must-pass" rule from the v0.6 spec:
7
+ *
8
+ * - invalid_citations: PRD cites an ID that doesn't exist in the mesh
9
+ * (e.g. FR-02 traces to R5 but R5 was never declared;
10
+ * SR-01 cites THR-999 but mesh.bar.threats has none).
11
+ * - coverage_discrepancies: the Coverage Analysis table claims YES/PARTIAL
12
+ * for a premise but no FR/SR actually cites it,
13
+ * or vice versa (says NO but body covers it).
14
+ *
15
+ * Severity: PASS if no invalid + no discrepancies. MINOR if only discrepancies.
16
+ * MAJOR if any invalid_citation. (Deterministic reviewers don't go to BLOCKING
17
+ * — that's an LLM-only judgment about substantive issues.)
18
+ */
19
+ import type { MeshContext } from '../../schemas';
20
+ import type { CoverageStatus, PrdCitationSignals } from './prd-validator';
21
+ export type DeterministicExpertKind = 'architecture' | 'security';
22
+ export type DeterministicSeverity = 'PASS' | 'MINOR' | 'MAJOR';
23
+ export interface InvalidCitation {
24
+ /** Where the bad citation lives, e.g. "FR-02" or "SR-01" or "Coverage row R5". */
25
+ where: string;
26
+ cite: string;
27
+ reason: string;
28
+ }
29
+ export interface CoverageDiscrepancy {
30
+ premise: string;
31
+ claimed_status: CoverageStatus | 'MISSING_ROW';
32
+ observed: 'cited_by_body' | 'not_cited_by_body';
33
+ detail: string;
34
+ }
35
+ export interface DeterministicReview {
36
+ expert: DeterministicExpertKind;
37
+ iteration: number;
38
+ severity: DeterministicSeverity;
39
+ invalid_citations: InvalidCitation[];
40
+ coverage_discrepancies: CoverageDiscrepancy[];
41
+ /** Reviewer-specific stats — e.g. `{ calm_nodes_referenced: 3 }` for arch. */
42
+ stats: Record<string, number>;
43
+ }
44
+ export interface ArchitectureReviewOpts {
45
+ iteration: number;
46
+ signals: PrdCitationSignals;
47
+ meshContext: MeshContext;
48
+ }
49
+ export declare function deterministicArchitectureReview(opts: ArchitectureReviewOpts): DeterministicReview;
50
+ export interface SecurityReviewOpts {
51
+ iteration: number;
52
+ signals: PrdCitationSignals;
53
+ meshContext: MeshContext;
54
+ }
55
+ export declare function deterministicSecurityReview(opts: SecurityReviewOpts): DeterministicReview;
@@ -0,0 +1,206 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.deterministicArchitectureReview = deterministicArchitectureReview;
4
+ exports.deterministicSecurityReview = deterministicSecurityReview;
5
// OWASP Top 10 (2021) — what an A0X citation must match. v2026-04 catalog
// stays at 2021 since the next revision is still draft as of this release;
// when 2025 ships we add A11/etc. here.
const OWASP_2021 = new Set(['A01', 'A02', 'A03', 'A04', 'A05', 'A06', 'A07', 'A08', 'A09', 'A10']);
// NIST 800-53 control families — we don't validate the number, just the
// family prefix. These are the 20 families of the Rev 5 catalog; 'SU' was
// previously listed but is not a Rev 5 family, so NIST-SU-* citations are
// now reported as invalid.
const NIST_FAMILIES = new Set([
    'AC', 'AT', 'AU', 'CA', 'CM', 'CP', 'IA', 'IR', 'MA', 'MP',
    'PE', 'PL', 'PM', 'PS', 'PT', 'RA', 'SA', 'SC', 'SI', 'SR',
]);
15
/**
 * Deterministic (no-LLM) architecture review of a PRD's citation signals.
 *
 * Flags every FR citation that is not one of the declared premise ids,
 * computes the coverage-table discrepancies, and derives the severity from
 * both lists. Stats report the number of CALM node ids found in the mesh
 * context and the FR / SR entry counts.
 *
 * @param opts.iteration   Review iteration number, echoed into the result.
 * @param opts.signals     Citation signals (premise_ids, fr_entries, sr_entries, coverage_rows).
 * @param opts.meshContext Mesh context the CALM node ids are read from.
 * @returns The review record with `expert: 'architecture'`.
 */
function deterministicArchitectureReview(opts) {
    const { iteration, signals } = opts;
    const declaredPremises = new Set(signals.premise_ids);
    // One finding per (FR, undeclared cite) pair, in document order.
    const invalidCitations = [...signals.fr_entries].flatMap(fr => [...fr.cited]
        .filter(cite => !declaredPremises.has(cite))
        .map(cite => ({
        where: fr.id,
        cite,
        reason: `Premise ${cite} not declared in Input Premises section`,
    })));
    const coverageDiscrepancies = computeCoverageDiscrepancies(signals, /*forSecurity*/ false);
    // The signals carry no raw PRD body, so the only CALM figure available
    // here is how many node ids the mesh declares.
    const calmNodeIds = extractCalmNodeIds(opts.meshContext);
    const stats = {
        calm_nodes_in_scope: calmNodeIds.size,
        fr_count: signals.fr_entries.length,
        sr_count: signals.sr_entries.length,
    };
    const severity = severityFrom(invalidCitations, coverageDiscrepancies);
    return {
        expert: 'architecture',
        iteration,
        severity,
        invalid_citations: invalidCitations,
        coverage_discrepancies: coverageDiscrepancies,
        stats,
    };
}
53
/**
 * Deterministic (no-LLM) security review of a PRD's SR citations.
 *
 * Each SR citation is classified by shape:
 *   - `THR-*`          counted; invalid if the mesh declares threats and this id is not among them
 *   - `A` + two digits counted; invalid if not in OWASP_2021
 *   - `NIST-XX-n`      counted; invalid if family XX is not in NIST_FAMILIES
 * Citations of any other shape are neither counted nor validated here.
 *
 * @param opts.iteration   Review iteration number, echoed into the result.
 * @param opts.signals     Citation signals (sr_entries plus what the coverage check reads).
 * @param opts.meshContext Mesh context; threat ids come from `bar.threats` when it is an array.
 * @returns The review record with `expert: 'security'`.
 */
function deterministicSecurityReview(opts) {
    const { iteration, signals, meshContext } = opts;
    const declaredThreats = meshContext.bar?.threats;
    const meshThreatIds = new Set();
    if (Array.isArray(declaredThreats)) {
        declaredThreats.forEach(threat => {
            if (threat.id) {
                meshThreatIds.add(threat.id);
            }
        });
    }
    const tally = { threat: 0, owasp: 0, nist: 0 };
    const invalid = [];
    const flag = (where, cite, reason) => {
        invalid.push({ where, cite, reason });
    };
    for (const { id: srId, cited } of signals.sr_entries) {
        for (const cite of cited) {
            if (cite.startsWith('THR-')) {
                tally.threat += 1;
                // Without a threats catalogue in the BAR there is nothing to
                // check a THR-* id against, so it is counted but not judged.
                const catalogueAvailable = meshThreatIds.size > 0;
                if (catalogueAvailable && !meshThreatIds.has(cite)) {
                    flag(srId, cite, `Threat ${cite} not present in mesh.bar.threats`);
                }
                continue;
            }
            if (/^A\d{2}$/.test(cite)) {
                tally.owasp += 1;
                if (!OWASP_2021.has(cite)) {
                    flag(srId, cite, `OWASP id ${cite} out of range (A01–A10 for 2021 catalog)`);
                }
                continue;
            }
            const nistMatch = /^NIST-([A-Z]{2})-\d+$/.exec(cite);
            if (nistMatch) {
                tally.nist += 1;
                const family = nistMatch[1];
                if (!NIST_FAMILIES.has(family)) {
                    flag(srId, cite, `NIST family ${family} not in 800-53 Rev 5 catalogue`);
                }
            }
        }
    }
    const discrepancies = computeCoverageDiscrepancies(signals, /*forSecurity*/ true);
    const stats = {
        sr_count: signals.sr_entries.length,
        threat_citations: tally.threat,
        owasp_citations: tally.owasp,
        nist_citations: tally.nist,
        threats_in_scope: meshThreatIds.size,
    };
    const severity = severityFrom(invalid, discrepancies);
    return {
        expert: 'security',
        iteration,
        severity,
        invalid_citations: invalid,
        coverage_discrepancies: discrepancies,
        stats,
    };
}
122
+ // ============================================================================
123
+ // Shared helpers
124
+ // ============================================================================
125
/**
 * Cross-check the Coverage Analysis table against what the FR/SR entries cite.
 *
 * Findings, in output order:
 *   1. For each table row, in table order:
 *        - status YES or PARTIAL, but no FR/SR cites the premise → observed 'not_cited_by_body'
 *        - status NO, but at least one FR/SR cites the premise   → observed 'cited_by_body'
 *   2. For each declared premise that has no table row → claimed_status
 *      'MISSING_ROW', with `observed` reporting whether the body cites it.
 *
 * The second parameter is accepted for call-site symmetry and is not read.
 */
function computeCoverageDiscrepancies(signals, _forSecurity) {
    // Every id cited by any FR or SR entry.
    const citedByBody = new Set([...signals.fr_entries, ...signals.sr_entries].flatMap(entry => [...entry.cited]));
    const findings = [];
    const premisesWithRow = new Set();
    for (const { premise, status } of signals.coverage_rows) {
        premisesWithRow.add(premise);
        const cited = citedByBody.has(premise);
        const claimsCoverage = status === 'YES' || status === 'PARTIAL';
        if (claimsCoverage && !cited) {
            findings.push({
                premise,
                claimed_status: status,
                observed: 'not_cited_by_body',
                detail: `Coverage table claims ${status} but no FR/SR cites ${premise}`,
            });
            continue;
        }
        if (status === 'NO' && cited) {
            findings.push({
                premise,
                claimed_status: status,
                observed: 'cited_by_body',
                detail: `Coverage table claims NO but body cites ${premise} in an FR/SR — table is stale`,
            });
        }
    }
    // Declared premises the table never mentions.
    signals.premise_ids
        .filter(premise => !premisesWithRow.has(premise))
        .forEach(premise => {
        findings.push({
            premise,
            claimed_status: 'MISSING_ROW',
            observed: citedByBody.has(premise) ? 'cited_by_body' : 'not_cited_by_body',
            detail: `Premise ${premise} has no row in Coverage Analysis table`,
        });
    });
    return findings;
}
180
/**
 * Map the two finding lists to a severity: any invalid citation is MAJOR,
 * otherwise any coverage discrepancy is MINOR, otherwise PASS.
 */
function severityFrom(invalid, discrepancies) {
    const hasInvalid = invalid.length > 0;
    const hasDiscrepancy = discrepancies.length > 0;
    return hasInvalid ? 'MAJOR' : hasDiscrepancy ? 'MINOR' : 'PASS';
}
189
/**
 * Collect the string `unique-id` of every node in `mesh.bar.calm_model.nodes`.
 *
 * Returns an empty set when the CALM model is absent or not an object, or
 * when `nodes` is not an array. Entries that are null/undefined or whose
 * `unique-id` is not a string are skipped rather than aborting the review.
 *
 * @param mesh Mesh context (only `bar.calm_model.nodes` is read).
 * @returns Set of node ids in first-seen order.
 */
function extractCalmNodeIds(mesh) {
    const ids = new Set();
    const calm = mesh.bar?.calm_model;
    if (!calm || typeof calm !== 'object') {
        return ids;
    }
    const nodes = calm.nodes;
    if (!Array.isArray(nodes)) {
        return ids;
    }
    for (const node of nodes) {
        // Optional access: a null or undefined array entry must not throw.
        const id = node?.['unique-id'];
        if (typeof id === 'string') {
            ids.add(id);
        }
    }
    return ids;
}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * expert_review — shared LLM node for architecture_review + security_review.
3
+ *
4
+ * The two expert reviews differ only by:
5
+ * - which `.caterpillar/prompts/prd/{architecture-review,security-review}.md`
6
+ * pack they load
7
+ * - which mesh context they receive (CALM nodes/ADRs for arch;
8
+ * STRIDE/OWASP/NIST for sec)
9
+ *
10
+ * Both prompts contractually return a structured-text block:
11
+ * SCORE: <float 0.00 - 1.00>
12
+ * SEVERITY: <PASS | MINOR | MAJOR | BLOCKING>
13
+ * COVERED: <comma-separated IDs>
14
+ * MISSING: <comma-separated IDs>
15
+ * CHANGES:
16
+ * - <change 1>
17
+ * - <change 2>
18
+ *
19
+ * We parse this with regex; the parser is lenient about whitespace + casing.
20
+ * Output flows directly into verify_grounding.
21
+ */
22
+ import type { LlmProvider, MeshContext } from '../../schemas';
23
+ import { type LoadedPrompt } from '../../mesh/prompt-loader';
24
+ export type ExpertKind = 'architecture' | 'security';
25
+ export type ReviewSeverity = 'PASS' | 'MINOR' | 'MAJOR' | 'BLOCKING';
26
+ export interface ExpertReview {
27
+ expert: ExpertKind;
28
+ iteration: number;
29
+ score: number;
30
+ severity: ReviewSeverity;
31
+ /** Field names mirror GroundingBlock.iterations so verify_grounding can pass them through. */
32
+ covered_ids: string[];
33
+ missing_ids: string[];
34
+ changes: string[];
35
+ }
36
+ export interface ExpertReviewOpts {
37
+ meshDir: string;
38
+ expert: ExpertKind;
39
+ iteration: number;
40
+ prdBody: string;
41
+ meshContext: MeshContext;
42
+ /** Prior iteration's review (passed back to the LLM for delta-checking). */
43
+ priorReview?: ExpertReview;
44
+ provider: LlmProvider;
45
+ anthropicApiKey?: string;
46
+ githubToken?: string;
47
+ fetchImpl?: typeof fetch;
48
+ }
49
+ export interface ExpertReviewResult {
50
+ review: ExpertReview;
51
+ prompt: LoadedPrompt;
52
+ llm: {
53
+ provider: LlmProvider;
54
+ model: string;
55
+ inputTokens: number;
56
+ outputTokens: number;
57
+ costUsd: number;
58
+ attempts: number;
59
+ };
60
+ }
61
+ export declare function runExpertReview(opts: ExpertReviewOpts): Promise<ExpertReviewResult>;
62
+ export declare function parseReviewResponse(raw: string, expert: ExpertKind, iteration: number): {
63
+ success: true;
64
+ data: ExpertReview;
65
+ } | {
66
+ success: false;
67
+ error: string;
68
+ };