@maintainabilityai/research-runner 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +82 -0
  3. package/bin/research-runner.js +2 -0
  4. package/dist/cli.d.ts +1 -0
  5. package/dist/cli.js +209 -0
  6. package/dist/llm/anthropic-client.d.ts +39 -0
  7. package/dist/llm/anthropic-client.js +74 -0
  8. package/dist/llm/github-models-client.d.ts +46 -0
  9. package/dist/llm/github-models-client.js +78 -0
  10. package/dist/llm/llm-router.d.ts +46 -0
  11. package/dist/llm/llm-router.js +60 -0
  12. package/dist/mesh/get-mesh-sha.d.ts +1 -0
  13. package/dist/mesh/get-mesh-sha.js +27 -0
  14. package/dist/mesh/mesh-reader.d.ts +14 -0
  15. package/dist/mesh/mesh-reader.js +392 -0
  16. package/dist/mesh/prompt-loader.d.ts +22 -0
  17. package/dist/mesh/prompt-loader.js +119 -0
  18. package/dist/mesh/threat-model-reader.d.ts +33 -0
  19. package/dist/mesh/threat-model-reader.js +123 -0
  20. package/dist/runner/archeologist.d.ts +39 -0
  21. package/dist/runner/archeologist.js +620 -0
  22. package/dist/runner/audit-emitter.d.ts +62 -0
  23. package/dist/runner/audit-emitter.js +210 -0
  24. package/dist/runner/hatters-tag-builder.d.ts +52 -0
  25. package/dist/runner/hatters-tag-builder.js +40 -0
  26. package/dist/runner/nodes/analyze-architecture.d.ts +10 -0
  27. package/dist/runner/nodes/analyze-architecture.js +447 -0
  28. package/dist/runner/nodes/arxiv-search.d.ts +12 -0
  29. package/dist/runner/nodes/arxiv-search.js +52 -0
  30. package/dist/runner/nodes/clone-and-index.d.ts +32 -0
  31. package/dist/runner/nodes/clone-and-index.js +158 -0
  32. package/dist/runner/nodes/dedupe-and-rank.d.ts +27 -0
  33. package/dist/runner/nodes/dedupe-and-rank.js +98 -0
  34. package/dist/runner/nodes/deterministic-review.d.ts +55 -0
  35. package/dist/runner/nodes/deterministic-review.js +206 -0
  36. package/dist/runner/nodes/expert-review.d.ts +68 -0
  37. package/dist/runner/nodes/expert-review.js +197 -0
  38. package/dist/runner/nodes/gap-analysis.d.ts +48 -0
  39. package/dist/runner/nodes/gap-analysis.js +153 -0
  40. package/dist/runner/nodes/generate-prd-manifest.d.ts +53 -0
  41. package/dist/runner/nodes/generate-prd-manifest.js +209 -0
  42. package/dist/runner/nodes/hackernews-search.d.ts +12 -0
  43. package/dist/runner/nodes/hackernews-search.js +63 -0
  44. package/dist/runner/nodes/identify-gaps.d.ts +33 -0
  45. package/dist/runner/nodes/identify-gaps.js +185 -0
  46. package/dist/runner/nodes/plan-queries.d.ts +28 -0
  47. package/dist/runner/nodes/plan-queries.js +120 -0
  48. package/dist/runner/nodes/prd-validator.d.ts +51 -0
  49. package/dist/runner/nodes/prd-validator.js +203 -0
  50. package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +22 -0
  51. package/dist/runner/nodes/synthesis-archaeology-validator.js +131 -0
  52. package/dist/runner/nodes/synthesis-validator.d.ts +51 -0
  53. package/dist/runner/nodes/synthesis-validator.js +185 -0
  54. package/dist/runner/nodes/synthesize-prd.d.ts +84 -0
  55. package/dist/runner/nodes/synthesize-prd.js +202 -0
  56. package/dist/runner/nodes/synthesize-report.d.ts +53 -0
  57. package/dist/runner/nodes/synthesize-report.js +188 -0
  58. package/dist/runner/nodes/tavily-search.d.ts +21 -0
  59. package/dist/runner/nodes/tavily-search.js +57 -0
  60. package/dist/runner/nodes/uspto-search.d.ts +13 -0
  61. package/dist/runner/nodes/uspto-search.js +62 -0
  62. package/dist/runner/nodes/verify-grounding.d.ts +54 -0
  63. package/dist/runner/nodes/verify-grounding.js +134 -0
  64. package/dist/runner/prd.d.ts +28 -0
  65. package/dist/runner/prd.js +494 -0
  66. package/dist/schemas/audit-event.d.ts +1151 -0
  67. package/dist/schemas/audit-event.js +141 -0
  68. package/dist/schemas/index.d.ts +17 -0
  69. package/dist/schemas/index.js +33 -0
  70. package/dist/schemas/mesh-context.d.ts +415 -0
  71. package/dist/schemas/mesh-context.js +95 -0
  72. package/dist/schemas/observed-architecture.d.ts +262 -0
  73. package/dist/schemas/observed-architecture.js +90 -0
  74. package/dist/schemas/prd-brief.d.ts +111 -0
  75. package/dist/schemas/prd-brief.js +37 -0
  76. package/dist/schemas/prd-doc.d.ts +249 -0
  77. package/dist/schemas/prd-doc.js +42 -0
  78. package/dist/schemas/prd-manifest.d.ts +171 -0
  79. package/dist/schemas/prd-manifest.js +73 -0
  80. package/dist/schemas/primitives.d.ts +47 -0
  81. package/dist/schemas/primitives.js +41 -0
  82. package/dist/schemas/query-plan.d.ts +33 -0
  83. package/dist/schemas/query-plan.js +25 -0
  84. package/dist/schemas/ranked-source.d.ts +82 -0
  85. package/dist/schemas/ranked-source.js +29 -0
  86. package/dist/schemas/research-brief.d.ts +114 -0
  87. package/dist/schemas/research-brief.js +49 -0
  88. package/dist/schemas/research-doc.d.ts +104 -0
  89. package/dist/schemas/research-doc.js +37 -0
  90. package/dist/search/arxiv-client.d.ts +41 -0
  91. package/dist/search/arxiv-client.js +88 -0
  92. package/dist/search/hackernews-client.d.ts +33 -0
  93. package/dist/search/hackernews-client.js +44 -0
  94. package/dist/search/provider-result.d.ts +25 -0
  95. package/dist/search/provider-result.js +2 -0
  96. package/dist/search/tavily-client.d.ts +38 -0
  97. package/dist/search/tavily-client.js +53 -0
  98. package/dist/search/uspto-client.d.ts +50 -0
  99. package/dist/search/uspto-client.js +112 -0
  100. package/dist/utils/run-id.d.ts +2 -0
  101. package/dist/utils/run-id.js +22 -0
  102. package/package.json +53 -0
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.runHackerNewsSearch = runHackerNewsSearch;
4
+ /**
5
+ * hackernews_search — pure_api node.
6
+ *
7
+ * Runs each community query through Algolia's HN Search. Salience score
8
+ * derived from HN's `points` field with a soft cap: log(1+points)/8 →
9
+ * a 100-point story scores ~0.58, a 1000-point story scores ~0.86,
10
+ * everything ≥ 5000 saturates at 1.0.
11
+ */
12
+ const hackernews_client_1 = require("../../search/hackernews-client");
13
/**
 * Run every query through the HN search client in parallel and flatten the hits.
 *
 * A failed query never aborts the batch: it is recorded as an envelope with
 * `httpStatus: 0` and the error message, and contributes no results.
 *
 * @param opts - `queries` (string[]), optional `hitsPerQuery` (default 5) and
 *               optional `fetchImpl` forwarded to the client.
 * @returns `{ envelopes, results }` — one envelope per query (same order as
 *          `opts.queries`) plus the flattened result records.
 */
async function runHackerNewsSearch(opts) {
    const { queries, fetchImpl } = opts;
    const hitsPerPage = opts.hitsPerQuery ?? 5;
    const pending = queries.map(query => (0, hackernews_client_1.hackerNewsSearch)({
        query,
        hitsPerPage,
        fetchImpl,
    }));
    const outcomes = await Promise.allSettled(pending);
    const envelopes = [];
    const results = [];
    outcomes.forEach((outcome, index) => {
        const query = queries[index];
        if (outcome.status !== 'fulfilled') {
            const { reason } = outcome;
            const error = reason instanceof Error ? reason.message : String(reason);
            envelopes.push({ query, httpStatus: 0, responseBytes: 0, resultCount: 0, error });
            return;
        }
        const { httpStatus, responseBytes, results: hits } = outcome.value;
        envelopes.push({ query, httpStatus, responseBytes, resultCount: hits.length });
        for (const hit of hits) {
            // The external article URL wins; the HN discussion thread is the fallback.
            const url = hit.url || hit.hnUrl;
            if (!url) {
                continue;
            }
            results.push({
                provider: 'hackernews',
                fromQuery: query,
                title: hit.title,
                url,
                content: '', // HN search returns no abstract
                score: pointsToScore(hit.points),
                publishedDate: hit.createdAt || undefined,
                authors: hit.author ? [hit.author] : undefined,
            });
        }
    });
    return { envelopes, results };
}
57
/**
 * Convert an HN `points` value into a salience score in (0, 1].
 *
 * Positive values use `log(1 + points) / 8`, capped at 1.0 (≈0.58 at 100
 * points, ≈0.86 at 1000, saturated by 5000). Missing, non-numeric, zero or
 * negative values get the fixed fallback 0.3 instead of leaking `NaN` into
 * downstream ranking.
 *
 * NOTE(review): stories with roughly 1–10 points score below the 0.3 fallback
 * given to unscored stories — confirm that ordering is intended.
 *
 * @param points - raw points value; numbers and numeric strings are accepted.
 * @returns score between 0 (exclusive) and 1 (inclusive).
 */
function pointsToScore(points) {
    // Coerce first: `1 + "100"` would otherwise concatenate, and `undefined` would yield NaN.
    const value = Number(points);
    if (Number.isNaN(value) || value <= 0) {
        return 0.3;
    }
    return Math.min(1, Math.log(1 + value) / 8);
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * identify_gaps — pure node (archaeology path).
3
+ *
4
+ * Compares the ObservedArchitecture from analyze_architecture against the
5
+ * MeshContext.bar.calm_model and produces:
6
+ * - Gap[] — structured discrepancies tagged with severity + evidence
7
+ * - 3 web queries — used by web_research to ground the synthesis in
8
+ * external best-practice guidance for the most significant gaps
9
+ *
10
+ * Phase 3a uses a conservative, heuristic comparison. CALM nodes are
11
+ * matched against observed modules by case-insensitive substring on
12
+ * either `unique-id` or `name`. Endpoints are matched against CALM
13
+ * `interface` declarations when present; otherwise flagged as
14
+ * "endpoint_not_in_calm" and downgraded to LOW severity to avoid
15
+ * crying wolf when the CALM model is just terse.
16
+ *
17
+ * Phase 3b (with tree-sitter) will tighten the matching with import-graph
18
+ * reachability and control-flow analysis. For now the synthesis prompt
19
+ * knows the gaps are heuristic; reviewers triage.
20
+ */
21
+ import type { ArchaeologyGap, MeshContext, ObservedArchitecture } from '../../schemas';
22
/** Inputs accepted by `identifyGaps`. */
export interface IdentifyGapsOpts {
    /** Architecture observed in the code; compared against the CALM model. */
    observed: ObservedArchitecture;
    /** Mesh context whose `bar.calm_model` is the reference model. */
    meshContext: MeshContext;
    /** Maximum number of gaps returned. Defaults to 15. */
    topN?: number;
}
28
/** Output of `identifyGaps`. */
export interface IdentifyGapsResult {
    /** Discrepancies found, ordered by severity and capped at `topN`. */
    gaps: ArchaeologyGap[];
    /** The three web queries the following node (web_research) runs via Tavily. */
    webQueries: string[];
}
33
/** Compare the observed architecture with the CALM model and report gaps plus web queries. */
export declare function identifyGaps(opts: IdentifyGapsOpts): IdentifyGapsResult;
@@ -0,0 +1,185 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.identifyGaps = identifyGaps;
4
/**
 * Compare the observed architecture with the CALM model from the mesh context
 * and report heuristic discrepancies, plus web queries derived from them.
 *
 * Rules, in discovery order:
 *   1. missing_module (HIGH) — a CALM node that no observed module/endpoint matches.
 *   2. orphan_module (MEDIUM, or LOW for layer "unknown") — an observed module with
 *      no CALM node; `shared` modules and modules with fewer than 3 files are skipped.
 *   3. endpoint_not_in_calm (LOW) — at most one gap per file, raised when no CALM
 *      node id/name contains the file's leading directory name.
 *   4. framework_choice_undeclared (MEDIUM) — a framework no ADR title/decision mentions.
 *
 * @param opts - `observed`, `meshContext` and optional `topN` (default 15).
 * @returns `{ gaps, webQueries }`; gaps are sorted HIGH > MEDIUM > LOW (discovery
 *          order kept within a tier) and truncated to `topN`.
 */
function identifyGaps(opts) {
    const topN = opts.topN ?? 15;
    const calmNodes = extractCalmNodes(opts.meshContext);
    const observed = opts.observed;
    const gaps = [];
    let nextId = 1;
    // Ids are handed out in discovery order, before the severity sort below.
    const nextGapId = () => `G${nextId++}`;
    // Rule 1: missing_module — CALM mentions a node that observed modules don't match
    for (const calm of calmNodes) {
        if (matchesAnyModule(calm, observed)) {
            continue;
        }
        gaps.push({
            id: nextGapId(),
            kind: 'missing_module',
            severity: 'HIGH',
            summary: `CALM node \`${calm.uniqueId}\` (${calm.name}, type=${calm.nodeType}) has no matching module in the code.`,
            observedEvidence: [`(no module named or containing "${calm.name}")`],
            meshReferences: [calm.uniqueId],
        });
    }
    // Rule 2: orphan_module — observed module with no matching CALM node
    // (shared/util-ish modules are infrastructure, not features; tiny dirs are noise)
    for (const mod of observed.modules) {
        if (calmNodes.some(n => moduleMatchesCalm(mod.name, n))) {
            continue;
        }
        if (mod.layer === 'shared') {
            continue;
        }
        if (mod.fileCount < 3) {
            continue;
        }
        gaps.push({
            id: nextGapId(),
            kind: 'orphan_module',
            severity: mod.layer === 'unknown' ? 'LOW' : 'MEDIUM',
            summary: `Module \`${mod.name}\` (layer=${mod.layer}, ${mod.fileCount} file(s)) has no matching CALM node.`,
            observedEvidence: [`OA[${mod.name}]`],
            meshReferences: [],
        });
    }
    // Rule 3: endpoint_not_in_calm — flagged once per file to avoid spam from one big router file.
    const flaggedFiles = new Set();
    for (const ep of observed.endpoints) {
        if (flaggedFiles.has(ep.file)) {
            continue;
        }
        // Use the first real path segment. Absolute ("/app/x.js") and dot-relative
        // ("./app/x.js") paths would otherwise give "" or ".", and an empty hint is a
        // substring of every CALM id, which silently marked the endpoint as covered.
        const firstSegment = ep.file.split('/').find(seg => seg !== '' && seg !== '.') ?? '';
        const dirHint = firstSegment.toLowerCase();
        const covered = dirHint !== ''
            && calmNodes.some(n => n.uniqueId.toLowerCase().includes(dirHint) || n.name.toLowerCase().includes(dirHint));
        if (covered) {
            continue;
        }
        flaggedFiles.add(ep.file);
        gaps.push({
            id: nextGapId(),
            kind: 'endpoint_not_in_calm',
            severity: 'LOW',
            summary: `Endpoint \`${ep.method} ${ep.path}\` (framework=${ep.framework}) in \`${ep.file}\` is not represented in the CALM model.`,
            observedEvidence: [`OA[${ep.file}]`],
            meshReferences: [],
        });
    }
    // Rule 4: framework_choice_undeclared — observed frameworks not mentioned in mesh decisions
    const adrText = (opts.meshContext.bar?.adrs ?? [])
        .map(a => `${a.title} ${a.decision}`)
        .join(' ')
        .toLowerCase();
    for (const fw of observed.profile.frameworks) {
        if (!adrText.includes(fw.toLowerCase())) {
            gaps.push({
                id: nextGapId(),
                kind: 'framework_choice_undeclared',
                severity: 'MEDIUM',
                summary: `Framework \`${fw}\` is in use but no ADR mentions it.`,
                observedEvidence: [`OA[manifests:${observed.profile.manifests.join(',')}]`],
                meshReferences: [],
            });
        }
    }
    // Cap (severity HIGH > MEDIUM > LOW); Array.prototype.sort is stable, so
    // discovery order is preserved within a tier.
    const sevOrder = { HIGH: 0, MEDIUM: 1, LOW: 2 };
    const ranked = gaps.sort((a, b) => sevOrder[a.severity] - sevOrder[b.severity]).slice(0, topN);
    return { gaps: ranked, webQueries: deriveQueriesFromGaps(ranked, observed) };
}
91
+ // ============================================================================
92
+ // Helpers
93
+ // ============================================================================
94
/**
 * Pull a normalised node list out of `meshContext.bar.calm_model`.
 *
 * Entries that are not objects, or that have no `unique-id` / `uniqueId`, are
 * dropped. `name` falls back to the id and `nodeType` to "unknown".
 *
 * @returns array of `{ uniqueId, name, nodeType }` (all strings); empty when no
 *          usable CALM model is present.
 */
function extractCalmNodes(meshContext) {
    const model = meshContext.bar?.calm_model;
    if (!model || typeof model !== 'object') {
        return [];
    }
    const rawNodes = model.nodes;
    if (!Array.isArray(rawNodes)) {
        return [];
    }
    const nodes = [];
    for (const entry of rawNodes) {
        if (!entry || typeof entry !== 'object') {
            continue;
        }
        const uniqueId = String(entry['unique-id'] ?? entry['uniqueId'] ?? '');
        if (!uniqueId) {
            continue;
        }
        const name = String(entry['name'] ?? uniqueId);
        const nodeType = String(entry['node-type'] ?? entry['nodeType'] ?? 'unknown');
        nodes.push({ uniqueId, name, nodeType });
    }
    return nodes;
}
121
/**
 * Decide whether a CALM node is represented anywhere in the observed code.
 *
 * A node matches when its `name` OR its `uniqueId` (case-insensitive) is a
 * substring of an observed module name or of an endpoint's file path. The
 * previous version only tried the name, so a node like
 * `{ uniqueId: "payments", name: "Payments Service" }` was reported missing even
 * when `src/payments` existed.
 *
 * Needles shorter than 2 characters are ignored because they match almost anything.
 *
 * @param calm - `{ uniqueId, name, nodeType }`.
 * @param observed - object with `modules[].name` and `endpoints[].file`.
 * @returns true when at least one module or endpoint file contains a needle.
 */
function matchesAnyModule(calm, observed) {
    const needles = [...new Set([calm.name, calm.uniqueId]
            .map(value => (value || '').toLowerCase())
            .filter(value => value.length >= 2))];
    if (needles.length === 0) {
        return false;
    }
    const containsNeedle = (text) => {
        const haystack = text.toLowerCase();
        return needles.some(needle => haystack.includes(needle));
    };
    return observed.modules.some(m => containsNeedle(m.name))
        || observed.endpoints.some(e => containsNeedle(e.file));
}
129
/**
 * Decide whether an observed module corresponds to a CALM node.
 *
 * The last non-empty path segment of `moduleName` is compared,
 * case-insensitively, with the node's `uniqueId` and `name`; containment in
 * either direction counts as a match.
 *
 * Empty strings are never used as needles: `"abc".includes("")` is always true,
 * so a trailing slash in the module path or a blank CALM name used to make
 * every module "match" and hide orphan modules.
 *
 * @param moduleName - module path such as `src/billing`.
 * @param calm - `{ uniqueId, name }` (strings).
 * @returns true when the module leaf and the node id/name contain one another.
 */
function moduleMatchesCalm(moduleName, calm) {
    const segments = moduleName.split('/').filter(segment => segment !== '');
    const moduleLeaf = (segments[segments.length - 1] ?? '').toLowerCase();
    if (moduleLeaf === '') {
        return false;
    }
    const calmId = calm.uniqueId.toLowerCase();
    const calmName = calm.name.toLowerCase();
    return calmId.includes(moduleLeaf)
        || calmName.includes(moduleLeaf)
        || (calmId !== '' && moduleLeaf.includes(calmId))
        || (calmName !== '' && moduleLeaf.includes(calmName));
}
136
/**
 * Turn the top gaps into 3 web research queries. Pads with
 * generic-but-relevant fallbacks when fewer distinct queries surfaced.
 *
 * Several gap kinds map to a fixed query string, so identical queries are
 * skipped: three `endpoint_not_in_calm` gaps previously produced the same
 * query three times and wasted the whole search budget.
 *
 * @param gaps - gaps in priority order; only `kind`, `meshReferences` and
 *               `observedEvidence` are read.
 * @param observed - supplies `profile.frameworks` / `profile.languages` for wording.
 * @returns up to 3 query strings, each ending in the current UTC year.
 */
function deriveQueriesFromGaps(gaps, observed) {
    const year = new Date().getUTCFullYear();
    const primaryFw = observed.profile.frameworks[0] ?? observed.profile.languages[0] ?? 'web service';
    const queries = [];
    const addUnique = (query) => {
        if (!queries.includes(query)) {
            queries.push(query);
        }
    };
    for (const gap of gaps) {
        if (queries.length >= 3) {
            break;
        }
        switch (gap.kind) {
            case 'missing_module': {
                const ref = gap.meshReferences[0] ?? 'service';
                addUnique(`how to introduce ${ref} into a ${primaryFw} architecture ${year}`);
                break;
            }
            case 'orphan_module': {
                // Evidence is stored as `OA[<module>]`; unwrap it for the query text.
                const evidence = gap.observedEvidence[0]?.replace(/^OA\[(.*)\]$/, '$1') ?? 'module';
                addUnique(`document architecture decision for ${evidence} in CALM ${year}`);
                break;
            }
            case 'endpoint_not_in_calm':
                addUnique(`best practices for representing REST endpoints in architecture-as-code ${year}`);
                break;
            case 'missing_security_control':
                addUnique(`implementing security controls in ${primaryFw} services ${year}`);
                break;
            case 'framework_choice_undeclared':
                addUnique(`when to write an ADR for a new framework adoption ${year}`);
                break;
        }
    }
    // Pad to 3 with generic queries — always include year for recency.
    const fallbacks = [
        `architecture-as-code best practices ${primaryFw} ${year}`,
        `${primaryFw} layered architecture review checklist ${year}`,
        `CALM architecture model adoption case studies ${year}`,
    ];
    for (const fb of fallbacks) {
        if (queries.length >= 3) {
            break;
        }
        addUnique(fb);
    }
    return queries.slice(0, 3);
}
@@ -0,0 +1,28 @@
1
+ import { type LlmProvider, type MeshContext, type QueryPlan, type ResearchBrief } from '../../schemas';
2
+ import { type LoadedPrompt } from '../../mesh/prompt-loader';
3
/** Inputs accepted by `planQueries`. */
export interface PlanQueriesOpts {
    /** Mesh directory handed to the prompt loader. */
    meshDir: string;
    /** Research brief; supplies the topic, scope level and default LLM provider. */
    brief: ResearchBrief;
    /** Mesh context summarised into the prompt. */
    meshContext: MeshContext;
    /** Provider routing; falls back to `brief.llm_provider` when omitted. */
    provider?: LlmProvider;
    /** Needed when the provider is 'anthropic'. */
    anthropicApiKey?: string;
    /** Needed when the provider is 'github-models'. */
    githubToken?: string;
    /** Optional `fetch` implementation forwarded to the LLM call. */
    fetchImpl?: typeof fetch;
}
15
/** Output of `planQueries`. */
export interface PlanQueriesResult {
    /** The validated query plan parsed from the LLM reply. */
    queryPlan: QueryPlan;
    /** The loaded prompt that was sent to the model. */
    prompt: LoadedPrompt;
    /** Usage accounting; token and cost figures are summed over all attempts. */
    llm: {
        provider: LlmProvider;
        /** Model reported by the most recent LLM call. */
        model: string;
        inputTokens: number;
        outputTokens: number;
        costUsd: number;
        /** Number of LLM calls made: 1 on the happy path, 2 when a retry was needed. */
        attempts: number;
    };
}
28
/** Ask the LLM for a query plan; rejects when two attempts both fail validation. */
export declare function planQueries(opts: PlanQueriesOpts): Promise<PlanQueriesResult>;
@@ -0,0 +1,120 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.planQueries = planQueries;
4
+ const schemas_1 = require("../../schemas");
5
+ const llm_router_1 = require("../../llm/llm-router");
6
+ const prompt_loader_1 = require("../../mesh/prompt-loader");
7
/**
 * Ask the LLM for a query plan and validate the reply.
 *
 * Makes at most two calls: the second call repeats the prompt and appends the
 * validation error from the first reply. Token counts and cost are summed
 * across both calls.
 *
 * @param opts - see `PlanQueriesOpts`; `provider` defaults to `brief.llm_provider`.
 * @returns `{ queryPlan, prompt, llm }` where `llm.attempts` is 1 or 2.
 * @throws Error when neither reply passes validation.
 */
async function planQueries(opts) {
    const provider = opts.provider ?? opts.brief.llm_provider;
    const promptContext = buildPromptContext(opts.brief, opts.meshContext);
    const prompt = (0, prompt_loader_1.loadPrompt)({
        meshDir: opts.meshDir,
        packId: 'research/query-plan',
        context: promptContext,
    });
    const baseSystem = 'You output a SINGLE JSON object exactly matching the schema described. No prose before or after, no markdown fence. The first character of your response MUST be `{`.';
    const usage = { inputTokens: 0, outputTokens: 0, costUsd: 0 };
    let lastError = null;
    let attempt = 0;
    while (attempt < 2) {
        attempt += 1;
        let userPrompt = prompt.filled;
        if (attempt !== 1) {
            // Retry: feed the validation failure back to the model.
            userPrompt = `${prompt.filled}\n\n---\n\nYour previous response failed validation:\n${lastError}\n\nReturn a SINGLE JSON object with exactly 4 keys (web, arxiv, patent, community) and the exact counts (5, 3, 3, 3) requested. Web queries MUST contain a 4-digit year; patent queries MUST contain the literal token "AND".`;
        }
        const reply = await (0, llm_router_1.callLlm)({
            provider,
            tier: 'plan',
            anthropicApiKey: opts.anthropicApiKey,
            githubToken: opts.githubToken,
            system: baseSystem,
            prompt: userPrompt,
            maxTokens: 2000,
            fetchImpl: opts.fetchImpl,
        });
        usage.inputTokens += reply.inputTokens;
        usage.outputTokens += reply.outputTokens;
        usage.costUsd += reply.costUsd;
        const parsed = parseQueryPlanResponse(reply.text);
        if (!parsed.success) {
            lastError = parsed.error;
            continue;
        }
        return {
            queryPlan: parsed.data,
            prompt,
            llm: {
                provider,
                model: reply.model,
                inputTokens: usage.inputTokens,
                outputTokens: usage.outputTokens,
                costUsd: usage.costUsd,
                attempts: attempt,
            },
        };
    }
    throw new Error(`plan_queries: LLM output failed QueryPlan validation after 2 attempts. Last error: ${lastError}`);
}
51
/**
 * Extract a JSON object from an LLM reply and validate it against QueryPlan.
 *
 * Tolerated shapes, tried in order:
 *   1. the JSON wrapped in a Markdown code fence (optionally tagged `json`);
 *   2. the whole trimmed reply being JSON;
 *   3. prose around the JSON — the span from the first `{` to the last `}`.
 * Step 3 is what makes "Here is the plan: {...}" parse instead of burning the retry.
 *
 * @param raw - raw text returned by the model.
 * @returns `{ success: true, data }` or `{ success: false, error }`, where
 *          `error` is a human-readable string suitable for the retry prompt.
 */
function parseQueryPlanResponse(raw) {
    const trimmed = raw.trim();
    // Tolerate the model wrapping the JSON in ```json ... ```
    const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
    const candidate = fenceMatch ? fenceMatch[1].trim() : trimmed;
    let parsedJson;
    try {
        parsedJson = JSON.parse(candidate);
    }
    catch (e) {
        // Report the original parse failure if the brace-delimited fallback fails too.
        const notJson = { success: false, error: `not valid JSON: ${e instanceof Error ? e.message : String(e)}` };
        const start = candidate.indexOf('{');
        const end = candidate.lastIndexOf('}');
        if (start === -1 || end <= start) {
            return notJson;
        }
        try {
            parsedJson = JSON.parse(candidate.slice(start, end + 1));
        }
        catch {
            return notJson;
        }
    }
    const result = schemas_1.QueryPlan.safeParse(parsedJson);
    if (result.success) {
        return { success: true, data: result.data };
    }
    return { success: false, error: result.error.issues.map(formatIssue).join('; ') };
}
70
/**
 * Render one validation issue as `<dotted.path>: <message>`.
 * An empty path is shown as `<root>`.
 */
function formatIssue(issue) {
    const location = issue.path.join('.') || '<root>';
    return `${location}: ${issue.message}`;
}
73
/**
 * Build the context object handed to the prompt loader.
 *
 * Only what the prompt needs is projected: the brief's topic and scope level,
 * the bar name with CALM/threat summaries, and the topics of related research.
 * Placeholder strings are used when the mesh has no bar, CALM model or threats.
 */
function buildPromptContext(brief, mesh) {
    const bar = mesh.bar;
    let calmSummary = '(no CALM model loaded)';
    if (bar?.calm_model) {
        calmSummary = summarizeCalmModel(bar.calm_model);
    }
    let threatsSummary = '(no threat model on file)';
    if (bar?.threats) {
        threatsSummary = summarizeThreats(bar.threats);
    }
    let relatedResearch = [];
    if (bar?.related_research?.length) {
        relatedResearch = bar.related_research.map(entry => entry.topic);
    }
    const briefContext = {
        topic: brief.topic,
        scope_level: brief.scope.level,
    };
    const barContext = {
        name: bar?.name ?? '(no bar in scope)',
        calm_summary: calmSummary,
        threats_summary: threatsSummary,
    };
    return {
        brief: briefContext,
        mesh: {
            bar: barContext,
            related_research: relatedResearch,
        },
    };
}
95
/**
 * One-line summary of a CALM model: node count, distinct node types and
 * relationship count.
 *
 * Malformed entries in `nodes` (null, strings, …) no longer throw; they are
 * counted and reported as type "unknown". Both the `node-type` and `nodeType`
 * spellings are read.
 *
 * @param calm - CALM model object; anything else yields the placeholder text.
 * @returns e.g. `3 node(s) [service, database], 2 relationship(s)`.
 */
function summarizeCalmModel(calm) {
    if (!calm || typeof calm !== 'object') {
        return '(no CALM model loaded)';
    }
    const nodes = Array.isArray(calm.nodes) ? calm.nodes : [];
    const relationships = Array.isArray(calm.relationships) ? calm.relationships : [];
    const kindOf = (node) => (node && typeof node === 'object' && (node['node-type'] || node['nodeType'])) || 'unknown';
    const nodeKinds = [...new Set(nodes.map(kindOf))].join(', ');
    return `${nodes.length} node(s) [${nodeKinds || 'no node-types'}], ${relationships.length} relationship(s)`;
}
107
/**
 * One-line summary of a threat list: `<category> × <count>` pairs in
 * first-seen order.
 *
 * Counting uses a Map rather than a plain object so categories named like
 * Object.prototype members ("constructor", "__proto__") are counted correctly.
 * Null or category-less entries are counted under "unknown" instead of throwing.
 *
 * @param threats - array of threat records; only `category` is read.
 * @returns the summary, or `(no threats)` for a non-array or empty input.
 */
function summarizeThreats(threats) {
    if (!Array.isArray(threats) || threats.length === 0) {
        return '(no threats)';
    }
    const byCategory = new Map();
    for (const threat of threats) {
        const category = String((threat && threat.category) || 'unknown');
        byCategory.set(category, (byCategory.get(category) ?? 0) + 1);
    }
    return [...byCategory].map(([c, n]) => `${c} × ${n}`).join(', ');
}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * prd-validator — structural validator for the markdown body produced by
3
+ * synthesize_prd.
4
+ *
5
+ * Enforces the canonical 10-section PRD shape defined in
6
+ * `.caterpillar/prompts/prd/synthesis.md`:
7
+ *
8
+ * 1. Input Premises (R[N] / E[N] numbered list)
9
+ * 2. Problem Statement and Scope
10
+ * 3. Goals and Non-Goals
11
+ * 4. Functional Requirements with Traceability (FR-NN; cites ≥1 R/E)
12
+ * 5. Non-Functional Requirements (NFR-NN; cites ≥1 R/E)
13
+ * 6. Security Requirements with Threat Tracing (SR-NN; cites ≥1 THR/A0X/NIST)
14
+ * 7. Coverage Analysis (table; every premise tagged YES/PARTIAL/NO)
15
+ * 8. Risk Matrix
16
+ * 9. Success Metrics
17
+ * 10. References
18
+ *
19
+ * Returns ValidationReport + extra per-FR / per-SR / per-coverage signals
20
+ * verify_grounding consumes.
21
+ */
22
+ import type { ValidationReport } from './synthesis-validator';
23
/** Canonical H2 section titles of a PRD, in the order they must appear. */
export declare const CANONICAL_PRD_SECTIONS: readonly [
    "Input Premises",
    "Problem Statement and Scope",
    "Goals and Non-Goals",
    "Functional Requirements with Traceability",
    "Non-Functional Requirements",
    "Security Requirements with Threat Tracing",
    "Coverage Analysis",
    "Risk Matrix",
    "Success Metrics",
    "References"
];
/** Any one of the canonical PRD section titles. */
export type CanonicalPrdSection = (typeof CANONICAL_PRD_SECTIONS)[number];
/** Status values allowed in the Coverage Analysis table. */
export type CoverageStatus = 'YES' | 'PARTIAL' | 'NO';
26
/** Citation data extracted from a PRD body. */
export interface PrdCitationSignals {
    /** Premise IDs (R1, R2, E1, E2, …) found in the Input Premises section. */
    premise_ids: string[];
    /** One entry per FR, with the R/E premise IDs it cites. */
    fr_entries: {
        id: string;
        cited: string[];
    }[];
    /** One entry per SR, with the THR/A0X/NIST IDs it cites. */
    sr_entries: {
        id: string;
        cited: string[];
    }[];
    /** Rows of the Coverage Analysis table: premise, self-reported status, and where it is addressed. */
    coverage_rows: {
        premise: string;
        status: CoverageStatus;
        whereAddressed: string;
    }[];
}
46
/** Validation report for a PRD: the generic report plus PRD citation signals. */
export interface PrdValidationReport extends ValidationReport {
    /** Per-requirement and per-premise citation detail consumed by verify_grounding. */
    signals: PrdCitationSignals;
}
50
/** Check a PRD markdown body against the canonical section order and citation rules. */
export declare function validatePrd(body: string): PrdValidationReport;
51
/** Extract premise IDs, FR/SR citation entries and coverage rows from a PRD markdown body. */
export declare function extractCitationSignals(body: string): PrdCitationSignals;
@@ -0,0 +1,203 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CANONICAL_PRD_SECTIONS = void 0;
4
+ exports.validatePrd = validatePrd;
5
+ exports.extractCitationSignals = extractCitationSignals;
6
+ exports.CANONICAL_PRD_SECTIONS = [
7
+ 'Input Premises',
8
+ 'Problem Statement and Scope',
9
+ 'Goals and Non-Goals',
10
+ 'Functional Requirements with Traceability',
11
+ 'Non-Functional Requirements',
12
+ 'Security Requirements with Threat Tracing',
13
+ 'Coverage Analysis',
14
+ 'Risk Matrix',
15
+ 'Success Metrics',
16
+ 'References',
17
+ ];
18
+ const FR_REQUIREMENT_RE = /\bFR-\d+\b/g;
19
+ const SR_REQUIREMENT_RE = /\bSR-\d+\b/g;
20
+ const R_OR_E_CITATION_RE = /\b[RE]\d+\b/g;
21
+ const THR_OR_OWASP_OR_NIST_RE = /\b(?:THR-\d+|A\d{2}|NIST-[A-Z]{2}-\d+)\b/g;
22
+ const COVERAGE_STATUS_VALID = new Set(['YES', 'PARTIAL', 'NO']);
23
/**
 * Validate the structure and citation hygiene of a PRD markdown body.
 *
 * Checks, in the order their errors are reported:
 *   - the i-th H2 heading equals the i-th canonical section title;
 *   - every FR entry cites at least one R/E premise;
 *   - every SR entry cites at least one THR / A0X / NIST reference;
 *   - every premise has a row in the Coverage Analysis table;
 *   - every coverage row status is YES, PARTIAL or NO.
 *
 * Citation signals are extracted even when earlier checks fail.
 *
 * @param body - PRD markdown.
 * @returns report with `valid`, `errors`, `sectionsFound`, `signals` and `citation_stats`.
 */
function validatePrd(body) {
    const errors = [];
    const sectionsFound = extractH2Sections(body);
    // Sections must be present and in canonical order (positional comparison).
    exports.CANONICAL_PRD_SECTIONS.forEach((expected, index) => {
        const actual = sectionsFound[index];
        if (actual === expected) {
            return;
        }
        const found = actual ? `"## ${actual}"` : '(missing)';
        errors.push(`Section #${index + 1} expected "## ${expected}" but found ${found}.`);
    });
    const signals = extractCitationSignals(body);
    const uncitedFrs = signals.fr_entries.filter(entry => entry.cited.length === 0);
    const uncitedSrs = signals.sr_entries.filter(entry => entry.cited.length === 0);
    for (const fr of uncitedFrs) {
        errors.push(`Functional Requirement ${fr.id} has no R[N] / E[N] citation.`);
    }
    for (const sr of uncitedSrs) {
        errors.push(`Security Requirement ${sr.id} has no THR-NNN / A0X / NIST-XX-NN citation.`);
    }
    // Every input premise needs a Coverage Analysis row.
    const coveredPremises = new Set(signals.coverage_rows.map(row => row.premise));
    signals.premise_ids
        .filter(pid => !coveredPremises.has(pid))
        .forEach(pid => errors.push(`Coverage Analysis table is missing a row for premise ${pid}.`));
    // Coverage statuses are a closed vocabulary (no free-text drift).
    signals.coverage_rows
        .filter(row => !COVERAGE_STATUS_VALID.has(row.status))
        .forEach(row => errors.push(`Coverage row for ${row.premise} has invalid status "${row.status}"; must be YES / PARTIAL / NO.`));
    // Informational heuristic over the narrative sections.
    const untraced_claims = countUntracedClaims(body);
    const citation_stats = {
        source_count: signals.premise_ids.length,
        conclusion_count: 0, // PRDs don't have C[N]
        recommendation_count: signals.fr_entries.length + signals.sr_entries.length,
        underCitedConclusions: 0,
        untracedRecommendations: uncitedFrs.length + uncitedSrs.length,
        untraced_claims,
    };
    return {
        valid: errors.length === 0,
        errors,
        sectionsFound,
        signals,
        citation_stats,
    };
}
78
+ // ============================================================================
79
+ // Section parsing
80
+ // ============================================================================
81
/**
 * List the titles of all level-2 markdown headings (`## Title`) in document order.
 * Titles are trimmed; deeper headings such as `### …` are not included.
 */
function extractH2Sections(body) {
    const headingRe = /^##\s+(.+?)\s*$/;
    return body
        .split('\n')
        .map(line => line.match(headingRe))
        .filter(match => match !== null)
        .map(match => match[1].trim());
}
91
/**
 * Return the text under the H2 heading `sectionName`, without the heading line.
 *
 * Collection starts after the matching `## <sectionName>` line and stops at the
 * next H2 with a different title. Returns '' when the section is absent.
 */
function extractSection(body, sectionName) {
    const headingRe = /^##\s+(.+?)\s*$/;
    const kept = [];
    let capturing = false;
    for (const line of body.split('\n')) {
        const heading = headingRe.exec(line);
        if (heading === null) {
            if (capturing) {
                kept.push(line);
            }
            continue;
        }
        if (heading[1].trim() === sectionName) {
            capturing = true;
            continue;
        }
        if (capturing) {
            // A different H2 ends the section.
            break;
        }
    }
    return kept.join('\n');
}
112
+ // ============================================================================
113
+ // Citation signal extraction
114
+ // ============================================================================
115
/**
 * Collect the citation signals of a PRD body: premise IDs, FR and SR entries
 * with the IDs they cite, and the Coverage Analysis rows. Each signal is read
 * from its own canonical section.
 */
function extractCitationSignals(body) {
    const section = (name) => extractSection(body, name);
    return {
        premise_ids: extractPremiseIds(section('Input Premises')),
        fr_entries: extractRequirementEntries(section('Functional Requirements with Traceability'), FR_REQUIREMENT_RE, R_OR_E_CITATION_RE),
        sr_entries: extractRequirementEntries(section('Security Requirements with Threat Tracing'), SR_REQUIREMENT_RE, THR_OR_OWASP_OR_NIST_RE),
        coverage_rows: extractCoverageRows(section('Coverage Analysis')),
    };
}
127
/**
 * Collect the distinct premise IDs (`R<digits>` / `E<digits>`) mentioned in a
 * block of text, sorted with naturalCompare so R2 precedes R10.
 */
function extractPremiseIds(block) {
    const found = Array.from(block.matchAll(/\b([RE]\d+)\b/g), match => match[1]);
    const distinct = Array.from(new Set(found));
    distinct.sort(naturalCompare);
    return distinct;
}
134
/**
 * Split a requirements block on each requirement marker (e.g. `FR-NN` / `SR-NN`)
 * found at the start of a line, then extract citations from that entry's text up
 * to (but not including) the next marker. Handles multi-line requirement bodies
 * and Markdown bullet / numbered-list / bold formatting.
 *
 * Fixes over the previous version:
 *   - only `\b` anchors are stripped from `idRe.source`; the old pattern
 *     `/\\b|g/g` also deleted every letter "g" from the id pattern;
 *   - a `citationRe` without the `g` flag is accepted (matchAll would throw);
 *   - the matched id is regex-escaped before being used to strip itself.
 *
 * @param block - text of one requirements section.
 * @param idRe - pattern of a requirement id (matched case-insensitively at line start).
 * @param citationRe - pattern of a citation token.
 * @returns `[{ id, cited }]` sorted by id with naturalCompare; `id` is upper-cased,
 *          `cited` is de-duplicated and sorted. For a repeated id only the first
 *          occurrence is kept.
 */
function extractRequirementEntries(block, idRe, citationRe) {
    const idPattern = idRe.source.replace(/\\b/g, '');
    const idAtStartRe = new RegExp(`^\\s*(?:[-*]|\\d+\\.)?\\s*(?:\\*\\*)?(${idPattern})(?:\\*\\*)?`, 'i');
    const globalCitationRe = citationRe.global
        ? citationRe
        : new RegExp(citationRe.source, `${citationRe.flags}g`);
    const entries = [];
    for (const line of block.split('\n')) {
        const m = line.match(idAtStartRe);
        if (m) {
            entries.push({ id: m[1].toUpperCase(), bodyLines: [line] });
        }
        else if (entries.length > 0) {
            // Continuation line: belongs to the most recent requirement.
            entries[entries.length - 1].bodyLines.push(line);
        }
    }
    // Dedupe by id (keep first occurrence's body for citation extraction)
    const seen = new Set();
    const unique = [];
    for (const e of entries) {
        if (seen.has(e.id)) {
            continue;
        }
        seen.add(e.id);
        const body = e.bodyLines.join('\n');
        // Strip the requirement's own id so it can never count as its own citation.
        const escapedId = e.id.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const stripped = body.replace(new RegExp(`\\b${escapedId}\\b`, 'gi'), '');
        const cited = new Set();
        for (const cm of stripped.matchAll(globalCitationRe)) {
            cited.add(cm[0]);
        }
        unique.push({ id: e.id, cited: [...cited].sort(naturalCompare) });
    }
    return unique.sort((a, b) => naturalCompare(a.id, b.id));
}
171
/**
 * Parse the rows of the Coverage Analysis markdown table.
 *
 * A row looks like `| R1 | YES | FR-01, FR-04 |`: premise id, status, and the
 * place where the premise is addressed. Header and separator lines do not match
 * and are skipped.
 *
 * The status is matched case-insensitively and upper-cased, so `Yes` / `partial`
 * rows are kept. The previous `[A-Z]+` capture dropped them, which surfaced as a
 * misleading "missing a row" error and made the `.toUpperCase()` dead code.
 *
 * @param block - text of the Coverage Analysis section.
 * @returns `[{ premise, status, whereAddressed }]` in table order.
 */
function extractCoverageRows(block) {
    const rowRe = /^\s*\|\s*([RE]\d+)\s*\|\s*([A-Za-z]+)\s*\|\s*([^|]*?)\s*\|\s*$/;
    const rows = [];
    for (const line of block.split('\n')) {
        const m = line.match(rowRe);
        if (!m) {
            continue;
        }
        rows.push({
            premise: m[1],
            status: m[2].toUpperCase(),
            whereAddressed: m[3].trim(),
        });
    }
    return rows;
}
186
/**
 * Heuristic count of narrative sentences that carry no premise citation.
 *
 * Looks only at "Problem Statement and Scope" and "Goals and Non-Goals". A
 * sentence is a run of text ending in `.`, `!` or `?` on one line; it counts
 * when it is longer than 40 characters after trimming and contains no
 * `R<digits>` / `E<digits>` token.
 */
function countUntracedClaims(body) {
    const narrativeSections = ['Problem Statement and Scope', 'Goals and Non-Goals'];
    return narrativeSections.reduce((total, sectionName) => {
        const sentences = extractSection(body, sectionName).match(/[^.!?\n]+[.!?]/g) ?? [];
        const untraced = sentences.filter(sentence => sentence.trim().length > 40 && !/\b[RE]\d+\b/.test(sentence));
        return total + untraced.length;
    }, 0);
}
200
/**
 * Compare two IDs so embedded numbers sort numerically (`R2` before `R10`),
 * not character by character.
 */
function naturalCompare(a, b) {
    const collation = { numeric: true };
    return a.localeCompare(b, undefined, collation);
}