@maintainabilityai/research-runner 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +82 -0
  3. package/bin/research-runner.js +2 -0
  4. package/dist/cli.d.ts +1 -0
  5. package/dist/cli.js +209 -0
  6. package/dist/llm/anthropic-client.d.ts +39 -0
  7. package/dist/llm/anthropic-client.js +74 -0
  8. package/dist/llm/github-models-client.d.ts +46 -0
  9. package/dist/llm/github-models-client.js +78 -0
  10. package/dist/llm/llm-router.d.ts +46 -0
  11. package/dist/llm/llm-router.js +60 -0
  12. package/dist/mesh/get-mesh-sha.d.ts +1 -0
  13. package/dist/mesh/get-mesh-sha.js +27 -0
  14. package/dist/mesh/mesh-reader.d.ts +14 -0
  15. package/dist/mesh/mesh-reader.js +392 -0
  16. package/dist/mesh/prompt-loader.d.ts +22 -0
  17. package/dist/mesh/prompt-loader.js +119 -0
  18. package/dist/mesh/threat-model-reader.d.ts +33 -0
  19. package/dist/mesh/threat-model-reader.js +123 -0
  20. package/dist/runner/archeologist.d.ts +39 -0
  21. package/dist/runner/archeologist.js +620 -0
  22. package/dist/runner/audit-emitter.d.ts +62 -0
  23. package/dist/runner/audit-emitter.js +210 -0
  24. package/dist/runner/hatters-tag-builder.d.ts +52 -0
  25. package/dist/runner/hatters-tag-builder.js +40 -0
  26. package/dist/runner/nodes/analyze-architecture.d.ts +10 -0
  27. package/dist/runner/nodes/analyze-architecture.js +447 -0
  28. package/dist/runner/nodes/arxiv-search.d.ts +12 -0
  29. package/dist/runner/nodes/arxiv-search.js +52 -0
  30. package/dist/runner/nodes/clone-and-index.d.ts +32 -0
  31. package/dist/runner/nodes/clone-and-index.js +158 -0
  32. package/dist/runner/nodes/dedupe-and-rank.d.ts +27 -0
  33. package/dist/runner/nodes/dedupe-and-rank.js +98 -0
  34. package/dist/runner/nodes/deterministic-review.d.ts +55 -0
  35. package/dist/runner/nodes/deterministic-review.js +206 -0
  36. package/dist/runner/nodes/expert-review.d.ts +68 -0
  37. package/dist/runner/nodes/expert-review.js +197 -0
  38. package/dist/runner/nodes/gap-analysis.d.ts +48 -0
  39. package/dist/runner/nodes/gap-analysis.js +153 -0
  40. package/dist/runner/nodes/generate-prd-manifest.d.ts +53 -0
  41. package/dist/runner/nodes/generate-prd-manifest.js +209 -0
  42. package/dist/runner/nodes/hackernews-search.d.ts +12 -0
  43. package/dist/runner/nodes/hackernews-search.js +63 -0
  44. package/dist/runner/nodes/identify-gaps.d.ts +33 -0
  45. package/dist/runner/nodes/identify-gaps.js +185 -0
  46. package/dist/runner/nodes/plan-queries.d.ts +28 -0
  47. package/dist/runner/nodes/plan-queries.js +120 -0
  48. package/dist/runner/nodes/prd-validator.d.ts +51 -0
  49. package/dist/runner/nodes/prd-validator.js +203 -0
  50. package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +22 -0
  51. package/dist/runner/nodes/synthesis-archaeology-validator.js +131 -0
  52. package/dist/runner/nodes/synthesis-validator.d.ts +51 -0
  53. package/dist/runner/nodes/synthesis-validator.js +185 -0
  54. package/dist/runner/nodes/synthesize-prd.d.ts +84 -0
  55. package/dist/runner/nodes/synthesize-prd.js +202 -0
  56. package/dist/runner/nodes/synthesize-report.d.ts +53 -0
  57. package/dist/runner/nodes/synthesize-report.js +188 -0
  58. package/dist/runner/nodes/tavily-search.d.ts +21 -0
  59. package/dist/runner/nodes/tavily-search.js +57 -0
  60. package/dist/runner/nodes/uspto-search.d.ts +13 -0
  61. package/dist/runner/nodes/uspto-search.js +62 -0
  62. package/dist/runner/nodes/verify-grounding.d.ts +54 -0
  63. package/dist/runner/nodes/verify-grounding.js +134 -0
  64. package/dist/runner/prd.d.ts +28 -0
  65. package/dist/runner/prd.js +494 -0
  66. package/dist/schemas/audit-event.d.ts +1151 -0
  67. package/dist/schemas/audit-event.js +141 -0
  68. package/dist/schemas/index.d.ts +17 -0
  69. package/dist/schemas/index.js +33 -0
  70. package/dist/schemas/mesh-context.d.ts +415 -0
  71. package/dist/schemas/mesh-context.js +95 -0
  72. package/dist/schemas/observed-architecture.d.ts +262 -0
  73. package/dist/schemas/observed-architecture.js +90 -0
  74. package/dist/schemas/prd-brief.d.ts +111 -0
  75. package/dist/schemas/prd-brief.js +37 -0
  76. package/dist/schemas/prd-doc.d.ts +249 -0
  77. package/dist/schemas/prd-doc.js +42 -0
  78. package/dist/schemas/prd-manifest.d.ts +171 -0
  79. package/dist/schemas/prd-manifest.js +73 -0
  80. package/dist/schemas/primitives.d.ts +47 -0
  81. package/dist/schemas/primitives.js +41 -0
  82. package/dist/schemas/query-plan.d.ts +33 -0
  83. package/dist/schemas/query-plan.js +25 -0
  84. package/dist/schemas/ranked-source.d.ts +82 -0
  85. package/dist/schemas/ranked-source.js +29 -0
  86. package/dist/schemas/research-brief.d.ts +114 -0
  87. package/dist/schemas/research-brief.js +49 -0
  88. package/dist/schemas/research-doc.d.ts +104 -0
  89. package/dist/schemas/research-doc.js +37 -0
  90. package/dist/search/arxiv-client.d.ts +41 -0
  91. package/dist/search/arxiv-client.js +88 -0
  92. package/dist/search/hackernews-client.d.ts +33 -0
  93. package/dist/search/hackernews-client.js +44 -0
  94. package/dist/search/provider-result.d.ts +25 -0
  95. package/dist/search/provider-result.js +2 -0
  96. package/dist/search/tavily-client.d.ts +38 -0
  97. package/dist/search/tavily-client.js +53 -0
  98. package/dist/search/uspto-client.d.ts +50 -0
  99. package/dist/search/uspto-client.js +112 -0
  100. package/dist/utils/run-id.d.ts +2 -0
  101. package/dist/utils/run-id.js +22 -0
  102. package/package.json +53 -0
@@ -0,0 +1,22 @@
1
+ /**
2
+ * synthesis-archaeology-validator — structural validator for the
3
+ * archaeology-path synthesis body.
4
+ *
5
+ * Mirrors synthesis-validator's shape (ValidationReport with citation_stats)
6
+ * but enforces the 9 canonical sections from
7
+ * `.caterpillar/prompts/research/synthesis-archaeology.md`:
8
+ *
9
+ * 1. Executive Summary
10
+ * 2. Repository Profile
11
+ * 3. Current Architecture
12
+ * 4. Gap Analysis (G[N] entries with severity)
13
+ * 5. External Research Findings
14
+ * 6. Recommendations (each cites ≥1 G[N] AND ≥1 grounding token)
15
+ * 7. Implementation Roadmap
16
+ * 8. Risk Factors
17
+ * 9. Untraced items (REQUIRED — may say "None.")
18
+ */
19
+ import type { ValidationReport } from './synthesis-validator';
20
+ export declare const CANONICAL_ARCHAEOLOGY_SECTIONS: readonly ["Executive Summary", "Repository Profile", "Current Architecture", "Gap Analysis", "External Research Findings", "Recommendations", "Implementation Roadmap", "Risk Factors", "Untraced items"];
21
+ export type CanonicalArchaeologySection = typeof CANONICAL_ARCHAEOLOGY_SECTIONS[number];
22
+ export declare function validateArchaeologySynthesis(body: string): ValidationReport;
@@ -0,0 +1,131 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CANONICAL_ARCHAEOLOGY_SECTIONS = void 0;
4
+ exports.validateArchaeologySynthesis = validateArchaeologySynthesis;
5
+ exports.CANONICAL_ARCHAEOLOGY_SECTIONS = [
6
+ 'Executive Summary',
7
+ 'Repository Profile',
8
+ 'Current Architecture',
9
+ 'Gap Analysis',
10
+ 'External Research Findings',
11
+ 'Recommendations',
12
+ 'Implementation Roadmap',
13
+ 'Risk Factors',
14
+ 'Untraced items',
15
+ ];
16
/**
 * Structurally validate the markdown body of an archaeology-path synthesis.
 *
 * Checks performed (each violation appends one human-readable error):
 *   1. The H2 headings match CANONICAL_ARCHAEOLOGY_SECTIONS slot by slot.
 *   2. Every `G[N]` entry under "Gap Analysis" carries a severity tag, and the
 *      section (when present) contains at least one entry.
 *   3. Every bullet / numbered line under "Recommendations" references a `G[N]`.
 *   4. "Untraced items", when present, has non-blank content.
 *
 * @param {string} body - Markdown body to validate.
 * @returns {object} Report with `valid`, `errors`, `sectionsFound`,
 *   `citation_stats`, plus `archaeology.gap_count` when at least one gap
 *   entry was found.
 */
function validateArchaeologySynthesis(body) {
    const errors = [];
    const sectionsFound = extractH2Sections(body);

    // Sections present in canonical order (one error per drifting slot).
    for (let i = 0; i < exports.CANONICAL_ARCHAEOLOGY_SECTIONS.length; i++) {
        const expected = exports.CANONICAL_ARCHAEOLOGY_SECTIONS[i];
        if (sectionsFound[i] !== expected) {
            errors.push(`Section #${i + 1} expected "## ${expected}" but found ${sectionsFound[i] ? `"## ${sectionsFound[i]}"` : '(missing)'}.`);
        }
    }

    // Gap Analysis: at least one G[N] entry, each with a severity tag.
    const gapEntries = splitOnGapMarkers(extractSection(body, 'Gap Analysis'));
    for (const gap of gapEntries) {
        // `\b\*\*` fails between space and `*` (both non-word), so the bold
        // alternative has no leading boundary.
        if (!/\bSEVERITY\s*[:=]\s*(HIGH|MEDIUM|LOW)\b|\*\*(HIGH|MEDIUM|LOW)\*\*/i.test(gap.body)) {
            errors.push(`Gap G${gap.id} is missing a severity tag (HIGH / MEDIUM / LOW).`);
        }
    }
    if (gapEntries.length === 0 && sectionsFound.includes('Gap Analysis')) {
        errors.push('Gap Analysis section has no `G[N]` entries.');
    }

    // Recommendations: each bullet / numbered line cites at least one G[N].
    const recLines = extractSection(body, 'Recommendations')
        .split('\n')
        .filter(l => /^\s*(?:[-*]|\d+\.)\s+/.test(l));
    let untracedRecommendations = 0;
    for (const rec of recLines) {
        if (!/\bG\d+\b/.test(rec)) {
            untracedRecommendations += 1;
        }
    }
    if (recLines.length > 0 && untracedRecommendations === recLines.length) {
        errors.push(`All ${recLines.length} Recommendation(s) lack G[N] traceability.`);
    }
    else if (untracedRecommendations > 0) {
        errors.push(`${untracedRecommendations} of ${recLines.length} Recommendation(s) lack G[N] traceability.`);
    }

    // Untraced items is REQUIRED and must not be blank. A missing section is
    // already reported by the ordering check above, so only flag emptiness
    // when the heading actually exists (same guard as Gap Analysis).
    if (sectionsFound.includes('Untraced items')
        && extractSection(body, 'Untraced items').trim().length === 0) {
        errors.push('Untraced items section is empty — must explicitly say "None." when there are none.');
    }

    // Citation stats. source_count is the number of distinct S[N] tokens
    // anywhere in the body (the scan is not restricted to specific sections).
    // OA[...] citations are not checked here.
    const sourceCitations = new Set([...body.matchAll(/\bS(\d+)\b/g)].map(m => m[1]));
    const citation_stats = {
        source_count: sourceCitations.size,
        conclusion_count: 0, // archaeology synthesis doesn't have C[N]
        recommendation_count: recLines.length,
        underCitedConclusions: 0,
        untracedRecommendations,
        untraced_claims: 0,
    };
    return {
        valid: errors.length === 0,
        errors,
        sectionsFound,
        citation_stats,
        // Archaeology-specific extra: number of G[N] entries found.
        ...(gapEntries.length > 0 ? { archaeology: { gap_count: gapEntries.length } } : {}),
    };
}
83
+ // ============================================================================
84
+ // Helpers (copy of the research-side helpers — kept local to avoid coupling)
85
+ // ============================================================================
86
/**
 * List the titles of all H2 (`## Title`) lines in document order.
 * Lines starting with `###` or deeper do not match and are ignored.
 *
 * @param {string} body - Markdown text.
 * @returns {string[]} Trimmed H2 titles.
 */
function extractH2Sections(body) {
    const headingRe = /^##\s+(.+?)\s*$/;
    return body
        .split('\n')
        .map(line => headingRe.exec(line))
        .filter(hit => hit !== null)
        .map(hit => hit[1].trim());
}
96
/**
 * Return the text under the first `## <sectionName>` heading, up to (not
 * including) the next H2 with a different title, or the end of the body.
 * A repeated heading with the same title is skipped and collection continues.
 *
 * @param {string} body - Markdown text.
 * @param {string} sectionName - Exact (trimmed) H2 title to look for.
 * @returns {string} Section content joined with `\n`; `''` when not found.
 */
function extractSection(body, sectionName) {
    const headingRe = /^##\s+(.+?)\s*$/;
    const lines = body.split('\n');
    const start = lines.findIndex((line) => {
        const hit = headingRe.exec(line);
        return hit !== null && hit[1].trim() === sectionName;
    });
    if (start === -1) {
        return '';
    }
    const kept = [];
    for (let i = start + 1; i < lines.length; i++) {
        const hit = headingRe.exec(lines[i]);
        if (hit === null) {
            kept.push(lines[i]);
        }
        else if (hit[1].trim() !== sectionName) {
            break; // a different H2 closes the section
        }
        // same-title H2: drop the heading line, keep collecting
    }
    return kept.join('\n');
}
117
/**
 * Split a Gap Analysis block into one chunk per `**G<N>**` / `### G<N>`
 * marker line. Each chunk runs from its marker line up to the line before
 * the next marker; lines before the first marker are discarded.
 *
 * @param {string} block - Text of the Gap Analysis section.
 * @returns {{id: string, body: string}[]} Entries in document order; `id`
 *   is the digit string captured from the marker.
 */
function splitOnGapMarkers(block) {
    // The marker must be followed by whitespace or end-of-line.
    const markerRe = /^\s*(?:\*\*G(\d+)\*\*|###\s+G(\d+))(?=\s|$)/;
    const result = [];
    let currentId;
    let buffer = null;
    const flush = () => {
        if (buffer !== null) {
            result.push({ id: currentId, body: buffer.join('\n') });
        }
    };
    for (const line of block.split('\n')) {
        const hit = markerRe.exec(line);
        if (hit !== null) {
            flush();
            currentId = hit[1] ?? hit[2];
            buffer = [line];
        }
        else if (buffer !== null) {
            buffer.push(line);
        }
    }
    flush();
    return result;
}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * synthesis-validator — pure structural validator for the markdown body
3
+ * produced by synthesize_report.
4
+ *
5
+ * Enforces the canonical 10-section structure defined in
6
+ * `.caterpillar/prompts/research/synthesis.md`. Returns a ValidationReport
7
+ * (pass/fail + reasons + per-section citation stats) the caller uses to:
8
+ * - decide whether to retry the LLM with feedback
9
+ * - populate ResearchDoc.citation_stats in the audit log
10
+ *
11
+ * Deliberately conservative: only checks structural rules the prompt was
12
+ * explicit about. Semantic checks (does this conclusion actually follow
13
+ * from its sources?) are left to the expert review nodes in the PRD phase.
14
+ */
15
+ /** Sections required in this exact order. Drift fails validation. */
16
+ export declare const CANONICAL_SECTIONS: readonly ["Source Premises", "Executive Summary", "Cross-Source Analysis", "Evidence Gaps", "Jobs-to-be-Done Analysis", "Patent Landscape", "Whitespace Analysis", "Formal Conclusions", "Recommendations", "References"];
17
+ export type CanonicalSection = typeof CANONICAL_SECTIONS[number];
18
+ export interface CitationStats {
19
+ source_count: number;
20
+ conclusion_count: number;
21
+ recommendation_count: number;
22
+ /** Conclusions with a confidence rating but fewer than 2 source citations (1 ok for LOW). */
23
+ underCitedConclusions: number;
24
+ /** Recommendations missing a `C[N]` reference. */
25
+ untracedRecommendations: number;
26
+ /** Top-level claims (sentences) in narrative sections with no `S[N]` citation. Heuristic. */
27
+ untraced_claims: number;
28
+ }
29
+ export interface ValidationReport {
30
+ valid: boolean;
31
+ /** Human-readable errors — fed back to the LLM on retry. */
32
+ errors: string[];
33
+ /** Sections found (in body order). */
34
+ sectionsFound: string[];
35
+ citation_stats: CitationStats;
36
+ }
37
+ /**
38
+ * Parse + validate a synthesised research-doc markdown body.
39
+ *
40
+ * Validation rules (each contributes one error string when violated):
41
+ * 1. Every CANONICAL_SECTION must appear as an H2.
42
+ * 2. The H2 sections must appear in CANONICAL order (no shuffling).
43
+ * 3. `Source Premises` must contain at least 1 `**S[N]**` entry.
44
+ * 4. Every `Formal Conclusion` (a `**C[N]**` line) must:
45
+ * - carry a confidence label `**HIGH**` / `**MEDIUM**` / `**LOW**`,
46
+ * - cite ≥2 `S[N]` (≥1 permitted only when confidence is `LOW`).
47
+ * 5. Every `Recommendation` line must reference at least one `C[N]`.
48
+ *
49
+ * Returns a ValidationReport with `valid: true` when no rule fails.
50
+ */
51
+ export declare function validateSynthesis(body: string): ValidationReport;
@@ -0,0 +1,185 @@
1
+ "use strict";
2
+ /**
3
+ * synthesis-validator — pure structural validator for the markdown body
4
+ * produced by synthesize_report.
5
+ *
6
+ * Enforces the canonical 10-section structure defined in
7
+ * `.caterpillar/prompts/research/synthesis.md`. Returns a ValidationReport
8
+ * (pass/fail + reasons + per-section citation stats) the caller uses to:
9
+ * - decide whether to retry the LLM with feedback
10
+ * - populate ResearchDoc.citation_stats in the audit log
11
+ *
12
+ * Deliberately conservative: only checks structural rules the prompt was
13
+ * explicit about. Semantic checks (does this conclusion actually follow
14
+ * from its sources?) are left to the expert review nodes in the PRD phase.
15
+ */
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.CANONICAL_SECTIONS = void 0;
18
+ exports.validateSynthesis = validateSynthesis;
19
+ /** Sections required in this exact order. Drift fails validation. */
20
+ exports.CANONICAL_SECTIONS = [
21
+ 'Source Premises',
22
+ 'Executive Summary',
23
+ 'Cross-Source Analysis',
24
+ 'Evidence Gaps',
25
+ 'Jobs-to-be-Done Analysis',
26
+ 'Patent Landscape',
27
+ 'Whitespace Analysis',
28
+ 'Formal Conclusions',
29
+ 'Recommendations',
30
+ 'References',
31
+ ];
32
/**
 * Validate the markdown body of a synthesised research doc.
 *
 * Rules (violations are appended to `errors`):
 *   1-2. The H2 headings equal CANONICAL_SECTIONS slot by slot.
 *   3.   `Source Premises` contains at least one `**S[N]**` entry.
 *   4.   Each Formal Conclusion chunk has a `**HIGH**` / `**MEDIUM**` /
 *        `**LOW**` label and cites ≥2 distinct `S[N]` (≥1 when `LOW`).
 *   5.   Each bullet / numbered Recommendation line references a `C[N]`.
 *
 * `untraced_claims` is an informational heuristic and never fails validation.
 *
 * @param {string} body - Markdown body to validate.
 * @returns {{valid: boolean, errors: string[], sectionsFound: string[], citation_stats: object}}
 */
function validateSynthesis(body) {
    const errors = [];
    const sectionsFound = extractH2Sections(body);

    // Rules 1 + 2: compare each canonical slot; report every drifting slot.
    exports.CANONICAL_SECTIONS.forEach((expected, slot) => {
        const actual = sectionsFound[slot];
        if (actual === expected) {
            return;
        }
        const seen = actual ? `"## ${actual}"` : '(missing)';
        errors.push(`Section #${slot + 1} expected "## ${expected}" but found ${seen}.`);
    });

    // Rule 3: distinct **S[N]** ids declared under Source Premises.
    const premiseIds = new Set();
    for (const hit of extractSection(body, 'Source Premises').matchAll(/\*\*S(\d+)\*\*/g)) {
        premiseIds.add(parseInt(hit[1], 10));
    }
    const source_count = premiseIds.size;
    if (source_count === 0) {
        errors.push('Source Premises has no `**S[N]**` entries.');
    }

    // Rule 4: one chunk per conclusion marker; check label, then citations.
    const conclusionChunks = splitOnConclusionMarkers(extractSection(body, 'Formal Conclusions'));
    const conclusion_count = conclusionChunks.length;
    let underCitedConclusions = 0;
    for (const chunk of conclusionChunks) {
        const label = /\*\*(HIGH|MEDIUM|LOW)\*\*/.exec(chunk.body);
        if (label === null) {
            errors.push(`Conclusion C${chunk.id} is missing a confidence label (**HIGH** / **MEDIUM** / **LOW**).`);
            underCitedConclusions += 1;
            continue;
        }
        const confidence = label[1];
        const citedIds = new Set();
        for (const ref of chunk.body.matchAll(/\bS(\d+)\b/g)) {
            citedIds.add(parseInt(ref[1], 10));
        }
        const minRequired = confidence === 'LOW' ? 1 : 2;
        if (citedIds.size < minRequired) {
            errors.push(`Conclusion C${chunk.id} (${confidence}) cites ${citedIds.size} source(s); requires ≥${minRequired}.`);
            underCitedConclusions += 1;
        }
    }

    // Rule 5: every bullet (- or *) or numbered ("1.") line is one recommendation.
    const bulletRe = /^\s*(?:[-*]|\d+\.)\s+/;
    const recLines = extractSection(body, 'Recommendations')
        .split('\n')
        .filter(line => bulletRe.test(line));
    const recommendation_count = recLines.length;
    const untracedRecommendations = recLines.filter(rec => !/\bC\d+\b/.test(rec)).length;
    if (untracedRecommendations > 0) {
        if (untracedRecommendations === recommendation_count) {
            errors.push(`All ${recommendation_count} Recommendation(s) lack C[N] traceability.`);
        }
        else {
            errors.push(`${untracedRecommendations} of ${recommendation_count} Recommendation(s) lack C[N] traceability.`);
        }
    }

    // Heuristic: sentences longer than 40 chars in narrative sections that
    // mention neither S[N] nor C[N]. Not a hard-fail signal.
    const narrativeSections = ['Executive Summary', 'Cross-Source Analysis', 'Jobs-to-be-Done Analysis', 'Whitespace Analysis'];
    const untraced_claims = narrativeSections
        .flatMap(name => extractSection(body, name).match(/[^.!?\n]+[.!?]/g) ?? [])
        .filter(sentence => sentence.trim().length > 40 && !/\b[SC]\d+\b/.test(sentence))
        .length;

    return {
        valid: errors.length === 0,
        errors,
        sectionsFound,
        citation_stats: {
            source_count,
            conclusion_count,
            recommendation_count,
            underCitedConclusions,
            untracedRecommendations,
            untraced_claims,
        },
    };
}
130
/**
 * Collect H2 titles, one per `## Heading` line, in the order they appear.
 * H3 and deeper headings are not matched.
 *
 * @param {string} body - Markdown text.
 * @returns {string[]} Trimmed heading titles.
 */
function extractH2Sections(body) {
    return body.split('\n').flatMap((line) => {
        const hit = line.match(/^##\s+(.+?)\s*$/);
        return hit ? [hit[1].trim()] : [];
    });
}
141
/**
 * Break a Formal Conclusions block into one entry per `**C<N>**` or
 * `### C<N>` marker line. An entry's body is the marker line plus every
 * following line up to the next marker. Text before the first marker is
 * dropped.
 *
 * @param {string} block - Text of the Formal Conclusions section.
 * @returns {{id: string, body: string}[]} Entries in document order; `id`
 *   is the digit string captured from the marker.
 */
function splitOnConclusionMarkers(block) {
    // No \b after `**C1**`: `*` and a following space are both non-word
    // characters, so an explicit whitespace/EOL lookahead is used instead.
    const markerRe = /^\s*(?:\*\*C(\d+)\*\*|###\s+C(\d+))(?=\s|$)/;
    const grouped = block.split('\n').reduce((acc, line) => {
        const hit = line.match(markerRe);
        if (hit) {
            acc.push({ id: hit[1] ?? hit[2], rows: [line] });
        }
        else if (acc.length > 0) {
            acc[acc.length - 1].rows.push(line);
        }
        return acc;
    }, []);
    return grouped.map(({ id, rows }) => ({ id, body: rows.join('\n') }));
}
164
/**
 * Slice out the content of a single named H2 section: everything after the
 * first `## <sectionName>` line up to the next H2 with a different title,
 * or EOF. A repeated H2 with the same title is skipped, not treated as a
 * boundary.
 *
 * @param {string} body - Markdown text.
 * @param {string} sectionName - Exact (trimmed) H2 title.
 * @returns {string} Section lines joined with `\n`, or `''` if absent.
 */
function extractSection(body, sectionName) {
    const rows = body.split('\n');
    const h2Title = (row) => {
        const m = /^##\s+(.+?)\s*$/.exec(row);
        return m ? { title: m[1].trim() } : null;
    };
    let cursor = 0;
    while (cursor < rows.length) {
        const opening = h2Title(rows[cursor]);
        cursor += 1;
        if (!opening || opening.title !== sectionName) {
            continue; // still searching for the opening heading
        }
        const slice = [];
        for (; cursor < rows.length; cursor++) {
            const next = h2Title(rows[cursor]);
            if (!next) {
                slice.push(rows[cursor]);
            }
            else if (next.title !== sectionName) {
                break; // next H2 closes the section
            }
        }
        return slice.join('\n');
    }
    return '';
}
@@ -0,0 +1,84 @@
1
+ /**
2
+ * synthesize_prd — LLM node.
3
+ *
4
+ * Loads `.caterpillar/prompts/prd/synthesis.md`, fills in brief +
5
+ * mesh-context + ranked-sources (carried over from the upstream research
6
+ * doc), calls Anthropic / GitHub Models, runs prd-validator on the body.
7
+ *
8
+ * Unlike research's synthesize_report, this node has TWO call modes:
9
+ * - First iteration: standard prompt fill, no prior-review feedback
10
+ * - Subsequent iterations: same prompt + a feedback prefix block
11
+ * summarising the prior verify_grounding failure (CHANGES from
12
+ * architecture_review + security_review) so the LLM knows what to fix
13
+ *
14
+ * Returns the validated PRD body + the structural signals
15
+ * verify_grounding needs, plus standard LLM telemetry.
16
+ */
17
+ import type { LlmProvider, MeshContext, PrdBrief, RankedSource } from '../../schemas';
18
+ import { type LoadedPrompt } from '../../mesh/prompt-loader';
19
+ import { type PrdCitationSignals, type PrdValidationReport } from './prd-validator';
20
+ /**
21
+ * GapFeedback composer surface — collected from all 4 reviewers (LLM + deterministic)
22
+ * at the end of each iteration that didn't PASS. Prepended to the next synthesis.
23
+ */
24
+ export interface DeterministicFindings {
25
+ severity: 'PASS' | 'MINOR' | 'MAJOR';
26
+ invalid_citations: Array<{
27
+ where: string;
28
+ cite: string;
29
+ reason: string;
30
+ }>;
31
+ coverage_discrepancies: Array<{
32
+ premise: string;
33
+ claimed_status: string;
34
+ detail: string;
35
+ }>;
36
+ }
37
+ export interface PriorReviewFeedback {
38
+ iteration: number;
39
+ architecture: {
40
+ score: number;
41
+ severity: string;
42
+ changes: string[];
43
+ };
44
+ security: {
45
+ score: number;
46
+ severity: string;
47
+ changes: string[];
48
+ };
49
+ /** Deterministic findings — what citation-grep saw; usually the most actionable signal. */
50
+ det_architecture?: DeterministicFindings;
51
+ det_security?: DeterministicFindings;
52
+ /** |arch_score − sec_score| from the prior round; high values signal reconciliation needed. */
53
+ disagreement_delta?: number;
54
+ }
55
+ export interface SynthesizePrdOpts {
56
+ meshDir: string;
57
+ brief: PrdBrief;
58
+ meshContext: MeshContext;
59
+ /** Sources carried over from the research doc (R[N] in the PRD body). */
60
+ rankedSources: RankedSource[];
61
+ /** Provider routing — comes from brief.llm_provider unless overridden. */
62
+ provider?: LlmProvider;
63
+ anthropicApiKey?: string;
64
+ githubToken?: string;
65
+ /** Present on iteration ≥ 2 — carries the prior round's review CHANGES. */
66
+ priorFeedback?: PriorReviewFeedback;
67
+ fetchImpl?: typeof fetch;
68
+ }
69
+ export interface SynthesizePrdResult {
70
+ body_md: string;
71
+ prompt: LoadedPrompt;
72
+ validation: PrdValidationReport;
73
+ signals: PrdCitationSignals;
74
+ llm: {
75
+ provider: LlmProvider;
76
+ model: string;
77
+ inputTokens: number;
78
+ outputTokens: number;
79
+ costUsd: number;
80
+ /** Number of LLM calls within this single iteration (1 happy, 2 on retry). */
81
+ attempts: number;
82
+ };
83
+ }
84
+ export declare function synthesizePrd(opts: SynthesizePrdOpts): Promise<SynthesizePrdResult>;
@@ -0,0 +1,202 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.synthesizePrd = synthesizePrd;
4
+ const llm_router_1 = require("../../llm/llm-router");
5
+ const prompt_loader_1 = require("../../mesh/prompt-loader");
6
+ const prd_validator_1 = require("./prd-validator");
7
+ const MAX_TOKENS = 8000;
8
/**
 * synthesize_prd — LLM node. Fills the `prd/synthesis` prompt, calls the
 * configured provider, and validates the returned body with `validatePrd`.
 * Makes at most two calls: the initial one, and one retry that appends the
 * structural validation errors from the first response.
 *
 * When `opts.priorFeedback` is present, the reviewer feedback block is
 * prepended on EVERY attempt. Each call is a fresh prompt, so a retry that
 * omitted the block could return a PRD that ignores the reviewers' changes.
 *
 * @param {object} opts - See SynthesizePrdOpts (meshDir, brief, meshContext,
 *   rankedSources, optional provider / credentials / priorFeedback / fetchImpl).
 * @returns {Promise<object>} body_md, prompt, validation, signals, and `llm`
 *   telemetry summed across attempts (`attempts` is 1 or 2).
 * @throws {Error} When the body still fails structural validation after the
 *   second attempt.
 */
async function synthesizePrd(opts) {
    const provider = opts.provider ?? opts.brief.llm_provider;
    const promptContext = buildPromptContext(opts.brief, opts.meshContext, opts.rankedSources);
    const prompt = (0, prompt_loader_1.loadPrompt)({
        meshDir: opts.meshDir,
        packId: 'prd/synthesis',
        context: promptContext,
    });
    const system = 'You write PRDs with strict section discipline and bidirectional traceability. Every Functional Requirement (FR-NN) cites at least one R[N] or E[N] premise. Every Security Requirement (SR-NN) cites at least one THR-NNN, A0X, or NIST-XX-NN identifier. The Coverage Analysis table includes a row for every premise. The 10 H2 sections appear in the exact order requested. No prose before the first `##` heading.';
    // Built once and reused: reviewer feedback applies to the whole
    // iteration, including the structural-validation retry.
    const feedbackBlock = opts.priorFeedback
        ? buildFeedbackBlock(opts.priorFeedback)
        : '';
    let lastReport = null;
    let totalInput = 0;
    let totalOutput = 0;
    let totalCost = 0;
    let lastModel = '';
    for (let attempt = 1; attempt <= 2; attempt++) {
        // Only the retry carries the previous attempt's validation errors.
        const validationBlock = attempt === 2 && lastReport
            ? `\n\n---\n\nYour previous response failed structural validation:\n${lastReport.errors.map(e => `- ${e}`).join('\n')}\n\nRewrite the document. Fix EVERY error above. Maintain section order. Every FR-NN must cite ≥1 R[N] or E[N]; every SR-NN must cite ≥1 THR/A0X/NIST identifier; the Coverage Analysis table must include every premise.`
            : '';
        const userPrompt = `${feedbackBlock}${prompt.filled}${validationBlock}`;
        const result = await (0, llm_router_1.callLlm)({
            provider,
            tier: 'synth',
            anthropicApiKey: opts.anthropicApiKey,
            githubToken: opts.githubToken,
            system,
            prompt: userPrompt,
            maxTokens: MAX_TOKENS,
            fetchImpl: opts.fetchImpl,
        });
        // Telemetry accumulates across attempts so the caller sees total spend.
        totalInput += result.inputTokens;
        totalOutput += result.outputTokens;
        totalCost += result.costUsd;
        lastModel = result.model;
        const body = stripFences(result.text);
        const report = (0, prd_validator_1.validatePrd)(body);
        if (report.valid) {
            return {
                body_md: body,
                prompt,
                validation: report,
                signals: report.signals,
                llm: { provider, model: lastModel, inputTokens: totalInput, outputTokens: totalOutput, costUsd: totalCost, attempts: attempt },
            };
        }
        lastReport = report;
    }
    throw new Error(`synthesize_prd: structural validation failed after 2 attempts. Last errors: ${lastReport.errors.join('; ')}`);
}
59
/**
 * Remove a single wrapping code fence (plain, `markdown` or `md`) from a
 * response when the whole trimmed text is enclosed by it; otherwise return
 * the trimmed text unchanged.
 *
 * @param {string} raw - Raw response text.
 * @returns {string} Trimmed text without the outer fence.
 */
function stripFences(raw) {
    const text = raw.trim();
    const wrapped = /^```(?:markdown|md)?\s*([\s\S]*?)```\s*$/.exec(text);
    if (wrapped === null) {
        return text;
    }
    return wrapped[1].trim();
}
64
/**
 * Render the prior-review feedback as a markdown block to prepend to the
 * synthesis prompt. Sections, in order:
 *   - a title and an instruction line,
 *   - a reviewer-disagreement note (only when `disagreement_delta` ≥ 0.2),
 *   - the architecture and security expert CHANGES lists,
 *   - the deterministic findings (invalid citations / coverage
 *     discrepancies) for each side, omitted when absent or empty,
 *   - a closing `---` rule.
 *
 * @param {object} feedback - See PriorReviewFeedback.
 * @returns {string} Markdown text ending in `---` plus a newline.
 */
function buildFeedbackBlock(feedback) {
    // One LLM-expert section: heading, CHANGES bullets (or a placeholder), blank line.
    const expertSection = (heading, review) => [
        `## ${heading} expert review (LLM, score ${review.score.toFixed(2)}, severity ${review.severity})`,
        ...(review.changes.length === 0
            ? ['- _no specific changes flagged_']
            : review.changes.map(change => `- ${change}`)),
        '',
    ];
    // One deterministic-reviewer section; empty when there is nothing to report.
    const deterministicSection = (label, findings) => {
        if (!findings) {
            return [];
        }
        const badCites = findings.invalid_citations;
        const mismatches = findings.coverage_discrepancies;
        if (badCites.length === 0 && mismatches.length === 0) {
            return [];
        }
        const section = [`## ${label} (deterministic citation grep, severity ${findings.severity})`];
        if (badCites.length > 0) {
            section.push(
                '',
                '**Invalid citations — these IDs do not exist in the mesh and must be removed or replaced:**',
                ...badCites.map(c => `- \`${c.where}\` cites \`${c.cite}\` — ${c.reason}`),
            );
        }
        if (mismatches.length > 0) {
            section.push(
                '',
                '**Coverage Analysis table discrepancies — fix the table or the body so they agree:**',
                ...mismatches.map(x => `- Premise \`${x.premise}\`: ${x.detail}`),
            );
        }
        section.push('');
        return section;
    };

    const delta = feedback.disagreement_delta;
    const disagreement = delta !== undefined && delta >= 0.2
        ? [
            `## Reviewer Disagreement: ${delta.toFixed(2)}`,
            '',
            'The architecture and security experts gave scores that diverged by ≥ 0.2. Reconcile their perspectives in this draft — explicitly address why one side scored higher.',
            '',
        ]
        : [];

    return [
        `# Prior Review Feedback (iteration ${feedback.iteration})`,
        '',
        'The previous PRD draft did not meet the grounding threshold. Apply every change below before re-emitting the PRD.',
        '',
        ...disagreement,
        // LLM expert CHANGES — qualitative guidance
        ...expertSection('Architecture', feedback.architecture),
        ...expertSection('Security', feedback.security),
        // Deterministic findings — specific IDs to fix
        ...deterministicSection('Architecture (deterministic)', feedback.det_architecture),
        ...deterministicSection('Security (deterministic)', feedback.det_security),
        '---',
        '',
    ].join('\n');
}
134
/**
 * Assemble the context object handed to the PRD synthesis prompt.
 *
 * Premise ids: `R1..RN` are generated from `rankedSources` in order;
 * `E1..EN` are generated from the mesh expert points (ADRs and mesh gaps).
 * Several fields are fixed placeholder strings.
 *
 * @param {object} brief - PRD brief (reads `research_source` and `scope.id`).
 * @param {object} mesh - Mesh context.
 * @param {object[]} rankedSources - Sources carried over from the research doc.
 * @returns {object} Context with brief, scope, research_findings,
 *   mesh_expert_input, clarifying_answers, calm_endpoints, stride_entries,
 *   nist_controls and owasp_in_scope.
 */
function buildPromptContext(brief, mesh, rankedSources) {
    const expertPoints = extractMeshExpertPoints(mesh);
    const topic = deriveTopic(brief);
    const barId = brief.scope.id ?? '(portfolio scope)';
    const findingLines = rankedSources.map((source, idx) => `R${idx + 1}: ${source.title} — ${source.url} (provider=${source.provider}, salience=${source.salience_score})`);
    let expertInput = '(no expert input — BAR mesh artifacts are sparse)';
    if (expertPoints.length !== 0) {
        expertInput = expertPoints.map((point, idx) => `E${idx + 1}: ${point}`).join('\n');
    }
    // Comma-joined id list, or a placeholder when the list is empty.
    const joinedOrNone = ids => ids.join(', ') || '(none)';
    return {
        brief: { topic },
        scope: { bar_id: barId },
        research_findings: findingLines.join('\n'),
        mesh_expert_input: expertInput,
        clarifying_answers: '(none — ask_experts deferred to phase 4b)',
        calm_endpoints: joinedOrNone(extractCalmEndpoints(mesh)),
        stride_entries: joinedOrNone(extractThreatIds(mesh)),
        nist_controls: '(none documented in mesh)',
        owasp_in_scope: '(derived per threat at synthesis time)',
    };
}
159
/**
 * Derive a human-readable topic string from the brief's research source.
 *
 * - `kind === 'pr'`: `PRD from research PR #<n>` when the URL contains
 *   `/pull/<n>`, otherwise a generic PR label.
 * - otherwise: the basename of `relative_path` without a trailing `.md`,
 *   with hyphens turned into spaces. Falls back to `topic` when that
 *   basename is empty (e.g. a path ending in `/`, or a file named `.md`).
 *
 * @param {object} brief - PRD brief; reads `brief.research_source`.
 * @returns {string} Topic label for the prompt.
 */
function deriveTopic(brief) {
    if (brief.research_source.kind === 'pr') {
        const m = brief.research_source.url.match(/\/pull\/(\d+)/);
        return m ? `PRD from research PR #${m[1]}` : 'PRD (research source: PR)';
    }
    // split() always yields at least one element, so pop() is never
    // undefined; the case that needs a fallback is an EMPTY stem, which `??`
    // would not catch.
    const basename = brief.research_source.relative_path.split('/').pop() ?? '';
    const stem = basename.replace(/\.md$/, '');
    return (stem === '' ? 'topic' : stem).replace(/-/g, ' ');
}
167
/**
 * Collect "expert input" lines from the mesh BAR: up to the first six ADRs
 * (`ADR <id> (<status>): <title>`) followed by one `mesh gap: <gap>` line
 * per mesh gap.
 *
 * A missing or non-array `adrs` / `mesh_gaps` is treated as empty, matching
 * the defensive `Array.isArray` checks in extractCalmEndpoints and
 * extractThreatIds, so a sparse BAR does not throw.
 *
 * @param {object} mesh - Mesh context; reads `mesh.bar`.
 * @returns {string[]} Expert-point lines (possibly empty).
 */
function extractMeshExpertPoints(mesh) {
    const bar = mesh.bar;
    if (!bar) {
        return [];
    }
    const adrs = Array.isArray(bar.adrs) ? bar.adrs : [];
    const gaps = Array.isArray(bar.mesh_gaps) ? bar.mesh_gaps : [];
    return [
        // Cap ADRs at six, as in the original.
        ...adrs.slice(0, 6).map(adr => `ADR ${adr.id} (${adr.status}): ${adr.title}`),
        ...gaps.map(gap => `mesh gap: ${gap}`),
    ];
}
181
/**
 * List the string-valued `unique-id` of every node in the BAR's CALM model.
 * Returns an empty list when the model is absent, is not an object, or has
 * no `nodes` array. Nodes whose `unique-id` is not a string are skipped.
 *
 * @param {object} mesh - Mesh context; reads `mesh.bar?.calm_model`.
 * @returns {string[]} Node ids in model order.
 */
function extractCalmEndpoints(mesh) {
    const model = mesh.bar?.calm_model;
    const nodeList = model && typeof model === 'object' ? model.nodes : undefined;
    if (!Array.isArray(nodeList)) {
        return [];
    }
    return nodeList.flatMap((node) => {
        const uid = node['unique-id'];
        return typeof uid === 'string' ? [uid] : [];
    });
}
194
/**
 * List the string-valued `id` of every threat on the BAR.
 * Returns an empty list when `mesh.bar?.threats` is not an array; threats
 * whose `id` is not a string are skipped.
 *
 * @param {object} mesh - Mesh context; reads `mesh.bar?.threats`.
 * @returns {string[]} Threat ids in list order.
 */
function extractThreatIds(mesh) {
    const threatList = mesh.bar?.threats;
    if (!Array.isArray(threatList)) {
        return [];
    }
    return threatList.reduce((ids, threat) => {
        const candidate = threat.id;
        if (typeof candidate === 'string') {
            ids.push(candidate);
        }
        return ids;
    }, []);
}