@maintainabilityai/research-runner 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +82 -0
- package/bin/research-runner.js +2 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +209 -0
- package/dist/llm/anthropic-client.d.ts +39 -0
- package/dist/llm/anthropic-client.js +74 -0
- package/dist/llm/github-models-client.d.ts +46 -0
- package/dist/llm/github-models-client.js +78 -0
- package/dist/llm/llm-router.d.ts +46 -0
- package/dist/llm/llm-router.js +60 -0
- package/dist/mesh/get-mesh-sha.d.ts +1 -0
- package/dist/mesh/get-mesh-sha.js +27 -0
- package/dist/mesh/mesh-reader.d.ts +14 -0
- package/dist/mesh/mesh-reader.js +392 -0
- package/dist/mesh/prompt-loader.d.ts +22 -0
- package/dist/mesh/prompt-loader.js +119 -0
- package/dist/mesh/threat-model-reader.d.ts +33 -0
- package/dist/mesh/threat-model-reader.js +123 -0
- package/dist/runner/archeologist.d.ts +39 -0
- package/dist/runner/archeologist.js +620 -0
- package/dist/runner/audit-emitter.d.ts +62 -0
- package/dist/runner/audit-emitter.js +210 -0
- package/dist/runner/hatters-tag-builder.d.ts +52 -0
- package/dist/runner/hatters-tag-builder.js +40 -0
- package/dist/runner/nodes/analyze-architecture.d.ts +10 -0
- package/dist/runner/nodes/analyze-architecture.js +447 -0
- package/dist/runner/nodes/arxiv-search.d.ts +12 -0
- package/dist/runner/nodes/arxiv-search.js +52 -0
- package/dist/runner/nodes/clone-and-index.d.ts +32 -0
- package/dist/runner/nodes/clone-and-index.js +158 -0
- package/dist/runner/nodes/dedupe-and-rank.d.ts +27 -0
- package/dist/runner/nodes/dedupe-and-rank.js +98 -0
- package/dist/runner/nodes/deterministic-review.d.ts +55 -0
- package/dist/runner/nodes/deterministic-review.js +206 -0
- package/dist/runner/nodes/expert-review.d.ts +68 -0
- package/dist/runner/nodes/expert-review.js +197 -0
- package/dist/runner/nodes/gap-analysis.d.ts +48 -0
- package/dist/runner/nodes/gap-analysis.js +153 -0
- package/dist/runner/nodes/generate-prd-manifest.d.ts +53 -0
- package/dist/runner/nodes/generate-prd-manifest.js +209 -0
- package/dist/runner/nodes/hackernews-search.d.ts +12 -0
- package/dist/runner/nodes/hackernews-search.js +63 -0
- package/dist/runner/nodes/identify-gaps.d.ts +33 -0
- package/dist/runner/nodes/identify-gaps.js +185 -0
- package/dist/runner/nodes/plan-queries.d.ts +28 -0
- package/dist/runner/nodes/plan-queries.js +120 -0
- package/dist/runner/nodes/prd-validator.d.ts +51 -0
- package/dist/runner/nodes/prd-validator.js +203 -0
- package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +22 -0
- package/dist/runner/nodes/synthesis-archaeology-validator.js +131 -0
- package/dist/runner/nodes/synthesis-validator.d.ts +51 -0
- package/dist/runner/nodes/synthesis-validator.js +185 -0
- package/dist/runner/nodes/synthesize-prd.d.ts +84 -0
- package/dist/runner/nodes/synthesize-prd.js +202 -0
- package/dist/runner/nodes/synthesize-report.d.ts +53 -0
- package/dist/runner/nodes/synthesize-report.js +188 -0
- package/dist/runner/nodes/tavily-search.d.ts +21 -0
- package/dist/runner/nodes/tavily-search.js +57 -0
- package/dist/runner/nodes/uspto-search.d.ts +13 -0
- package/dist/runner/nodes/uspto-search.js +62 -0
- package/dist/runner/nodes/verify-grounding.d.ts +54 -0
- package/dist/runner/nodes/verify-grounding.js +134 -0
- package/dist/runner/prd.d.ts +28 -0
- package/dist/runner/prd.js +494 -0
- package/dist/schemas/audit-event.d.ts +1151 -0
- package/dist/schemas/audit-event.js +141 -0
- package/dist/schemas/index.d.ts +17 -0
- package/dist/schemas/index.js +33 -0
- package/dist/schemas/mesh-context.d.ts +415 -0
- package/dist/schemas/mesh-context.js +95 -0
- package/dist/schemas/observed-architecture.d.ts +262 -0
- package/dist/schemas/observed-architecture.js +90 -0
- package/dist/schemas/prd-brief.d.ts +111 -0
- package/dist/schemas/prd-brief.js +37 -0
- package/dist/schemas/prd-doc.d.ts +249 -0
- package/dist/schemas/prd-doc.js +42 -0
- package/dist/schemas/prd-manifest.d.ts +171 -0
- package/dist/schemas/prd-manifest.js +73 -0
- package/dist/schemas/primitives.d.ts +47 -0
- package/dist/schemas/primitives.js +41 -0
- package/dist/schemas/query-plan.d.ts +33 -0
- package/dist/schemas/query-plan.js +25 -0
- package/dist/schemas/ranked-source.d.ts +82 -0
- package/dist/schemas/ranked-source.js +29 -0
- package/dist/schemas/research-brief.d.ts +114 -0
- package/dist/schemas/research-brief.js +49 -0
- package/dist/schemas/research-doc.d.ts +104 -0
- package/dist/schemas/research-doc.js +37 -0
- package/dist/search/arxiv-client.d.ts +41 -0
- package/dist/search/arxiv-client.js +88 -0
- package/dist/search/hackernews-client.d.ts +33 -0
- package/dist/search/hackernews-client.js +44 -0
- package/dist/search/provider-result.d.ts +25 -0
- package/dist/search/provider-result.js +2 -0
- package/dist/search/tavily-client.d.ts +38 -0
- package/dist/search/tavily-client.js +53 -0
- package/dist/search/uspto-client.d.ts +50 -0
- package/dist/search/uspto-client.js +112 -0
- package/dist/utils/run-id.d.ts +2 -0
- package/dist/utils/run-id.js +22 -0
- package/package.json +53 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runExpertReview = runExpertReview;
|
|
4
|
+
exports.parseReviewResponse = parseReviewResponse;
|
|
5
|
+
const llm_router_1 = require("../../llm/llm-router");
|
|
6
|
+
const prompt_loader_1 = require("../../mesh/prompt-loader");
|
|
7
|
+
const MAX_TOKENS = 1500;
|
|
8
|
+
/**
 * Run one expert review (architecture or security) of a PRD body through the LLM.
 *
 * The prompt pack is chosen from `opts.expert`, filled with the context built
 * by `buildPromptContext`, and sent to the LLM. The reply is parsed with
 * `parseReviewResponse`; if parsing fails, one retry is made with the parse
 * error and the expected format appended to the prompt.
 *
 * @param opts Review options: expert, prdBody, meshContext, iteration,
 *   priorReview, meshDir, provider, anthropicApiKey, githubToken, fetchImpl.
 * @returns The parsed review, the loaded prompt, and LLM usage totals summed
 *   over every attempt made.
 * @throws Error when the reply cannot be parsed after 2 attempts.
 */
async function runExpertReview(opts) {
    const isArchitecture = opts.expert === 'architecture';
    let packId;
    if (isArchitecture) {
        packId = 'prd/architecture-review';
    }
    else {
        packId = 'prd/security-review';
    }
    const promptContext = buildPromptContext(opts.expert, opts.prdBody, opts.meshContext, opts.iteration, opts.priorReview);
    const prompt = (0, prompt_loader_1.loadPrompt)({ meshDir: opts.meshDir, packId, context: promptContext });
    let system;
    if (isArchitecture) {
        system = 'You are a senior architect reviewing a PRD for grounding against the CALM model. Output strictly in the SCORE / SEVERITY / COVERED / MISSING / CHANGES format — no prose before or after. SEVERITY must be one of PASS / MINOR / MAJOR / BLOCKING.';
    }
    else {
        system = 'You are a senior application-security engineer reviewing a PRD for STRIDE / OWASP / NIST coverage. Output strictly in the SCORE / SEVERITY / COVERED / MISSING / CHANGES format — no prose before or after. SEVERITY must be one of PASS / MINOR / MAJOR / BLOCKING.';
    }
    // Usage is accumulated across attempts so a retry is still fully accounted for.
    const usage = { inputTokens: 0, outputTokens: 0, costUsd: 0 };
    let lastModel = '';
    let lastError = null;
    let attempt = 0;
    while (attempt < 2) {
        attempt += 1;
        let userPrompt = prompt.filled;
        if (attempt !== 1) {
            // Retry: replay the original prompt plus the parse failure and the exact format.
            userPrompt = `${prompt.filled}\n\n---\n\nYour previous response could not be parsed:\n${lastError}\n\nReturn EXACTLY this 5-field structured-text format, no prose:\nSCORE: <0..1>\nSEVERITY: <PASS|MINOR|MAJOR|BLOCKING>\nCOVERED: <ids>\nMISSING: <ids>\nCHANGES:\n- <change>`;
        }
        const result = await (0, llm_router_1.callLlm)({
            provider: opts.provider,
            tier: 'plan', // reviews are tighter than synthesis — cheaper tier is fine
            anthropicApiKey: opts.anthropicApiKey,
            githubToken: opts.githubToken,
            system,
            prompt: userPrompt,
            maxTokens: MAX_TOKENS,
            fetchImpl: opts.fetchImpl,
        });
        usage.inputTokens += result.inputTokens;
        usage.outputTokens += result.outputTokens;
        usage.costUsd += result.costUsd;
        lastModel = result.model;
        const parsed = parseReviewResponse(result.text, opts.expert, opts.iteration);
        if (!parsed.success) {
            lastError = parsed.error;
            continue;
        }
        return {
            review: parsed.data,
            prompt,
            llm: { provider: opts.provider, model: lastModel, inputTokens: usage.inputTokens, outputTokens: usage.outputTokens, costUsd: usage.costUsd, attempts: attempt },
        };
    }
    throw new Error(`expert_review (${opts.expert}): could not parse SCORE/SEVERITY/COVERED/MISSING/CHANGES after 2 attempts. Last error: ${lastError}`);
}
|
|
52
|
+
// ============================================================================
|
|
53
|
+
// Response parsing
|
|
54
|
+
// ============================================================================
|
|
55
|
+
const VALID_SEVERITY = new Set(['PASS', 'MINOR', 'MAJOR', 'BLOCKING']);
|
|
56
|
+
/**
 * Parse a structured-text review reply into a review object.
 *
 * SCORE and SEVERITY are mandatory; COVERED, MISSING and CHANGES default to
 * empty lists when absent.
 *
 * @param raw Raw LLM reply text.
 * @param expert Expert kind, copied into the result.
 * @param iteration Iteration number, copied into the result.
 * @returns `{ success: true, data }` on success, otherwise
 *   `{ success: false, error }` describing the first field that failed.
 */
function parseReviewResponse(raw, expert, iteration) {
    const body = stripFences(raw.trim());
    const failure = (error) => ({ success: false, error });
    const score = parseScore(body);
    if (score === null) {
        return failure('SCORE: <float> line missing or unparseable');
    }
    const severity = parseSeverity(body);
    if (!severity) {
        return failure('SEVERITY must be one of PASS / MINOR / MAJOR / BLOCKING');
    }
    const data = {
        expert,
        iteration,
        score,
        severity,
        covered_ids: parseIdList(body, /^COVERED:\s*(.*)$/im),
        missing_ids: parseIdList(body, /^MISSING:\s*(.*)$/im),
        changes: parseChangesBlock(body),
    };
    return { success: true, data };
}
|
|
82
|
+
/**
 * Read the `SCORE:` line and return its value clamped to the range 0..1.
 *
 * @param text Review text to scan (the header match is case-insensitive).
 * @returns The clamped score, or `null` when the line is missing or the value
 *   is not a finite number.
 */
function parseScore(text) {
    const hit = /^SCORE:\s*([0-9.]+)\s*$/im.exec(text);
    const value = hit === null ? NaN : parseFloat(hit[1]);
    if (!Number.isFinite(value)) {
        return null;
    }
    // Out-of-range scores are pulled back into [0, 1] rather than rejected.
    return Math.min(Math.max(value, 0), 1);
}
|
|
93
|
+
/**
 * Read the `SEVERITY:` line and return its upper-cased label.
 *
 * @param text Review text to scan (the match is case-insensitive).
 * @returns The label when it is a member of `VALID_SEVERITY`, otherwise `null`.
 */
function parseSeverity(text) {
    const hit = /^SEVERITY:\s*([A-Z]+)\s*$/im.exec(text);
    if (hit === null) {
        return null;
    }
    const label = hit[1].toUpperCase();
    if (!VALID_SEVERITY.has(label)) {
        return null;
    }
    return label;
}
|
|
101
|
+
/**
 * Extract a comma-separated id list from the line matched by `re`.
 *
 * @param text Review text to scan.
 * @param re Regular expression whose first capture group holds the list.
 * @returns Trimmed ids with empty entries, `-` and `none` (any case) removed;
 *   an empty array when `re` does not match.
 */
function parseIdList(text, re) {
    const hit = text.match(re);
    if (!hit) {
        return [];
    }
    const ids = [];
    for (const piece of hit[1].split(',')) {
        const id = piece.trim();
        // Skip placeholders the model uses to say "nothing here".
        if (id.length === 0 || id === '-' || id.toLowerCase() === 'none') {
            continue;
        }
        ids.push(id);
    }
    return ids;
}
|
|
111
|
+
/**
 * Collect the bullet items that follow a `CHANGES:` header line.
 *
 * Each `-` or `*` bullet starts a new item. Any other non-blank line is
 * treated as a continuation and appended, space-separated, to the previous
 * item. Scanning stops at the next upper-case header field.
 *
 * Fixes over the previous implementation:
 *  - One header pattern is used and it is checked before the continuation
 *    rule, so a header containing `_` or a bare `HEADER:` with nothing after
 *    the colon ends the block instead of being glued onto the last change.
 *  - Lines are split on `\r?\n`, so CRLF replies no longer lose every bullet
 *    (a trailing `\r` made the bullet pattern fail, because `.` does not
 *    match line terminators).
 *
 * @param text Review text, already stripped of code fences.
 * @returns The change items in order; an empty array when there is no
 *   `CHANGES:` line or no bullets follow it.
 */
function parseChangesBlock(text) {
    const start = text.search(/^CHANGES:\s*$/im);
    if (start === -1) {
        return [];
    }
    const bulletPattern = /^\s*[-*]\s+(.+)$/;
    const headerPattern = /^[A-Z_]+:(?:\s|$)/;
    // Drop the `CHANGES:` line itself; everything after it is candidate content.
    const lines = text.slice(start).split(/\r?\n/).slice(1);
    const changes = [];
    for (const line of lines) {
        const bullet = bulletPattern.exec(line);
        if (bullet) {
            changes.push(bullet[1].trim());
            continue;
        }
        if (headerPattern.test(line)) {
            // Hit the next header field — stop
            break;
        }
        const extra = line.trim();
        // Continuation lines for a multi-line change item
        if (extra.length > 0 && changes.length > 0) {
            changes[changes.length - 1] += ' ' + extra;
        }
    }
    return changes;
}
|
|
137
|
+
/**
 * Unwrap a reply that is entirely enclosed in one triple-backtick fence.
 *
 * @param s Already-trimmed reply text.
 * @returns The trimmed fence contents when `s` starts and ends with a fence,
 *   otherwise `s` unchanged. A language tag after the opening fence is not
 *   removed; it stays as the first line of the result.
 */
function stripFences(s) {
    const fenced = /^```\s*([\s\S]*?)```\s*$/.exec(s);
    if (fenced === null) {
        return s;
    }
    return fenced[1].trim();
}
|
|
141
|
+
// ============================================================================
|
|
142
|
+
// Prompt context
|
|
143
|
+
// ============================================================================
|
|
144
|
+
/**
 * Build the placeholder values handed to the prompt pack for one review.
 *
 * Every context carries `prd_doc`, `iteration` and `prior_review`. The
 * architecture expert additionally gets a CALM summary, CALM node ids and ADR
 * ids; any other expert gets the security fields (STRIDE ids plus fixed
 * OWASP / NIST pointers).
 *
 * @param expert Expert kind; `'architecture'` selects the CALM fields.
 * @param prdBody PRD markdown under review.
 * @param meshContext Mesh context the ids are read from.
 * @param iteration Current review iteration.
 * @param priorReview Previous review result, or a falsy value on the first pass.
 * @returns The context object for the prompt loader.
 */
function buildPromptContext(expert, prdBody, meshContext, iteration, priorReview) {
    let priorSummary = '(first iteration — no prior review)';
    if (priorReview) {
        const heading = `Previous iteration (${priorReview.iteration}) — score ${priorReview.score.toFixed(2)}, severity ${priorReview.severity}. CHANGES requested:`;
        const bullets = priorReview.changes.map(c => `- ${c}`).join('\n');
        priorSummary = heading + '\n' + bullets;
    }
    const orNone = (ids) => ids.join(', ') || '(none)';
    let expertFields;
    if (expert === 'architecture') {
        expertFields = {
            'mesh.bar.calm_summary': summarizeCalm(meshContext),
            calm_node_ids: orNone(extractCalmNodeIds(meshContext)),
            adrs_in_scope: orNone((meshContext.bar?.adrs ?? []).map(a => a.id)),
        };
    }
    else {
        expertFields = {
            stride_entries: orNone(extractStrideIds(meshContext)),
            owasp_in_scope: '(derived from STRIDE entries — see PRD body for current claims)',
            nist_controls: '(see policies/nist-800-53-controls.yaml in mesh)',
        };
    }
    return {
        prd_doc: prdBody,
        iteration,
        prior_review: priorSummary,
        ...expertFields,
    };
}
|
|
164
|
+
/**
 * Render a short text summary of the BAR's CALM model for the prompt.
 *
 * @param mesh Mesh context; the model is read from `mesh.bar.calm_model`.
 * @returns A node-count header followed by one line per node (first 12 only),
 *   or a parenthesised placeholder when the model or its `nodes` array is
 *   missing.
 */
function summarizeCalm(mesh) {
    const model = mesh.bar?.calm_model;
    if (!model || typeof model !== 'object') {
        return '(no CALM model loaded)';
    }
    const nodeList = model.nodes;
    if (!Array.isArray(nodeList)) {
        return '(empty CALM model)';
    }
    // The header reports the full count even though only 12 nodes are listed.
    const described = nodeList
        .slice(0, 12)
        .map(node => ` - ${node['unique-id']} (${node['node-type'] ?? 'unknown'}) — ${node.name ?? ''}`);
    return [`${nodeList.length} node(s):`, ...described].join('\n');
}
|
|
180
|
+
/**
 * List the `unique-id` of every CALM node in the BAR's model.
 *
 * @param mesh Mesh context; nodes are read from `mesh.bar.calm_model.nodes`.
 * @returns Ids as strings, skipping nodes whose id is missing or empty; an
 *   empty array when there is no model or no `nodes` array.
 */
function extractCalmNodeIds(mesh) {
    const model = mesh.bar?.calm_model;
    const nodeList = model && typeof model === 'object' ? model.nodes : undefined;
    if (!Array.isArray(nodeList)) {
        return [];
    }
    const ids = [];
    for (const node of nodeList) {
        const id = String(node['unique-id'] ?? '');
        if (id) {
            ids.push(id);
        }
    }
    return ids;
}
|
|
191
|
+
/**
 * List the `id` of every threat entry on the BAR.
 *
 * @param mesh Mesh context; threats are read from `mesh.bar.threats`.
 * @returns Ids as strings, skipping entries whose id is missing or empty; an
 *   empty array when `threats` is not an array.
 */
function extractStrideIds(mesh) {
    const threatList = mesh.bar?.threats;
    if (!Array.isArray(threatList)) {
        return [];
    }
    const ids = [];
    for (const threat of threatList) {
        const id = String(threat.id ?? '');
        if (id) {
            ids.push(id);
        }
    }
    return ids;
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import type { LlmProvider, RankedSource, ResearchBrief } from '../../schemas';
|
|
2
|
+
/** Identifier of a trigger condition that `detectGapSignals` can report. */
export type GapSignalKind = 'low_source_diversity' | 'topic_uncovered' | 'low_provider_overlap';
|
|
3
|
+
/** One trigger condition found by the deterministic gap check. */
export interface GapSignal {
    /** Which trigger condition fired. */
    kind: GapSignalKind;
    /** Short human-readable explanation for the audit log. */
    evidence: string;
}
|
|
8
|
+
/** Inputs to the deterministic gap-signal check. */
export interface ShouldRunGapAnalysisOpts {
    /** Research brief; its `topic` supplies the keywords checked for coverage. */
    brief: ResearchBrief;
    /** Ranked sources produced by the first pass. */
    rankedSources: RankedSource[];
    /** Trigger when total < this. Default 5. */
    minSources?: number;
    /** Trigger when one provider holds > this fraction. Default 0.85. */
    dominantProviderRatio?: number;
}
|
|
16
|
+
/**
 * Deterministic trigger check with no LLM involved.
 *
 * @returns Every gap signal found; an empty array means gap_analysis should
 *   not be run.
 */
export declare function detectGapSignals(opts: ShouldRunGapAnalysisOpts): GapSignal[];
|
|
21
|
+
/** Inputs to the LLM hop that proposes follow-up queries. */
export interface RunGapAnalysisOpts {
    /** Mesh directory the prompt pack is loaded from. */
    meshDir: string;
    /** Research brief; its `topic` is placed in the prompt context. */
    brief: ResearchBrief;
    /** First-pass ranked sources, summarised by count and provider in the prompt. */
    rankedSources: RankedSource[];
    /** Gap signals that triggered this run; listed in the prompt. */
    signals: GapSignal[];
    /** LLM provider to call. */
    provider: LlmProvider;
    /** Forwarded to the LLM router. */
    anthropicApiKey?: string;
    /** Forwarded to the LLM router. */
    githubToken?: string;
    /** Optional `fetch` implementation forwarded to the LLM router. */
    fetchImpl?: typeof fetch;
}
|
|
31
|
+
/** Outcome of a successful gap-analysis LLM hop. */
export interface GapAnalysisResult {
    /** Exactly 3 follow-up web queries the LLM produced. */
    followUpQueries: string[];
    /** Prompt-pack telemetry for the audit log. */
    prompt: {
        packPath: string;
        packSha256: string;
    };
    /** LLM usage; token and cost figures are summed over all attempts. */
    llm: {
        provider: LlmProvider;
        /** Model reported by the last call. */
        model: string;
        inputTokens: number;
        outputTokens: number;
        costUsd: number;
        /** Number of LLM calls made (1 or 2). */
        attempts: number;
    };
}
|
|
48
|
+
/**
 * Ask the LLM for exactly 3 follow-up web queries based on the gap signals.
 *
 * The reply is validated; on failure one retry is made with the validation
 * error appended to the prompt. Rejects with an `Error` when the second
 * attempt also fails validation.
 */
export declare function runGapAnalysis(opts: RunGapAnalysisOpts): Promise<GapAnalysisResult>;
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.detectGapSignals = detectGapSignals;
|
|
4
|
+
exports.runGapAnalysis = runGapAnalysis;
|
|
5
|
+
/**
|
|
6
|
+
* gap_analysis — pure trigger + LLM hop, bounded one-shot.
|
|
7
|
+
*
|
|
8
|
+
* After the first-pass dedupe, this module:
|
|
9
|
+
* 1) Decides whether to fire at all (pure, deterministic).
|
|
10
|
+
* 2) If yes, asks the LLM for exactly 3 follow-up web queries.
|
|
11
|
+
*
|
|
12
|
+
* Trigger conditions (any one fires):
|
|
13
|
+
* - low_source_diversity: fewer than `minSources` total ranked sources
|
|
14
|
+
* across the whole first pass.
|
|
15
|
+
* - topic_uncovered: at least one topic keyword from the brief is
|
|
16
|
+
* mentioned in zero result titles/excerpts.
|
|
17
|
+
* - low_provider_overlap: most sources came from a single provider
|
|
18
|
+
* (the synthesis prompt's Cross-Source Analysis falls apart when
|
|
19
|
+
* there's no cross-source angle).
|
|
20
|
+
*
|
|
21
|
+
* The bounded loop (one extra round of tavily, no further) is enforced by
|
|
22
|
+
* the orchestrator — not this node. We return the queries; the caller
|
|
23
|
+
* decides what to do with them.
|
|
24
|
+
*/
|
|
25
|
+
const zod_1 = require("zod");
|
|
26
|
+
const llm_router_1 = require("../../llm/llm-router");
|
|
27
|
+
const prompt_loader_1 = require("../../mesh/prompt-loader");
|
|
28
|
+
const FollowUpQueriesSchema = zod_1.z.array(zod_1.z.string().min(3)).length(3);
|
|
29
|
+
/**
 * Deterministic trigger check for the gap-analysis hop.
 *
 * Up to three signals are reported, in this order:
 *  - `low_source_diversity`: fewer ranked sources than `minSources` (default 5).
 *  - `low_provider_overlap`: one provider supplies more than
 *    `dominantProviderRatio` (default 0.85) of at least 4 sources.
 *  - `topic_uncovered`: a keyword from `brief.topic` appears in no source
 *    title or excerpt (case-insensitive substring match).
 *
 * @param opts `brief`, `rankedSources`, and optional `minSources` /
 *   `dominantProviderRatio` overrides.
 * @returns The signals found; an empty array means "do not run gap_analysis".
 */
function detectGapSignals(opts) {
    const { rankedSources, brief } = opts;
    const minSources = opts.minSources ?? 5;
    const dominantRatio = opts.dominantProviderRatio ?? 0.85;
    const total = rankedSources.length;
    const signals = [];
    if (total < minSources) {
        signals.push({
            kind: 'low_source_diversity',
            evidence: `only ${total} ranked source(s); threshold ${minSources}`,
        });
    }
    if (total > 0) {
        const perProvider = new Map();
        rankedSources.forEach(source => {
            perProvider.set(source.provider, (perProvider.get(source.provider) ?? 0) + 1);
        });
        // Strict `>` keeps the first-inserted provider when several share the top count.
        let dominant;
        let maxCount = 0;
        for (const [provider, count] of perProvider) {
            if (count > maxCount) {
                maxCount = count;
                dominant = provider;
            }
        }
        const ratio = maxCount / total;
        if (ratio > dominantRatio && total >= 4) {
            signals.push({
                kind: 'low_provider_overlap',
                evidence: `${(ratio * 100).toFixed(0)}% of sources from ${dominant}; no cross-source signal`,
            });
        }
    }
    // Topic-uncoverage heuristic: every keyword derived from brief.topic must
    // appear in at least one result title or excerpt.
    const keywords = extractTopicKeywords(brief.topic);
    const haystack = rankedSources.map(r => `${r.title} ${r.excerpt}`).join(' ').toLowerCase();
    const uncovered = [];
    for (const keyword of keywords) {
        if (!haystack.includes(keyword)) {
            uncovered.push(keyword);
        }
    }
    if (uncovered.length > 0) {
        signals.push({
            kind: 'topic_uncovered',
            evidence: `brief keyword(s) absent from any result: ${uncovered.slice(0, 5).join(', ')}`,
        });
    }
    return signals;
}
|
|
73
|
+
/**
 * Derive up to 10 lower-case keywords from a topic string.
 *
 * The topic is split on anything other than `a-z0-9`; words shorter than
 * 4 characters and a small stop-word list are dropped. Duplicates are kept.
 *
 * @param topic Free-text topic from the brief.
 * @returns The first 10 surviving words, in their original order.
 */
function extractTopicKeywords(topic) {
    const stopWords = new Set(['the', 'and', 'for', 'with', 'from', 'into', 'about', 'over', 'after', 'before']);
    const keywords = [];
    for (const word of topic.toLowerCase().split(/[^a-z0-9]+/)) {
        if (word.length < 4 || stopWords.has(word)) {
            continue;
        }
        keywords.push(word);
        if (keywords.length === 10) {
            break;
        }
    }
    return keywords;
}
|
|
81
|
+
/**
 * Ask the LLM for exactly 3 follow-up web queries based on the gap signals.
 *
 * The `research/gap-analysis` prompt pack is filled with the brief topic, a
 * first-pass summary and the signals. The reply is validated with
 * `parseFollowUpQueries`; on failure one retry is made with the validation
 * error appended to the prompt.
 *
 * @param opts meshDir, brief, rankedSources, signals, provider and optional
 *   credentials / fetch implementation.
 * @returns The queries, prompt-pack telemetry, and LLM usage summed over all
 *   attempts made.
 * @throws Error when the reply fails validation after 2 attempts.
 */
async function runGapAnalysis(opts) {
    const summary = `${opts.rankedSources.length} ranked source(s); top providers: ${topProviderSummary(opts.rankedSources)}`;
    const gaps = opts.signals.map(s => `- ${s.kind}: ${s.evidence}`).join('\n');
    const prompt = (0, prompt_loader_1.loadPrompt)({
        meshDir: opts.meshDir,
        packId: 'research/gap-analysis',
        context: {
            brief: { topic: opts.brief.topic },
            first_pass: { summary, gaps },
        },
    });
    const system = 'You output a SINGLE JSON array of exactly 3 strings. No prose before or after, no markdown fence. The first character of your response MUST be `[`.';
    // Usage is accumulated across attempts so a retry is still fully accounted for.
    const usage = { inputTokens: 0, outputTokens: 0, costUsd: 0 };
    let lastModel = '';
    let lastError = null;
    let attempt = 0;
    while (attempt < 2) {
        attempt += 1;
        let userPrompt = prompt.filled;
        if (attempt !== 1) {
            userPrompt = `${prompt.filled}\n\n---\n\nYour previous response failed validation:\n${lastError}\n\nReturn a SINGLE JSON array of exactly 3 strings (each a follow-up web query). No prose, no markdown.`;
        }
        const result = await (0, llm_router_1.callLlm)({
            provider: opts.provider,
            tier: 'plan', // cheap tier; this is structural follow-up
            anthropicApiKey: opts.anthropicApiKey,
            githubToken: opts.githubToken,
            system,
            prompt: userPrompt,
            maxTokens: 1000,
            fetchImpl: opts.fetchImpl,
        });
        usage.inputTokens += result.inputTokens;
        usage.outputTokens += result.outputTokens;
        usage.costUsd += result.costUsd;
        lastModel = result.model;
        const parsed = parseFollowUpQueries(result.text);
        if (!parsed.success) {
            lastError = parsed.error;
            continue;
        }
        return {
            followUpQueries: parsed.data,
            prompt: { packPath: prompt.packPath, packSha256: prompt.packSha256 },
            llm: { provider: opts.provider, model: lastModel, inputTokens: usage.inputTokens, outputTokens: usage.outputTokens, costUsd: usage.costUsd, attempts: attempt },
        };
    }
    throw new Error(`gap_analysis: LLM output failed validation after 2 attempts. Last error: ${lastError}`);
}
|
|
130
|
+
/**
 * Parse and validate the LLM reply as a JSON array of follow-up queries.
 *
 * If the reply contains a triple-backtick fence (optionally tagged `json`),
 * only the contents of the first fence are parsed; otherwise the whole
 * trimmed reply is parsed. The parsed value is then checked against
 * `FollowUpQueriesSchema`.
 *
 * @param raw Raw LLM reply text.
 * @returns `{ success: true, data }` with the validated queries, or
 *   `{ success: false, error }` with a JSON or schema error message.
 */
function parseFollowUpQueries(raw) {
    const text = raw.trim();
    const fenced = /```(?:json)?\s*([\s\S]*?)```/.exec(text);
    const jsonText = fenced === null ? text : fenced[1].trim();
    let value;
    try {
        value = JSON.parse(jsonText);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return { success: false, error: `not valid JSON: ${reason}` };
    }
    const checked = FollowUpQueriesSchema.safeParse(value);
    if (!checked.success) {
        const error = checked.error.issues
            .map(issue => `${issue.path.join('.') || '<root>'}: ${issue.message}`)
            .join('; ');
        return { success: false, error };
    }
    return { success: true, data: checked.data };
}
|
|
147
|
+
/**
 * Summarise how many sources each provider contributed.
 *
 * @param sources Ranked sources; only `provider` is read.
 * @returns `provider=count` pairs joined by `, `, highest count first, with
 *   ties kept in first-seen order; an empty string for no sources.
 */
function topProviderSummary(sources) {
    const tally = new Map();
    sources.forEach(source => {
        tally.set(source.provider, (tally.get(source.provider) ?? 0) + 1);
    });
    const ranked = Array.from(tally);
    ranked.sort((left, right) => right[1] - left[1]);
    const parts = [];
    for (const [provider, count] of ranked) {
        parts.push(`${provider}=${count}`);
    }
    return parts.join(', ');
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* generate_prd_manifest — pure node.
|
|
3
|
+
*
|
|
4
|
+
* Takes the validated PRD body + grounding result + brief, produces the
|
|
5
|
+
* PrdManifest JSON that lives alongside the published PRD markdown.
|
|
6
|
+
*
|
|
7
|
+
* The manifest is what Cheshire's `spec-ready-handler.yml` reads in target
|
|
8
|
+
* code repos to generate an RCTRO implementation issue. Keeping it
|
|
9
|
+
* separate from the PRD body means downstream consumers (Cheshire, audit
|
|
10
|
+
* dashboards) can read structured data without re-parsing markdown.
|
|
11
|
+
*/
|
|
12
|
+
import type { GroundingBlock, MeshContext, PrdBrief, PrdManifest, ImpactedBar } from '../../schemas';
|
|
13
|
+
import type { PrdCitationSignals } from './prd-validator';
|
|
14
|
+
/** Inputs to `generatePrdManifest`. */
export interface GenerateManifestOpts {
    /** Run identifier, emitted as the manifest's `run_id`. */
    runId: string;
    /** PRD brief, used together with the PRD body to derive the topic. */
    brief: PrdBrief;
    /** Mesh context; supplies `mesh_sha` and the BAR / platform scope. */
    meshContext: MeshContext;
    /** Validated PRD markdown body; endpoints are extracted from it. */
    prdBody: string;
    /** Validator signals; the parsed SR entries become the security requirements. */
    signals: PrdCitationSignals;
    /** Grounding result copied into the manifest's `grounding` block. */
    grounding: GroundingBlock;
    /** Grounding threshold recorded in the manifest. */
    threshold: number;
}
|
|
23
|
+
/**
 * Build the PrdManifest.
 *
 * Endpoints are regex-extracted from FR-NN entries that mention an HTTP
 * method and a path. Security requirements are taken directly from the
 * validator's parsed SR entries.
 *
 * `impacted_bars` and `target_repos` are derived via computeImpactedBars(),
 * which holds the HIGH/LOW classification logic. `target_repos` is the union
 * of the HIGH-confidence BARs' repos; LOW BARs surface only as footer
 * mentions in the landing-issues, never as auto-created issues.
 */
export declare function generatePrdManifest(opts: GenerateManifestOpts): PrdManifest;
|
|
34
|
+
/**
 * Classify every BAR in scope as HIGH or LOW confidence, depending on whether
 * the PRD's citations touch a CALM node the BAR owns or a threat the BAR
 * declared.
 *
 * Inputs:
 *   - endpoints[].calm_node                     — referenced CALM ids
 *   - security_requirements[].citations (THR-*) — referenced threat ids
 *
 * Per BAR:
 *   - own_calm_nodes ∩ referenced_calm_nodes ≠ ∅ → HIGH (endpoint hit)
 *   - own_threat_ids ∩ referenced_threat_ids ≠ ∅ → HIGH (threat hit)
 *   - otherwise (in the platform, no citation overlap) → LOW
 *
 * Scope handling:
 *   - BAR scope: only that one BAR is in scope and it is always HIGH (the PRD
 *     is about it by definition; its linked_repos are the target).
 *   - Platform scope: the current BAR is null and every sibling BAR is
 *     classified.
 *   - Portfolio scope: falls back to the placeholder.
 */
export declare function computeImpactedBars(mesh: MeshContext, endpoints: PrdManifest['endpoints'], securityRequirements: PrdManifest['security_requirements']): ImpactedBar[];
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.generatePrdManifest = generatePrdManifest;
|
|
4
|
+
exports.computeImpactedBars = computeImpactedBars;
|
|
5
|
+
/**
 * Build the PrdManifest from the validated PRD body, validator signals and
 * grounding result.
 *
 * Endpoints are extracted from the PRD body. Security requirements come from
 * the validator's parsed SR entries, keeping only citations of the
 * THR-n / Ann / NIST-XX-n forms. `impacted_bars` is produced by
 * computeImpactedBars() and `target_repos` by deriveTargetRepos().
 *
 * @param opts runId, brief, meshContext, prdBody, signals, grounding, threshold.
 * @returns The manifest object.
 */
function generatePrdManifest(opts) {
    const prd_topic = derivePrdTopic(opts.brief, opts.prdBody);
    const endpoints = extractEndpoints(opts.prdBody);
    // The schema's citation regex accepts THR-* / A0* / NIST-XX-N — we filter
    // to those before passing through (FR-side IDs would fail validation).
    const allowedCitation = /^(?:THR-\d+|A\d{2}|NIST-[A-Z]{2}-\d+)$/;
    const security_requirements = [];
    for (const entry of opts.signals.sr_entries) {
        security_requirements.push({
            id: entry.id,
            citations: entry.cited.filter(c => allowedCitation.test(c)),
        });
    }
    const impacted_bars = computeImpactedBars(opts.meshContext, endpoints, security_requirements);
    const target_repos = deriveTargetRepos(impacted_bars, opts.meshContext);
    const grounding = {
        final_score: opts.grounding.final_score,
        threshold: opts.threshold,
        iterations: opts.grounding.final_iteration,
        passed: opts.grounding.passed,
    };
    return {
        run_id: opts.runId,
        prd_topic,
        mesh_sha: opts.meshContext.mesh_sha,
        target_repos,
        impacted_bars,
        endpoints,
        security_requirements,
        grounding,
    };
}
|
|
42
|
+
/**
 * Classify the BARs in scope as high or low confidence targets for the PRD.
 *
 * Referenced CALM ids come from `endpoints[].calm_node` (ignoring empty
 * values and `'unknown'`); referenced threat ids are the `THR-` citations of
 * the security requirements.
 *
 *   - BAR scope (`mesh.bar` set): returns that single BAR, always `high`.
 *   - Platform scope: every entry of `mesh.platform.sibling_bars` is
 *     classified — `high` when it owns a referenced CALM node or threat,
 *     otherwise `low`.
 *   - No BAR and no siblings: returns an empty array.
 *
 * @param mesh Mesh context.
 * @param endpoints Manifest endpoints (`calm_node`, `fr_id` are read).
 * @param securityRequirements Manifest security requirements (`citations` is read).
 * @returns One entry per BAR in scope with `bar_id`, `repos`, `confidence`
 *   and a human-readable `reasoning`.
 */
function computeImpactedBars(mesh, endpoints, securityRequirements) {
    const citedCalmNodes = new Set();
    for (const endpoint of endpoints) {
        const node = endpoint.calm_node;
        if (node && node !== 'unknown') {
            citedCalmNodes.add(node);
        }
    }
    const citedThreats = new Set();
    for (const { citations } of securityRequirements) {
        for (const citation of citations) {
            if (citation.startsWith('THR-')) {
                citedThreats.add(citation);
            }
        }
    }
    // BAR scope: only the current BAR; trivially HIGH (PRD is about it).
    const currentBar = mesh.bar;
    if (currentBar) {
        let reasoning;
        if (endpoints.length > 0 || securityRequirements.length > 0) {
            reasoning = `PRD scope is BAR ${currentBar.bar_id}; covers ${endpoints.length} endpoint(s), ${securityRequirements.length} security requirement(s).`;
        }
        else {
            reasoning = `PRD scope is BAR ${currentBar.bar_id}.`;
        }
        return [{
                bar_id: currentBar.bar_id,
                repos: uniqueOwnerRepos(currentBar.linked_repos),
                confidence: 'high',
                reasoning,
            }];
    }
    // Platform scope: classify each sibling. (When mesh.bar is null AND
    // mesh.platform is set, the PRD is platform-scoped.)
    const siblings = mesh.platform?.sibling_bars ?? [];
    if (siblings.length === 0) {
        return [];
    }
    const plural = (items) => (items.length > 1 ? 's' : '');
    const quoted = (items) => items.map(item => `\`${item}\``).join(', ');
    return siblings.map(sib => {
        const calmHits = sib.calm_node_ids.filter(id => citedCalmNodes.has(id));
        const threatHits = sib.threat_ids.filter(id => citedThreats.has(id));
        const isHit = calmHits.length > 0 || threatHits.length > 0;
        let reasoning = 'No CALM nodes or threats from this BAR are cited by the PRD. Listed as a low-confidence reference in case shared infra / downstream effects apply.';
        if (isHit) {
            const parts = [];
            if (calmHits.length > 0) {
                const frIds = endpoints
                    .filter(e => calmHits.includes(e.calm_node))
                    .map(e => e.fr_id)
                    .join(', ');
                parts.push(`owns CALM node${plural(calmHits)} ${quoted(calmHits)} referenced by ${frIds}`);
            }
            if (threatHits.length > 0) {
                parts.push(`owns threat${plural(threatHits)} ${quoted(threatHits)} cited by security requirements`);
            }
            reasoning = parts.join('; ');
        }
        return {
            bar_id: sib.bar_id,
            repos: uniqueOwnerRepos(sib.linked_repos),
            confidence: isHit ? 'high' : 'low',
            reasoning,
        };
    });
}
|
|
115
|
+
/**
 * Chooses the repositories that become `target_repos`.
 *
 * The result is the de-duplicated union of `repos` over every impacted BAR
 * whose confidence is 'high'. BARs with any other confidence contribute
 * nothing here: LOW BARs are deliberately kept out of auto-issue creation
 * and only show up as footer mentions in the HIGH BARs' landing-issues.
 *
 * When no HIGH repo exists, a recognisable placeholder keeps the manifest
 * valid and signals that something is misconfigured:
 *   - `mesh/<bar_id lower-cased>` when the mesh has a current BAR,
 *   - `placeholder/repo` otherwise.
 */
function deriveTargetRepos(impactedBars, mesh) {
    // Gather repos of HIGH-confidence BARs only, flattening one level.
    const collected = impactedBars.reduce((acc, entry) => (entry.confidence === 'high' ? acc.concat(entry.repos) : acc), []);
    if (collected.length !== 0) {
        return uniqueOwnerRepos(collected);
    }
    // Nothing matched: emit a placeholder so the manifest still validates
    // and the run log makes the gap visible.
    const currentBar = mesh.bar;
    return currentBar
        ? [`mesh/${currentBar.bar_id.toLowerCase()}`]
        : ['placeholder/repo'];
}
|
|
139
|
+
/**
 * Derives a short topic label for a PRD.
 *
 * Resolution order:
 *   1. The first Markdown H1 (`# Title`) in `body`, trimmed and capped at
 *      200 characters.
 *   2. For a PR research source, `PRD from research PR #<n>` when the URL
 *      contains `/pull/<n>`, else a generic PR label.
 *   3. Otherwise the last segment of `research_source.relative_path` without
 *      its `.md` suffix and with dashes turned into spaces; `topic` when
 *      that segment is empty.
 *
 * @param brief Object whose `research_source` carries `kind` plus either
 *   `url` (kind 'pr') or `relative_path` (any other kind).
 * @param body PRD Markdown text.
 * @returns The derived topic string.
 */
function derivePrdTopic(brief, body) {
    // Only spaces/tabs may separate '#' from the title. Using \s here would
    // cross the newline, so an empty "#" line would promote the following
    // paragraph to the title.
    const h1 = body.match(/^#[ \t]+(.+?)\s*$/m);
    if (h1) {
        return h1[1].trim().slice(0, 200);
    }
    if (brief.research_source.kind === 'pr') {
        const m = brief.research_source.url.match(/\/pull\/(\d+)/);
        return m ? `PRD from research PR #${m[1]}` : 'PRD (research source: PR)';
    }
    const fileName = brief.research_source.relative_path.split('/').pop() ?? '';
    // `||` rather than `??`: split() always yields a string, so the fallback
    // has to trigger on an empty name (path ending in '/', or a bare ".md").
    const base = fileName.replace(/\.md$/, '') || 'topic';
    return base.replace(/-/g, ' ');
}
|
|
152
|
+
/**
 * Extract endpoint declarations from FR-NN entries.
 *
 * Scans the "Functional Requirements with Traceability" section line by
 * line, tracking:
 *   - the most recent `FR-<digits>` id seen (seeing one also clears the
 *     current CALM node), and
 *   - the most recent `CALM node: <id>` hint since that FR id.
 *
 * Every `<HTTP method> <path>` found on a line is emitted against the
 * current FR, including several on the same line. Endpoints that appear
 * before any FR id are ignored.
 *
 * @param body PRD Markdown text.
 * @returns Array of `{ signature, calm_node, fr_id }`; `calm_node` is
 *   'unknown' when no hint has been seen for the current FR. Empty when the
 *   section is missing or has no content.
 */
function extractEndpoints(body) {
    const fnSection = sliceSection(body, 'Functional Requirements with Traceability');
    if (!fnSection) {
        return [];
    }
    // Patterns are built once rather than per line.
    const frPattern = /\b(FR-\d+)\b/;
    const calmPattern = /CALM\s+node[:\s]+([\w-]+)/i;
    const endpointPattern = /\b(GET|POST|PUT|PATCH|DELETE|OPTIONS|HEAD)\s+(\/[\w/{}:.-]*)/g;
    const out = [];
    let currentFr = null;
    let currentCalmNode = null;
    for (const line of fnSection.split('\n')) {
        const frMatch = line.match(frPattern);
        if (frMatch) {
            currentFr = frMatch[1];
            currentCalmNode = null;
        }
        // Checked after the FR reset so a hint on the FR's own line sticks.
        const calmHint = line.match(calmPattern);
        if (calmHint) {
            currentCalmNode = calmHint[1];
        }
        if (!currentFr) {
            continue;
        }
        // matchAll picks up every endpoint on the line, not just the first.
        for (const epMatch of line.matchAll(endpointPattern)) {
            // The path class admits '.' and ':', so sentence punctuation glued
            // to the end ("GET /users.") would otherwise leak into the signature.
            const path = epMatch[2].replace(/[.:]+$/, '');
            out.push({
                signature: `${epMatch[1]} ${path}`,
                calm_node: currentCalmNode ?? 'unknown',
                fr_id: currentFr,
            });
        }
    }
    return out;
}
|
|
186
|
+
/**
 * Returns the text under the H2 heading (`## <sectionName>`) of a Markdown
 * body, or null when nothing was collected.
 *
 * Collection starts after a heading whose trimmed title equals
 * `sectionName` and stops at the next H2 with a different title. Heading
 * lines themselves are never included; deeper headings (`###` and below)
 * are ordinary content.
 *
 * @param body Markdown text.
 * @param sectionName Exact H2 title to look for.
 * @returns The section's lines joined with '\n', or null if the section is
 *   absent or has no lines.
 */
function sliceSection(body, sectionName) {
    const headingPattern = /^##\s+(.+?)\s*$/;
    const kept = [];
    let active = false;
    for (const row of body.split('\n')) {
        const heading = headingPattern.exec(row);
        if (heading !== null) {
            if (heading[1].trim() === sectionName) {
                active = true;
            }
            else if (active) {
                // A different H2 closes the section.
                break;
            }
            // H2 lines are structural only and are never collected.
            continue;
        }
        if (active) {
            kept.push(row);
        }
    }
    return kept.length > 0 ? kept.join('\n') : null;
}
|
|
207
|
+
/**
 * Returns a new array holding each distinct entry of `list` exactly once,
 * in first-seen order. The input is not modified.
 */
function uniqueOwnerRepos(list) {
    const distinct = new Set(list);
    return Array.from(distinct);
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ProviderResult } from '../../search/provider-result';
|
|
2
|
+
import type { QueryEnvelope } from './tavily-search';
|
|
3
|
+
/**
 * Options accepted by `runHackerNewsSearch`.
 */
export interface HackerNewsSearchNodeOpts {
    /** Query strings to search for. */
    queries: Array<string>;
    /**
     * Optional numeric setting.
     * NOTE(review): presumably caps the hits kept per query; the default is
     * not visible in this declaration — confirm against the implementation.
     */
    hitsPerQuery?: number;
    /**
     * Optional function with the same type as the global `fetch`.
     * NOTE(review): presumably used in place of the global `fetch` (e.g. for
     * tests) — confirm against the implementation.
     */
    fetchImpl?: typeof fetch;
}
|
|
8
|
+
/**
 * Value resolved by `runHackerNewsSearch`.
 */
export interface HackerNewsSearchNodeResult {
    /**
     * Query envelopes, typed as the `QueryEnvelope` exported by the
     * tavily-search node.
     * NOTE(review): presumably one per executed query — confirm.
     */
    envelopes: Array<QueryEnvelope>;
    /** Search results, typed as the shared `ProviderResult`. */
    results: Array<ProviderResult>;
}
|
|
12
|
+
/**
 * Entry point of the Hacker News search node.
 *
 * Declaration only; the implementation is not part of this file.
 * NOTE(review): presumably runs one search per entry in `opts.queries` —
 * confirm against the implementation.
 *
 * @param opts - Search options; see `HackerNewsSearchNodeOpts`.
 * @returns A promise resolving to a `HackerNewsSearchNodeResult`
 *   (`envelopes` and `results`).
 */
export declare function runHackerNewsSearch(opts: HackerNewsSearchNodeOpts): Promise<HackerNewsSearchNodeResult>;
|