@maintainabilityai/research-runner 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +82 -0
- package/bin/research-runner.js +2 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +209 -0
- package/dist/llm/anthropic-client.d.ts +39 -0
- package/dist/llm/anthropic-client.js +74 -0
- package/dist/llm/github-models-client.d.ts +46 -0
- package/dist/llm/github-models-client.js +78 -0
- package/dist/llm/llm-router.d.ts +46 -0
- package/dist/llm/llm-router.js +60 -0
- package/dist/mesh/get-mesh-sha.d.ts +1 -0
- package/dist/mesh/get-mesh-sha.js +27 -0
- package/dist/mesh/mesh-reader.d.ts +14 -0
- package/dist/mesh/mesh-reader.js +392 -0
- package/dist/mesh/prompt-loader.d.ts +22 -0
- package/dist/mesh/prompt-loader.js +119 -0
- package/dist/mesh/threat-model-reader.d.ts +33 -0
- package/dist/mesh/threat-model-reader.js +123 -0
- package/dist/runner/archeologist.d.ts +39 -0
- package/dist/runner/archeologist.js +620 -0
- package/dist/runner/audit-emitter.d.ts +62 -0
- package/dist/runner/audit-emitter.js +210 -0
- package/dist/runner/hatters-tag-builder.d.ts +52 -0
- package/dist/runner/hatters-tag-builder.js +40 -0
- package/dist/runner/nodes/analyze-architecture.d.ts +10 -0
- package/dist/runner/nodes/analyze-architecture.js +447 -0
- package/dist/runner/nodes/arxiv-search.d.ts +12 -0
- package/dist/runner/nodes/arxiv-search.js +52 -0
- package/dist/runner/nodes/clone-and-index.d.ts +32 -0
- package/dist/runner/nodes/clone-and-index.js +158 -0
- package/dist/runner/nodes/dedupe-and-rank.d.ts +27 -0
- package/dist/runner/nodes/dedupe-and-rank.js +98 -0
- package/dist/runner/nodes/deterministic-review.d.ts +55 -0
- package/dist/runner/nodes/deterministic-review.js +206 -0
- package/dist/runner/nodes/expert-review.d.ts +68 -0
- package/dist/runner/nodes/expert-review.js +197 -0
- package/dist/runner/nodes/gap-analysis.d.ts +48 -0
- package/dist/runner/nodes/gap-analysis.js +153 -0
- package/dist/runner/nodes/generate-prd-manifest.d.ts +53 -0
- package/dist/runner/nodes/generate-prd-manifest.js +209 -0
- package/dist/runner/nodes/hackernews-search.d.ts +12 -0
- package/dist/runner/nodes/hackernews-search.js +63 -0
- package/dist/runner/nodes/identify-gaps.d.ts +33 -0
- package/dist/runner/nodes/identify-gaps.js +185 -0
- package/dist/runner/nodes/plan-queries.d.ts +28 -0
- package/dist/runner/nodes/plan-queries.js +120 -0
- package/dist/runner/nodes/prd-validator.d.ts +51 -0
- package/dist/runner/nodes/prd-validator.js +203 -0
- package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +22 -0
- package/dist/runner/nodes/synthesis-archaeology-validator.js +131 -0
- package/dist/runner/nodes/synthesis-validator.d.ts +51 -0
- package/dist/runner/nodes/synthesis-validator.js +185 -0
- package/dist/runner/nodes/synthesize-prd.d.ts +84 -0
- package/dist/runner/nodes/synthesize-prd.js +202 -0
- package/dist/runner/nodes/synthesize-report.d.ts +53 -0
- package/dist/runner/nodes/synthesize-report.js +188 -0
- package/dist/runner/nodes/tavily-search.d.ts +21 -0
- package/dist/runner/nodes/tavily-search.js +57 -0
- package/dist/runner/nodes/uspto-search.d.ts +13 -0
- package/dist/runner/nodes/uspto-search.js +62 -0
- package/dist/runner/nodes/verify-grounding.d.ts +54 -0
- package/dist/runner/nodes/verify-grounding.js +134 -0
- package/dist/runner/prd.d.ts +28 -0
- package/dist/runner/prd.js +494 -0
- package/dist/schemas/audit-event.d.ts +1151 -0
- package/dist/schemas/audit-event.js +141 -0
- package/dist/schemas/index.d.ts +17 -0
- package/dist/schemas/index.js +33 -0
- package/dist/schemas/mesh-context.d.ts +415 -0
- package/dist/schemas/mesh-context.js +95 -0
- package/dist/schemas/observed-architecture.d.ts +262 -0
- package/dist/schemas/observed-architecture.js +90 -0
- package/dist/schemas/prd-brief.d.ts +111 -0
- package/dist/schemas/prd-brief.js +37 -0
- package/dist/schemas/prd-doc.d.ts +249 -0
- package/dist/schemas/prd-doc.js +42 -0
- package/dist/schemas/prd-manifest.d.ts +171 -0
- package/dist/schemas/prd-manifest.js +73 -0
- package/dist/schemas/primitives.d.ts +47 -0
- package/dist/schemas/primitives.js +41 -0
- package/dist/schemas/query-plan.d.ts +33 -0
- package/dist/schemas/query-plan.js +25 -0
- package/dist/schemas/ranked-source.d.ts +82 -0
- package/dist/schemas/ranked-source.js +29 -0
- package/dist/schemas/research-brief.d.ts +114 -0
- package/dist/schemas/research-brief.js +49 -0
- package/dist/schemas/research-doc.d.ts +104 -0
- package/dist/schemas/research-doc.js +37 -0
- package/dist/search/arxiv-client.d.ts +41 -0
- package/dist/search/arxiv-client.js +88 -0
- package/dist/search/hackernews-client.d.ts +33 -0
- package/dist/search/hackernews-client.js +44 -0
- package/dist/search/provider-result.d.ts +25 -0
- package/dist/search/provider-result.js +2 -0
- package/dist/search/tavily-client.d.ts +38 -0
- package/dist/search/tavily-client.js +53 -0
- package/dist/search/uspto-client.d.ts +50 -0
- package/dist/search/uspto-client.js +112 -0
- package/dist/utils/run-id.d.ts +2 -0
- package/dist/utils/run-id.js +22 -0
- package/package.json +53 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* synthesize_report — LLM node.
|
|
3
|
+
*
|
|
4
|
+
* Second LLM hop in the archeologist research path. Loads
|
|
5
|
+
* `.caterpillar/prompts/research/synthesis.md`, fills it with the brief +
|
|
6
|
+
* mesh context + ranked sources + gap_analysis flag, calls Anthropic
|
|
7
|
+
* (sonnet by default — synthesis is more demanding than planning),
|
|
8
|
+
* runs the structural validator on the body, and either returns the
|
|
9
|
+
* validated body or retries once with feedback.
|
|
10
|
+
*
|
|
11
|
+
* Returns the synthesised body, the prompt-pack telemetry (path + sha256),
|
|
12
|
+
* LLM token/cost totals, and the citation_stats the audit log + Hatter's
|
|
13
|
+
* Tag both consume.
|
|
14
|
+
*/
|
|
15
|
+
import type { ArchaeologyGap, LlmProvider, MeshContext, ObservedArchitecture, RankedSource, ResearchBrief, ResearchPath } from '../../schemas';
|
|
16
|
+
import { type LoadedPrompt } from '../../mesh/prompt-loader';
|
|
17
|
+
import { type CitationStats, type ValidationReport } from './synthesis-validator';
|
|
18
|
+
export interface SynthesizeReportOpts {
|
|
19
|
+
meshDir: string;
|
|
20
|
+
brief: ResearchBrief;
|
|
21
|
+
meshContext: MeshContext;
|
|
22
|
+
rankedSources: RankedSource[];
|
|
23
|
+
/** Provider routing — comes from brief.llm_provider unless overridden. */
|
|
24
|
+
provider?: LlmProvider;
|
|
25
|
+
/** Required when provider === 'anthropic'. */
|
|
26
|
+
anthropicApiKey?: string;
|
|
27
|
+
/** Required when provider === 'github-models'. */
|
|
28
|
+
githubToken?: string;
|
|
29
|
+
/** Flipped true by the orchestrator after gap-analysis fires. */
|
|
30
|
+
gapAnalysisRan?: boolean;
|
|
31
|
+
/** Defaults to brief.path. Overrideable for tests. */
|
|
32
|
+
path?: ResearchPath;
|
|
33
|
+
/** Archaeology-path only: observed architecture extracted from the target repo. */
|
|
34
|
+
observedArchitecture?: ObservedArchitecture;
|
|
35
|
+
/** Archaeology-path only: gaps identified by identify_gaps. */
|
|
36
|
+
archaeologyGaps?: ArchaeologyGap[];
|
|
37
|
+
fetchImpl?: typeof fetch;
|
|
38
|
+
}
|
|
39
|
+
export interface SynthesizeReportResult {
|
|
40
|
+
body_md: string;
|
|
41
|
+
prompt: LoadedPrompt;
|
|
42
|
+
validation: ValidationReport;
|
|
43
|
+
citation_stats: CitationStats;
|
|
44
|
+
llm: {
|
|
45
|
+
provider: LlmProvider;
|
|
46
|
+
model: string;
|
|
47
|
+
inputTokens: number;
|
|
48
|
+
outputTokens: number;
|
|
49
|
+
costUsd: number;
|
|
50
|
+
attempts: number;
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
export declare function synthesizeReport(opts: SynthesizeReportOpts): Promise<SynthesizeReportResult>;
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.synthesizeReport = synthesizeReport;
|
|
4
|
+
const llm_router_1 = require("../../llm/llm-router");
|
|
5
|
+
const prompt_loader_1 = require("../../mesh/prompt-loader");
|
|
6
|
+
const synthesis_validator_1 = require("./synthesis-validator");
|
|
7
|
+
const synthesis_archaeology_validator_1 = require("./synthesis-archaeology-validator");
|
|
8
|
+
const MAX_TOKENS = 8000;
|
|
9
|
+
/**
 * Runs the synthesis LLM hop: loads the prompt pack for the selected path,
 * calls the LLM router, strips any wrapping code fence, and validates the
 * body structurally. On a failed validation the call is retried once with
 * the validator's errors appended as feedback.
 *
 * @param opts Node options; `provider` and `path` fall back to
 *   `opts.brief.llm_provider` and `opts.brief.path` when not supplied.
 * @returns The validated body, the loaded prompt, the validation report and
 *   its citation stats, plus LLM token/cost totals summed over all attempts.
 * @throws Error when the body still fails validation after the final attempt.
 */
async function synthesizeReport(opts) {
    const provider = opts.provider ?? opts.brief.llm_provider;
    const path = opts.path ?? opts.brief.path;
    const isArchaeology = path === 'archaeology';
    // Two different prompt packs + validators per path. Same LLM router, same
    // retry-with-feedback loop — the pack name, the validator and the retry
    // rules are the only things that differ.
    const packId = isArchaeology ? 'research/synthesis-archaeology' : 'research/synthesis';
    const validate = isArchaeology
        ? synthesis_archaeology_validator_1.validateArchaeologySynthesis
        : synthesis_validator_1.validateSynthesis;
    const promptContext = isArchaeology
        ? buildArchaeologyPromptContext(opts.brief, opts.meshContext, opts.rankedSources, opts.observedArchitecture, opts.archaeologyGaps ?? [])
        : buildPromptContext(opts.brief, opts.meshContext, opts.rankedSources, opts.gapAnalysisRan ?? false);
    const prompt = (0, prompt_loader_1.loadPrompt)({
        meshDir: opts.meshDir,
        packId,
        context: promptContext,
    });
    const system = isArchaeology
        ? 'You write structured markdown architecture-archaeology reports with strict section discipline. Every gap (G[N]) carries a severity. Every Recommendation traces to a G[N] and cites at least one grounding token (S[N] or OA[…]). The 9 H2 sections appear in the exact order requested. No prose before the first `##` heading.'
        : 'You write structured markdown documents with strict section + citation discipline. Every claim has an S[N] citation; every C[N] cites ≥2 sources; every Recommendation traces to a C[N]. Headings appear in the exact order requested. No prose before the first `##` heading.';
    // The retry reminder must restate the SAME rules as the system prompt for
    // the active path; quoting the research-path rules (10 sections, C[N]) to
    // an archaeology run would contradict its own system prompt.
    const retryRules = isArchaeology
        ? 'The 9 H2 sections must appear in the exact order specified; every gap (G[N]) must carry a severity; every Recommendation must trace to a G[N] and cite at least one grounding token (S[N] or OA[…]).'
        : 'The 10 H2 sections must appear in the exact order specified; every C[N] must cite ≥2 S[N] (or ≥1 if confidence is LOW); every Recommendation must reference at least one C[N].';
    const maxAttempts = 2;
    let lastErrors = [];
    let totalInput = 0;
    let totalOutput = 0;
    let totalCost = 0;
    let lastModel = '';
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        // First attempt sends the filled prompt as-is; later attempts append
        // the previous validation errors so the model can correct them.
        const userPrompt = attempt === 1
            ? prompt.filled
            : `${prompt.filled}\n\n---\n\nYour previous response failed structural validation:\n${lastErrors.map(e => `- ${e}`).join('\n')}\n\nRewrite the document and fix EVERY error above. ${retryRules}`;
        const result = await (0, llm_router_1.callLlm)({
            provider,
            tier: 'synth',
            anthropicApiKey: opts.anthropicApiKey,
            githubToken: opts.githubToken,
            system,
            prompt: userPrompt,
            maxTokens: MAX_TOKENS,
            fetchImpl: opts.fetchImpl,
        });
        // Totals accumulate across attempts so a retry's cost is not lost.
        totalInput += result.inputTokens;
        totalOutput += result.outputTokens;
        totalCost += result.costUsd;
        lastModel = result.model;
        const body = stripFences(result.text);
        const report = validate(body);
        if (report.valid) {
            return {
                body_md: body,
                prompt,
                validation: report,
                citation_stats: report.citation_stats,
                llm: { provider, model: lastModel, inputTokens: totalInput, outputTokens: totalOutput, costUsd: totalCost, attempts: attempt },
            };
        }
        lastErrors = report.errors;
    }
    throw new Error(`synthesize_report: structural validation failed after ${maxAttempts} attempts. Last errors: ${lastErrors.join('; ')}`);
}
|
|
65
|
+
/**
 * Unwraps a response that the model enclosed in a single Markdown code fence
 * (bare, `markdown`, or `md`). Any other input is returned trimmed but
 * otherwise unchanged.
 */
function stripFences(raw) {
    const text = raw.trim();
    const fenced = /^```(?:markdown|md)?\s*([\s\S]*?)```\s*$/.exec(text);
    if (fenced === null) {
        return text;
    }
    return fenced[1].trim();
}
|
|
71
|
+
/**
 * Assembles the nested context object whose dotted keys the research
 * synthesis prompt substitutes: brief fields, a mesh summary, the rendered
 * source list (or a placeholder when empty) and the gap-analysis flag.
 */
function buildPromptContext(brief, mesh, rankedSources, gapAnalysisRan) {
    const briefSection = {
        topic: brief.topic,
        scope_level: brief.scope.level,
    };
    const meshSection = {
        context_summary: summarizeMeshContext(mesh),
    };
    let sourcesText;
    if (rankedSources.length === 0) {
        sourcesText = '(no sources retrieved)';
    }
    else {
        sourcesText = rankedSources.map(formatRankedSource).join('\n\n');
    }
    return {
        brief: briefSection,
        mesh: meshSection,
        ranked_sources: sourcesText,
        gap_analysis_ran: gapAnalysisRan,
    };
}
|
|
87
|
+
/**
 * Renders the mesh context as a short newline-separated summary: the
 * portfolio name, up to five related research topics, the platform (when
 * present), and the BAR with its counts plus up to six STRIDE threats.
 */
function summarizeMeshContext(mesh) {
    const summary = [`Portfolio: ${mesh.portfolio.name}`];
    const research = mesh.portfolio.related_research_summaries;
    if (research.length > 0) {
        const topics = research.map(r => r.topic).slice(0, 5).join('; ');
        summary.push(`Portfolio research (${research.length}): ${topics}`);
    }
    if (mesh.platform) {
        const siblingCount = mesh.platform.sibling_bars.length;
        const plural = siblingCount === 1 ? '' : 's';
        summary.push(`Platform: ${mesh.platform.platform_id} (${siblingCount} sibling BAR${plural})`);
    }
    if (mesh.bar) {
        const bar = mesh.bar;
        summary.push(`BAR: ${bar.name} (${bar.bar_id}); tier=${bar.tier}; ADRs=${bar.adrs.length}; prior research=${bar.related_research.length}; prior PRDs=${bar.related_prds.length}; mesh gaps: ${bar.mesh_gaps.join(', ') || 'none'}`);
        if (Array.isArray(bar.threats)) {
            const labels = bar.threats.map(t => `${t.id}/${t.category}`).slice(0, 6).join('; ');
            summary.push(`STRIDE threats (${bar.threats.length}): ${labels}`);
        }
    }
    return summary.join('\n');
}
|
|
105
|
+
/**
 * Renders one ranked source as a Markdown bullet with URL and retrieval
 * date, adding the publication date and an excerpt (capped at 280
 * characters, with an ellipsis when truncated) when they are present.
 */
function formatRankedSource(s) {
    const header = `- **${s.id}** "${s.title}" (${s.provider}, salience ${s.salience_score})`;
    const rendered = [header, ` URL: ${s.url}`, ` Retrieved: ${s.retrieved_at}`];
    if (s.published_at) {
        rendered.push(` Published: ${s.published_at}`);
    }
    if (s.excerpt) {
        const ellipsis = s.excerpt.length > 280 ? '…' : '';
        rendered.push(` Excerpt: ${s.excerpt.slice(0, 280)}${ellipsis}`);
    }
    return rendered.join('\n');
}
|
|
119
|
+
/**
 * Assembles the nested context object whose dotted keys the archaeology
 * synthesis prompt substitutes. Each section falls back to a parenthesised
 * placeholder string when its input is missing or empty.
 */
function buildArchaeologyPromptContext(brief, mesh, rankedSources, observed, gaps) {
    const targetRepo = brief.target_repo ?? '(unknown target)';
    const observedText = observed ? formatObservedArchitecture(observed) : '(analyzer did not run)';
    const calmSummary = mesh.bar?.calm_model
        ? summarizeCalmModelArchaeology(mesh.bar.calm_model)
        : '(no CALM model loaded)';
    const threatsSummary = mesh.bar?.threats
        ? summarizeThreatsArchaeology(mesh.bar.threats)
        : '(no threat model on file)';
    let gapSignals;
    if (gaps.length === 0) {
        gapSignals = '(no structural gaps detected)';
    }
    else {
        gapSignals = gaps.map(g => `- **${g.id}** [${g.severity}] ${g.kind}: ${g.summary}`).join('\n');
    }
    let sourcesText;
    if (rankedSources.length === 0) {
        sourcesText = '(no web sources retrieved)';
    }
    else {
        sourcesText = rankedSources.map(formatRankedSource).join('\n\n');
    }
    return {
        target_repo: targetRepo,
        observed_architecture: observedText,
        mesh: {
            bar: {
                calm_summary: calmSummary,
                threats_summary: threatsSummary,
            },
        },
        gap_signals: gapSignals,
        ranked_sources: sourcesText,
    };
}
|
|
138
|
+
/**
 * Renders the observed architecture as plain text: a repo profile header,
 * the first 12 modules, then (when non-empty) the first 15 endpoints and the
 * first 25 direct dependencies.
 */
function formatObservedArchitecture(o) {
    const { profile } = o;
    const out = [
        `Repo: ${profile.slug} @ ${profile.cloneSha.slice(0, 12)}`,
        `Languages: ${profile.languages.join(', ') || '(none detected)'}`,
        `Frameworks: ${profile.frameworks.join(', ') || '(none detected)'}`,
        `Manifests: ${profile.manifests.join(', ') || '(none)'}`,
        `Files: ${profile.totalFiles} totalling ${profile.totalBytes} bytes`,
        '',
        'Modules (top 12 by file count):',
    ];
    // The list is taken in the order given; no sorting happens here.
    o.modules.slice(0, 12).forEach(m => {
        out.push(` - OA[${m.name}] layer=${m.layer} files=${m.fileCount} endpoints=${m.endpointCount}`);
    });
    if (o.endpoints.length > 0) {
        out.push('', 'Endpoints (sample):');
        o.endpoints.slice(0, 15).forEach(e => {
            out.push(` - ${e.method} ${e.path} (${e.framework}) — ${e.file}`);
        });
    }
    if (o.dependencies.length > 0) {
        const shown = o.dependencies.slice(0, 25).join(', ');
        const more = o.dependencies.length > 25 ? ', …' : '';
        out.push('', `Direct dependencies (${o.dependencies.length}): ${shown}${more}`);
    }
    return out.join('\n');
}
|
|
163
|
+
/**
 * Summarises a CALM model as a node/relationship count line followed by up
 * to ten node bullets (`unique-id`, else `name`, else 'unknown', plus the
 * `node-type`). Non-object input yields a placeholder string.
 */
function summarizeCalmModelArchaeology(calm) {
    const isObject = Boolean(calm) && typeof calm === 'object';
    if (!isObject) {
        return '(no CALM model loaded)';
    }
    // Missing or non-array fields are treated as empty lists.
    const nodeList = Array.isArray(calm.nodes) ? calm.nodes : [];
    const relationshipList = Array.isArray(calm.relationships) ? calm.relationships : [];
    const bullets = nodeList.slice(0, 10).map(node => {
        const label = node['unique-id'] ?? node.name ?? 'unknown';
        const kind = node['node-type'] ?? 'unknown';
        return ` - ${label} (${kind})`;
    });
    return [`${nodeList.length} node(s), ${relationshipList.length} relationship(s)`, ...bullets].join('\n');
}
|
|
178
|
+
/**
 * Summarises a threat list as per-category counts, e.g.
 * `Spoofing × 2, Tampering × 1`, in order of first appearance.
 *
 * @param threats Array of threat entries; each entry's `category` is used
 *   as the tally key, falling back to 'unknown' when missing or empty.
 * @returns The joined `category × count` list, or '(no threats)' when the
 *   input is not an array or is empty.
 */
function summarizeThreatsArchaeology(threats) {
    if (!Array.isArray(threats) || threats.length === 0) {
        return '(no threats)';
    }
    // A Map is used instead of a plain object so that categories named like
    // Object.prototype members ('constructor', 'toString', '__proto__') are
    // counted as ordinary keys instead of reading inherited properties.
    const counts = new Map();
    for (const t of threats) {
        const cat = t?.category || 'unknown';
        counts.set(cat, (counts.get(cat) ?? 0) + 1);
    }
    return [...counts].map(([c, n]) => `${c} × ${n}`).join(', ');
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { ProviderResult } from '../../search/provider-result';
|
|
2
|
+
export interface TavilySearchNodeOpts {
|
|
3
|
+
apiKey: string;
|
|
4
|
+
queries: string[];
|
|
5
|
+
maxResultsPerQuery?: number;
|
|
6
|
+
searchDepth?: 'basic' | 'advanced';
|
|
7
|
+
fetchImpl?: typeof fetch;
|
|
8
|
+
}
|
|
9
|
+
export interface QueryEnvelope {
|
|
10
|
+
query: string;
|
|
11
|
+
httpStatus: number;
|
|
12
|
+
responseBytes: number;
|
|
13
|
+
resultCount: number;
|
|
14
|
+
/** Populated when this query failed. */
|
|
15
|
+
error?: string;
|
|
16
|
+
}
|
|
17
|
+
export interface TavilySearchNodeResult {
|
|
18
|
+
envelopes: QueryEnvelope[];
|
|
19
|
+
results: ProviderResult[];
|
|
20
|
+
}
|
|
21
|
+
export declare function runTavilySearch(opts: TavilySearchNodeOpts): Promise<TavilySearchNodeResult>;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runTavilySearch = runTavilySearch;
|
|
4
|
+
/**
|
|
5
|
+
* tavily_search — pure_api node.
|
|
6
|
+
*
|
|
7
|
+
* Runs every web query from the QueryPlan against Tavily in parallel.
|
|
8
|
+
* Per-query failures are isolated (one query failing doesn't kill the run);
|
|
9
|
+
* the orchestrator records per-query telemetry for the audit log.
|
|
10
|
+
*
|
|
11
|
+
* Emits ProviderResult[] tagged with provider='tavily' for the shared
|
|
12
|
+
* dedupe-and-rank step that handles results from every provider.
|
|
13
|
+
*/
|
|
14
|
+
const tavily_client_1 = require("../../search/tavily-client");
|
|
15
|
+
/**
 * Sends every query to Tavily concurrently and collects one telemetry
 * envelope per query plus the flattened results tagged `provider: 'tavily'`.
 * A rejected query produces an envelope with zeroed counters and an `error`
 * message instead of failing the whole call.
 *
 * @throws Error when `opts.apiKey` is falsy.
 */
async function runTavilySearch(opts) {
    if (!opts.apiKey) {
        throw new Error('TAVILY_API_KEY missing — set the env var or pass apiKey directly');
    }
    const searchOne = (query) => (0, tavily_client_1.tavilySearch)({
        apiKey: opts.apiKey,
        query,
        maxResults: opts.maxResultsPerQuery ?? 5,
        searchDepth: opts.searchDepth ?? 'basic',
        fetchImpl: opts.fetchImpl,
    });
    const settled = await Promise.allSettled(opts.queries.map(query => searchOne(query)));
    const envelopes = [];
    const results = [];
    // allSettled preserves input order, so outcome i belongs to query i.
    for (const [index, query] of opts.queries.entries()) {
        const outcome = settled[index];
        if (outcome.status !== 'fulfilled') {
            const reason = outcome.reason;
            const message = reason instanceof Error ? reason.message : String(reason);
            envelopes.push({ query, httpStatus: 0, responseBytes: 0, resultCount: 0, error: message });
            continue;
        }
        const response = outcome.value;
        envelopes.push({
            query,
            httpStatus: response.httpStatus,
            responseBytes: response.responseBytes,
            resultCount: response.results.length,
        });
        for (const hit of response.results) {
            results.push({
                provider: 'tavily',
                fromQuery: query,
                title: hit.title,
                url: hit.url,
                content: hit.content,
                score: hit.score,
                publishedDate: hit.publishedDate,
            });
        }
    }
    return { envelopes, results };
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { ProviderResult } from '../../search/provider-result';
|
|
2
|
+
import type { QueryEnvelope } from './tavily-search';
|
|
3
|
+
export interface UsptoSearchNodeOpts {
|
|
4
|
+
apiKey: string;
|
|
5
|
+
queries: string[];
|
|
6
|
+
maxResultsPerQuery?: number;
|
|
7
|
+
fetchImpl?: typeof fetch;
|
|
8
|
+
}
|
|
9
|
+
export interface UsptoSearchNodeResult {
|
|
10
|
+
envelopes: QueryEnvelope[];
|
|
11
|
+
results: ProviderResult[];
|
|
12
|
+
}
|
|
13
|
+
export declare function runUsptoSearch(opts: UsptoSearchNodeOpts): Promise<UsptoSearchNodeResult>;
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runUsptoSearch = runUsptoSearch;
|
|
4
|
+
/**
|
|
5
|
+
* uspto_search — pure_api node.
|
|
6
|
+
*
|
|
7
|
+
* Runs each patent query against USPTO's Open Data Portal
|
|
8
|
+
* (api.uspto.gov). Requires USPTO_API_KEY.
|
|
9
|
+
*
|
|
10
|
+
* Salience score derived from result position (descending; first hit
|
|
11
|
+
* gets 0.85, decays by 0.1 per position, floor at 0.4) since the ODP
|
|
12
|
+
* endpoint doesn't return its own relevance score.
|
|
13
|
+
*
|
|
14
|
+
* When no apiKey is supplied, this node throws — the orchestrator catches
|
|
15
|
+
* and converts to a node_error envelope so the run continues without
|
|
16
|
+
* patent coverage rather than failing entirely.
|
|
17
|
+
*/
|
|
18
|
+
const uspto_client_1 = require("../../search/uspto-client");
|
|
19
|
+
/**
 * Sends every patent query to the USPTO client concurrently and collects one
 * telemetry envelope per query plus the flattened results tagged
 * `provider: 'uspto'`. Each result's score comes from its position within
 * its own query (0.85 for the first hit, minus 0.1 per position, never below
 * 0.4), and its abstract is cut to 500 characters.
 *
 * @throws Error when `opts.apiKey` is falsy.
 */
async function runUsptoSearch(opts) {
    if (!opts.apiKey) {
        throw new Error('USPTO_API_KEY missing — request a key at https://data.uspto.gov/apis/getting-started');
    }
    const searchOne = (query) => (0, uspto_client_1.usptoSearch)({
        apiKey: opts.apiKey,
        query,
        maxResults: opts.maxResultsPerQuery ?? 5,
        fetchImpl: opts.fetchImpl,
    });
    const settled = await Promise.allSettled(opts.queries.map(query => searchOne(query)));
    const envelopes = [];
    const results = [];
    // allSettled preserves input order, so outcome i belongs to query i.
    for (const [index, query] of opts.queries.entries()) {
        const outcome = settled[index];
        if (outcome.status !== 'fulfilled') {
            const reason = outcome.reason;
            const message = reason instanceof Error ? reason.message : String(reason);
            envelopes.push({ query, httpStatus: 0, responseBytes: 0, resultCount: 0, error: message });
            continue;
        }
        const response = outcome.value;
        envelopes.push({
            query,
            httpStatus: response.httpStatus,
            responseBytes: response.responseBytes,
            resultCount: response.results.length,
        });
        response.results.forEach((patent, position) => {
            results.push({
                provider: 'uspto',
                fromQuery: query,
                title: patent.title,
                url: patent.url,
                content: patent.abstract.slice(0, 500),
                score: Math.max(0.4, 0.85 - position * 0.1),
                publishedDate: patent.grantedAt || undefined,
                authors: patent.inventors,
            });
        });
    }
    return { envelopes, results };
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* verify_grounding — pure node, v0.6.
|
|
3
|
+
*
|
|
4
|
+
* Combines FOUR signals to decide PASS / ITERATE / EXHAUSTED:
|
|
5
|
+
* (1) deterministic_architecture_review — citation-grep against premises
|
|
6
|
+
* (2) deterministic_security_review — citation-grep against threats + OWASP + NIST
|
|
7
|
+
* (3) architect_expert_review (LLM) — SCORE/SEVERITY/COVERED/MISSING/CHANGES
|
|
8
|
+
* (4) security_expert_review (LLM) — same shape
|
|
9
|
+
*
|
|
10
|
+
* Plus the deterministic citation-coverage stats derived from PrdCitationSignals
|
|
11
|
+
* (threats covered, CALM nodes referenced, self-reported NOs).
|
|
12
|
+
*
|
|
13
|
+
* Verdict rules (v0.6 "both-must-pass" semantics):
|
|
14
|
+
* - If EITHER deterministic reviewer is MAJOR (invalid citations exist),
|
|
15
|
+
* ITERATE — no amount of LLM rubber-stamping can excuse a wrong cite.
|
|
16
|
+
* - If the |arch_score − sec_score| disagreement is ≥ 0.2, ITERATE — the
|
|
17
|
+
* experts disagree strongly, treat as "needs another pass".
|
|
18
|
+
* - In strict mode, any BLOCKING LLM severity also forces ITERATE.
|
|
19
|
+
* - Otherwise: PASS iff composite ≥ threshold.
|
|
20
|
+
* - On the final allowed iteration, ITERATE becomes EXHAUSTED.
|
|
21
|
+
*/
|
|
22
|
+
import type { GroundingBlock, GroundingMode, MeshContext } from '../../schemas';
|
|
23
|
+
import type { DeterministicReview } from './deterministic-review';
|
|
24
|
+
import type { ExpertReview } from './expert-review';
|
|
25
|
+
import type { PrdCitationSignals } from './prd-validator';
|
|
26
|
+
export interface VerifyGroundingOpts {
|
|
27
|
+
iteration: number;
|
|
28
|
+
threshold: number;
|
|
29
|
+
mode: GroundingMode;
|
|
30
|
+
signals: PrdCitationSignals;
|
|
31
|
+
/** LLM reviewers — high-judgment scoring. */
|
|
32
|
+
architecture: ExpertReview;
|
|
33
|
+
security: ExpertReview;
|
|
34
|
+
/** Deterministic reviewers — citation grep. */
|
|
35
|
+
det_architecture: DeterministicReview;
|
|
36
|
+
det_security: DeterministicReview;
|
|
37
|
+
meshContext: MeshContext;
|
|
38
|
+
/** History of LLM reviews across prior iterations — for the GroundingBlock progression. */
|
|
39
|
+
history: ExpertReview[];
|
|
40
|
+
}
|
|
41
|
+
export type GroundingVerdict = 'PASS' | 'ITERATE' | 'EXHAUSTED';
|
|
42
|
+
export interface VerifyGroundingResult {
|
|
43
|
+
verdict: GroundingVerdict;
|
|
44
|
+
grounding: GroundingBlock;
|
|
45
|
+
reason: string;
|
|
46
|
+
/** Per-iteration signals — what iteration_summary audit events record. */
|
|
47
|
+
signals_snapshot: {
|
|
48
|
+
composite_score: number;
|
|
49
|
+
disagreement_delta: number;
|
|
50
|
+
};
|
|
51
|
+
}
|
|
52
|
+
/** Disagreement threshold from the v0.6 spec — re-iterate when experts disagree this much. */
|
|
53
|
+
export declare const DISAGREEMENT_DELTA_THRESHOLD = 0.2;
|
|
54
|
+
export declare function verifyGrounding(opts: VerifyGroundingOpts): VerifyGroundingResult;
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DISAGREEMENT_DELTA_THRESHOLD = void 0;
|
|
4
|
+
exports.verifyGrounding = verifyGrounding;
|
|
5
|
+
/** Disagreement threshold from the v0.6 spec — re-iterate when experts disagree this much. */
|
|
6
|
+
exports.DISAGREEMENT_DELTA_THRESHOLD = 0.2;
|
|
7
|
+
/**
 * Decides the grounding verdict for one PRD iteration from the two LLM
 * expert reviews, the two deterministic reviews and the citation coverage.
 *
 * Rules are checked in order and the first match wins:
 *   1. either deterministic review is MAJOR        → ITERATE
 *   2. strict mode and either LLM review BLOCKING  → ITERATE
 *   3. |arch score − sec score| ≥ the threshold    → ITERATE
 *   4. composite score ≥ opts.threshold            → PASS
 *   otherwise                                       → ITERATE
 *
 * This function itself only ever returns 'PASS' or 'ITERATE'.
 */
function verifyGrounding(opts) {
    const { architecture, security, det_architecture: detArch, det_security: detSec } = opts;
    const citation = computeCitationCoverage(opts.signals, opts.meshContext);
    const compositeScore = combineScore(architecture.score, security.score, citation);
    const disagreement = Math.abs(architecture.score - security.score);
    const baseGrounding = {
        final_iteration: opts.iteration,
        iterations: [...opts.history, architecture, security],
        citation_coverage: citation,
        final_score: round4(compositeScore),
        passed: false, // only the PASS branch flips this to true
    };
    const snapshot = () => ({
        composite_score: round4(compositeScore),
        disagreement_delta: round4(disagreement),
    });
    // Every non-PASS outcome has the same shape and differs only in its reason.
    const iterate = (reason) => ({
        verdict: 'ITERATE',
        grounding: baseGrounding,
        reason,
        signals_snapshot: snapshot(),
    });
    // Rule 1: invalid citations from EITHER deterministic reviewer.
    if (detArch.severity === 'MAJOR' || detSec.severity === 'MAJOR') {
        return iterate(`Deterministic reviewer flagged invalid citations (det_arch=${detArch.severity}/${detArch.invalid_citations.length}, det_sec=${detSec.severity}/${detSec.invalid_citations.length}) — the PRD references IDs that don't exist in the mesh. Composite=${round4(compositeScore)} ignored until cites are fixed.`);
    }
    // Rule 2: BLOCKING LLM severity, enforced in strict mode only.
    const anyBlocking = architecture.severity === 'BLOCKING' || security.severity === 'BLOCKING';
    if (opts.mode === 'strict' && anyBlocking) {
        return iterate(`BLOCKING review in strict mode (arch=${architecture.severity}, sec=${security.severity}) — re-iterate even though composite=${round4(compositeScore)} would otherwise pass.`);
    }
    // Rule 3: the two experts disagree by at least the exported threshold.
    if (disagreement >= exports.DISAGREEMENT_DELTA_THRESHOLD) {
        return iterate(`Expert disagreement ${round4(disagreement)} ≥ ${exports.DISAGREEMENT_DELTA_THRESHOLD} (arch=${architecture.score}, sec=${security.score}) — re-iterate so the experts can converge.`);
    }
    // Rule 4: composite below the caller's threshold.
    if (!(compositeScore >= opts.threshold)) {
        return iterate(`composite=${round4(compositeScore)} < threshold=${opts.threshold} (arch=${architecture.score} × sec=${security.score}; under-cited FR=${citation.calm_nodes_in_scope - citation.calm_nodes_cited_by_fr}, under-cited threats=${citation.threats_in_scope - citation.threats_covered_by_sr}, self-reported NO=${citation.self_reported_no_count}).`);
    }
    return {
        verdict: 'PASS',
        grounding: { ...baseGrounding, passed: true },
        reason: `composite=${round4(compositeScore)} ≥ threshold=${opts.threshold}; arch=${architecture.score}/${architecture.severity}; sec=${security.score}/${security.severity}; det_arch=${detArch.severity}; det_sec=${detSec.severity}; disagreement=${round4(disagreement)}.`,
        signals_snapshot: snapshot(),
    };
}
|
|
69
|
+
// ============================================================================
|
|
70
|
+
// Citation coverage (deterministic — unchanged from earlier phase)
|
|
71
|
+
// ============================================================================
|
|
72
|
+
/**
 * Count how much of the mesh's in-scope material is cited by the parsed
 * requirement entries.
 *
 * @param signals Parsed signals; this function reads `sr_entries[].cited`,
 *   `fr_entries[].cited` and `coverage_rows[].status`.
 * @param mesh Mesh data; this function reads `bar.threats[].id` and
 *   `bar.calm_model.nodes[]['unique-id']`. `bar` may be absent.
 * @returns An object with:
 *   - `threats_in_scope`: number of distinct truthy threat ids on the BAR.
 *   - `threats_covered_by_sr`: how many of those ids appear among the
 *     `THR-`-prefixed citations of any SR entry.
 *   - `calm_nodes_in_scope`: number of distinct string `unique-id`s.
 *   - `calm_nodes_cited_by_fr`: number of FR entries that carry at least one
 *     citation, capped at `calm_nodes_in_scope`. This is a count of citing
 *     entries, not a per-node match.
 *   - `self_reported_no_count`: coverage rows whose status is exactly `'NO'`.
 */
function computeCitationCoverage(signals, mesh) {
    // Threat ids declared in the mesh. Entries with no truthy `id` are ignored.
    const strideIdsInScope = new Set();
    const threats = mesh.bar?.threats;
    if (Array.isArray(threats)) {
        for (const t of threats) {
            // `?.` so a null/undefined list entry is skipped instead of throwing.
            if (t?.id) {
                strideIdsInScope.add(t.id);
            }
        }
    }
    // Every `THR-` citation made by any SR entry (may include ids not in scope;
    // those are filtered out when the covered count is computed below).
    const threatsCitedBySr = new Set();
    for (const sr of signals.sr_entries) {
        for (const cite of sr.cited) {
            // Guard the type first: `startsWith` only exists on strings.
            if (typeof cite === 'string' && cite.startsWith('THR-')) {
                threatsCitedBySr.add(cite);
            }
        }
    }
    // CALM node ids: only string-valued `unique-id`s count.
    const calmNodesInScope = new Set();
    const calm = mesh.bar?.calm_model;
    if (calm && typeof calm === 'object') {
        const nodes = calm.nodes;
        if (Array.isArray(nodes)) {
            for (const n of nodes) {
                // `?.` so a null/undefined node entry is skipped instead of throwing.
                const id = n?.['unique-id'];
                if (typeof id === 'string') {
                    calmNodesInScope.add(id);
                }
            }
        }
    }
    // Capped so the derived coverage ratio can never exceed 1.
    const calmCitedByFr = Math.min(calmNodesInScope.size, signals.fr_entries.filter(f => f.cited.length > 0).length);
    const selfReportedNo = signals.coverage_rows.filter(r => r.status === 'NO').length;
    return {
        threats_in_scope: strideIdsInScope.size,
        threats_covered_by_sr: [...threatsCitedBySr].filter(id => strideIdsInScope.has(id)).length,
        calm_nodes_in_scope: calmNodesInScope.size,
        calm_nodes_cited_by_fr: calmCitedByFr,
        self_reported_no_count: selfReportedNo,
    };
}
|
|
112
|
+
// ============================================================================
|
|
113
|
+
// Combined score
|
|
114
|
+
// ============================================================================
|
|
115
|
+
/**
 * Fold the architecture score, the security score and a citation-coverage
 * score into one composite number.
 *
 * The citation score is the harmonic mean of threat coverage and CALM-node
 * coverage, minus a penalty of 0.05 per self-reported "NO" (penalty capped at
 * 0.30), floored at 0. The composite weights are 0.35 / 0.35 / 0.30.
 *
 * @param archScore Architecture score.
 * @param secScore Security score.
 * @param citation Coverage counts: `threats_in_scope`, `threats_covered_by_sr`,
 *   `calm_nodes_in_scope`, `calm_nodes_cited_by_fr`, `self_reported_no_count`.
 * @returns The weighted composite score.
 */
function combineScore(archScore, secScore, citation) {
    // An empty scope counts as fully covered, which also avoids dividing by zero.
    let threatRatio = 1;
    if (citation.threats_in_scope !== 0) {
        threatRatio = citation.threats_covered_by_sr / citation.threats_in_scope;
    }
    let calmRatio = 1;
    if (citation.calm_nodes_in_scope !== 0) {
        calmRatio = citation.calm_nodes_cited_by_fr / citation.calm_nodes_in_scope;
    }
    const penalty = Math.min(0.30, citation.self_reported_no_count * 0.05);
    const penalised = harmonicMean(threatRatio, calmRatio) - penalty;
    const citationScore = Math.max(0, penalised);
    return 0.35 * archScore + 0.35 * secScore + 0.30 * citationScore;
}
|
|
126
|
+
/**
 * Harmonic mean of two numbers.
 *
 * @param a First value.
 * @param b Second value.
 * @returns `2ab / (a + b)`, or 0 when either value is zero or negative.
 */
function harmonicMean(a, b) {
    const hasNonPositive = a <= 0 || b <= 0;
    if (hasNonPositive) {
        return 0;
    }
    const twiceProduct = 2 * a * b;
    const total = a + b;
    return twiceProduct / total;
}
|
|
132
|
+
/**
 * Round a number to four decimal places.
 *
 * @param n Value to round.
 * @returns `n` scaled by 10000, passed through `Math.round`, and scaled back.
 */
function round4(n) {
    const scale = 10000;
    const scaled = Math.round(n * scale);
    return scaled / scale;
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Options object accepted by `runPrd`.
 *
 * NOTE(review): this declaration exposes only names and types; the per-field
 * notes below are read off those names and should be confirmed against the
 * implementation.
 */
export interface PrdOptions {
    /** Typed `unknown`: the expected shape is not visible from this declaration. */
    brief: unknown;
    /** Presumably the directory holding the mesh data — confirm. */
    meshDir: string;
    /** Presumably the directory generated artifacts are written to — confirm. */
    outputDir: string;
    /** Presumably the directory the audit log is written to — confirm. */
    auditDir: string;
    /** Optional. Presumably a path at which to emit a PR body — confirm. */
    emitPrBodyPath?: string;
    agentVersion: string;
    /** Optional API key. */
    anthropicApiKey?: string;
    /** Optional token. */
    githubToken?: string;
    /** Optional function with the signature of the global `fetch`. */
    fetchImpl?: typeof fetch;
}
|
|
12
|
+
/**
 * Value that the promise returned by `runPrd` resolves to.
 *
 * NOTE(review): units and exact semantics of the numeric fields are not
 * visible from this declaration — confirm against the implementation.
 */
export interface PrdResult {
    run_id: string;
    topic: string;
    artifact_path: string;
    manifest_path: string;
    audit_log_path: string;
    chain_root_hash: string;
    /** `null` when there is no PR body path to report. */
    pr_body_path: string | null;
    /** PASS / ITERATE / EXHAUSTED — the final verdict of verify_grounding. */
    verdict: 'PASS' | 'ITERATE' | 'EXHAUSTED';
    final_score: number;
    iterations: number;
    total_input_tokens: number;
    total_output_tokens: number;
    total_cost_usd: number;
}
|
|
28
|
+
/**
 * Ambient declaration of the `runPrd` entry point.
 *
 * @param opts Run configuration; see `PrdOptions`.
 * @returns A promise that resolves to a `PrdResult`.
 *
 * NOTE(review): only the signature is visible here; what the run does and
 * how it rejects are not shown by this declaration — confirm against the
 * implementation.
 */
export declare function runPrd(opts: PrdOptions): Promise<PrdResult>;
|