@maintainabilityai/research-runner 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +2 -2
- package/dist/runner/archeologist.d.ts +4 -5
- package/dist/runner/archeologist.js +34 -117
- package/dist/runner/nodes/format-for-human.d.ts +39 -0
- package/dist/runner/nodes/format-for-human.js +164 -0
- package/package.json +1 -1
- package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +0 -22
- package/dist/runner/nodes/synthesis-archaeology-validator.js +0 -131
- package/dist/runner/nodes/synthesize-report.d.ts +0 -53
- package/dist/runner/nodes/synthesize-report.js +0 -188
package/dist/cli.js
CHANGED
|
@@ -111,7 +111,7 @@ async function archeologistCmd(argv) {
|
|
|
111
111
|
meshDir: flags.mesh ? path.resolve(flags.mesh) : process.cwd(),
|
|
112
112
|
outputDir: flags.output || 'research',
|
|
113
113
|
auditDir: flags.audit || '.research-audit',
|
|
114
|
-
|
|
114
|
+
emitIssueBodyPath: flags.emit_issue_body,
|
|
115
115
|
agentVersion: PKG.version,
|
|
116
116
|
});
|
|
117
117
|
process.stdout.write(JSON.stringify(result, null, 2) + '\n');
|
|
@@ -120,7 +120,7 @@ async function archeologistCmd(argv) {
|
|
|
120
120
|
topic: result.topic,
|
|
121
121
|
artifact_path: result.artifact_path,
|
|
122
122
|
chain_root_hash: result.chain_root_hash,
|
|
123
|
-
|
|
123
|
+
issue_body_path: result.issue_body_path || '',
|
|
124
124
|
});
|
|
125
125
|
}
|
|
126
126
|
async function prdCmd(argv) {
|
|
@@ -3,7 +3,7 @@ export interface ArcheologistOptions {
|
|
|
3
3
|
meshDir: string;
|
|
4
4
|
outputDir: string;
|
|
5
5
|
auditDir: string;
|
|
6
|
-
|
|
6
|
+
emitIssueBodyPath?: string;
|
|
7
7
|
agentVersion: string;
|
|
8
8
|
/** Provider keys — supply only the one your brief.llm_provider needs. Default from process.env. */
|
|
9
9
|
anthropicApiKey?: string;
|
|
@@ -18,10 +18,12 @@ export interface ArcheologistOptions {
|
|
|
18
18
|
export interface ArcheologistResult {
|
|
19
19
|
run_id: string;
|
|
20
20
|
topic: string;
|
|
21
|
+
/** Path to the issue-update markdown the runner wrote to outputDir. */
|
|
21
22
|
artifact_path: string;
|
|
22
23
|
audit_log_path: string;
|
|
23
24
|
chain_root_hash: string;
|
|
24
|
-
|
|
25
|
+
/** Path to the wrapped issue-body markdown (data + Hatter's Tag). Only set when --emit-issue-body was passed. */
|
|
26
|
+
issue_body_path: string | null;
|
|
25
27
|
total_input_tokens: number;
|
|
26
28
|
total_output_tokens: number;
|
|
27
29
|
total_cost_usd: number;
|
|
@@ -32,8 +34,5 @@ export interface ArcheologistResult {
|
|
|
32
34
|
gap_analysis_ran: boolean;
|
|
33
35
|
/** Number of archaeology gaps identified. Undefined on research-path runs. */
|
|
34
36
|
archaeology_gap_count?: number;
|
|
35
|
-
/** Synthesis structural validator outputs — quick reviewer signal. */
|
|
36
|
-
conclusion_count: number;
|
|
37
|
-
recommendation_count: number;
|
|
38
37
|
}
|
|
39
38
|
export declare function runArcheologist(opts: ArcheologistOptions): Promise<ArcheologistResult>;
|
|
@@ -72,7 +72,7 @@ const uspto_search_1 = require("./nodes/uspto-search");
|
|
|
72
72
|
const hackernews_search_1 = require("./nodes/hackernews-search");
|
|
73
73
|
const dedupe_and_rank_1 = require("./nodes/dedupe-and-rank");
|
|
74
74
|
const gap_analysis_1 = require("./nodes/gap-analysis");
|
|
75
|
-
const
|
|
75
|
+
const format_for_human_1 = require("./nodes/format-for-human");
|
|
76
76
|
const clone_and_index_1 = require("./nodes/clone-and-index");
|
|
77
77
|
const analyze_architecture_1 = require("./nodes/analyze-architecture");
|
|
78
78
|
const identify_gaps_1 = require("./nodes/identify-gaps");
|
|
@@ -456,77 +456,38 @@ async function runArcheologist(opts) {
|
|
|
456
456
|
gapAnalysisRan = true;
|
|
457
457
|
}
|
|
458
458
|
} // end research-path else branch
|
|
459
|
-
// -----
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
459
|
+
// ----- format_for_human (pure) -----
|
|
460
|
+
//
|
|
461
|
+
// The runner stops here. Composes the markdown comment that the
|
|
462
|
+
// workflow posts back to the originating research-request issue.
|
|
463
|
+
// Synthesis is now produced by the assigned agent (Copilot/Claude),
|
|
464
|
+
// not by the runner.
|
|
465
|
+
progress(`◐ format_for_human — composing issue-update markdown for ${rankedSources.length} ranked sources…`);
|
|
466
|
+
const formatStart = Date.now();
|
|
467
|
+
const formatted = (0, format_for_human_1.formatForHuman)({
|
|
464
468
|
brief,
|
|
469
|
+
runId,
|
|
465
470
|
meshContext,
|
|
471
|
+
queryPlan: researchQueryPlan,
|
|
466
472
|
rankedSources,
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
path: brief.path,
|
|
472
|
-
observedArchitecture,
|
|
473
|
-
archaeologyGaps,
|
|
474
|
-
fetchImpl: opts.fetchImpl,
|
|
475
|
-
});
|
|
476
|
-
totalInputTokens += synthesis.llm.inputTokens;
|
|
477
|
-
totalOutputTokens += synthesis.llm.outputTokens;
|
|
478
|
-
totalCostUsd += synthesis.llm.costUsd;
|
|
479
|
-
progress(`✓ synthesize_report (${synthesis.llm.provider} ${synthesis.llm.model}) in ${Date.now() - synthStart}ms — ${synthesis.llm.inputTokens} in / ${synthesis.llm.outputTokens} out tokens, ${synthesis.llm.attempts} attempt${synthesis.llm.attempts !== 1 ? 's' : ''}`);
|
|
480
|
-
emitter.emit({
|
|
481
|
-
node_kind: 'llm',
|
|
482
|
-
node_name: 'synthesize_report',
|
|
483
|
-
duration_ms: Date.now() - synthStart,
|
|
484
|
-
llm: {
|
|
485
|
-
provider: synthesis.llm.provider,
|
|
486
|
-
model: synthesis.llm.model,
|
|
487
|
-
prompt_pack: { path: synthesis.prompt.packPath, sha256: synthesis.prompt.packSha256 },
|
|
488
|
-
input_tokens: synthesis.llm.inputTokens,
|
|
489
|
-
output_tokens: synthesis.llm.outputTokens,
|
|
490
|
-
cost_usd: synthesis.llm.costUsd,
|
|
491
|
-
guardrails: { mode: brief.guardrails, pre: 'PASS', post: 'PASS' },
|
|
492
|
-
},
|
|
473
|
+
gapSignals: (0, gap_analysis_1.detectGapSignals)({ brief, rankedSources }),
|
|
474
|
+
gapFollowUpQueries: [], // already merged into rankedSources during the search loop above
|
|
475
|
+
providerResultCounts,
|
|
476
|
+
totalDurationMs: Date.now() - startedAt.getTime(),
|
|
493
477
|
});
|
|
494
|
-
|
|
495
|
-
const today = startedAt.toISOString().slice(0, 10);
|
|
496
|
-
const fileSlug = brief.topic
|
|
497
|
-
.toLowerCase()
|
|
498
|
-
.replace(/[^a-z0-9]+/g, '-')
|
|
499
|
-
.replace(/^-|-$/g, '')
|
|
500
|
-
.slice(0, 60) || 'research';
|
|
501
|
-
const artifactName = `${fileSlug}-${today}.md`;
|
|
478
|
+
const artifactName = `issue-update-${runId}.md`;
|
|
502
479
|
const artifactPath = path.join(absoluteOutputDir, artifactName);
|
|
503
|
-
|
|
504
|
-
? `bar **${meshContext.bar.name}** (\`${meshContext.bar.bar_id}\`), ${meshContext.bar.adrs.length} ADR(s), ${meshContext.bar.related_research.length} prior research doc(s), mesh gaps: ${meshContext.bar.mesh_gaps.join(', ') || '_none_'}`
|
|
505
|
-
: meshContext.platform
|
|
506
|
-
? `platform **${meshContext.platform.platform_id}** (${meshContext.platform.sibling_bars.length} sibling BAR(s))`
|
|
507
|
-
: `portfolio **${meshContext.portfolio.name}** (${meshContext.portfolio.related_research_summaries.length} prior research doc(s))`;
|
|
508
|
-
const bodyMd = buildResearchDoc({
|
|
509
|
-
brief,
|
|
510
|
-
runId,
|
|
511
|
-
meshSummary,
|
|
512
|
-
meshSha: meshContext.mesh_sha,
|
|
513
|
-
queryPlan: researchQueryPlan,
|
|
514
|
-
archaeologySummary: observedArchitecture
|
|
515
|
-
? `Cloned \`${observedArchitecture.profile.slug}\` @ \`${observedArchitecture.profile.cloneSha.slice(0, 12)}\`. ${observedArchitecture.profile.totalFiles} files; languages: ${observedArchitecture.profile.languages.join(', ') || 'n/a'}; frameworks: ${observedArchitecture.profile.frameworks.join(', ') || 'n/a'}; ${observedArchitecture.modules.length} modules; ${observedArchitecture.endpoints.length} endpoints; ${archaeologyGaps.length} structural gaps identified.`
|
|
516
|
-
: undefined,
|
|
517
|
-
synthesisBody: synthesis.body_md,
|
|
518
|
-
});
|
|
519
|
-
const writeStart = Date.now();
|
|
520
|
-
fs.writeFileSync(artifactPath, bodyMd, 'utf8');
|
|
480
|
+
fs.writeFileSync(artifactPath, formatted.body, 'utf8');
|
|
521
481
|
emitter.emit({
|
|
522
482
|
node_kind: 'pure',
|
|
523
|
-
node_name: '
|
|
524
|
-
duration_ms: Date.now() -
|
|
483
|
+
node_name: 'format_for_human',
|
|
484
|
+
duration_ms: Date.now() - formatStart,
|
|
525
485
|
pure: {
|
|
526
|
-
inputs_summary: `
|
|
527
|
-
outputs_summary:
|
|
486
|
+
inputs_summary: `ranked_sources=${rankedSources.length}; mesh_sha=${meshContext.mesh_sha.slice(0, 7)}`,
|
|
487
|
+
outputs_summary: `wrote ${path.relative(opts.meshDir, artifactPath)} (${formatted.body.length} bytes)`,
|
|
528
488
|
},
|
|
529
489
|
});
|
|
490
|
+
progress(`✓ format_for_human — ${formatted.body.length} bytes written to ${path.relative(opts.meshDir, artifactPath)}`);
|
|
530
491
|
// ----- run_complete -----
|
|
531
492
|
const complete = emitter.emitRunComplete({
|
|
532
493
|
node_kind: 'run_complete',
|
|
@@ -541,9 +502,9 @@ async function runArcheologist(opts) {
|
|
|
541
502
|
artifact_paths: [path.relative(opts.meshDir, artifactPath)],
|
|
542
503
|
},
|
|
543
504
|
});
|
|
544
|
-
// ----- Optionally
|
|
545
|
-
let
|
|
546
|
-
if (opts.
|
|
505
|
+
// ----- Optionally emit an issue-body markdown wrapping the artifact + Hatter's Tag -----
|
|
506
|
+
let issueBodyPath = null;
|
|
507
|
+
if (opts.emitIssueBodyPath) {
|
|
547
508
|
const hattersTag = (0, hatters_tag_builder_1.buildHattersTag)({
|
|
548
509
|
run_id: runId,
|
|
549
510
|
mesh_sha: meshContext.mesh_sha,
|
|
@@ -552,9 +513,9 @@ async function runArcheologist(opts) {
|
|
|
552
513
|
published_at: new Date().toISOString(),
|
|
553
514
|
llm: {
|
|
554
515
|
provider: brief.llm_provider,
|
|
555
|
-
//
|
|
556
|
-
//
|
|
557
|
-
model:
|
|
516
|
+
// plan_queries is the only LLM hop we run now (synth handed off
|
|
517
|
+
// to the assigned agent). Surface that model in the Hatter's Tag.
|
|
518
|
+
model: 'openai/gpt-4o-mini',
|
|
558
519
|
input_tokens: totalInputTokens,
|
|
559
520
|
output_tokens: totalOutputTokens,
|
|
560
521
|
cost_usd: roundUsd(totalCostUsd),
|
|
@@ -566,9 +527,9 @@ async function runArcheologist(opts) {
|
|
|
566
527
|
audit_log_path: path.relative(opts.meshDir, emitter.path),
|
|
567
528
|
},
|
|
568
529
|
});
|
|
569
|
-
const
|
|
570
|
-
fs.writeFileSync(opts.
|
|
571
|
-
|
|
530
|
+
const issueBody = [formatted.body, '', hattersTag].join('\n');
|
|
531
|
+
fs.writeFileSync(opts.emitIssueBodyPath, issueBody, 'utf8');
|
|
532
|
+
issueBodyPath = opts.emitIssueBodyPath;
|
|
572
533
|
}
|
|
573
534
|
// ----- archaeology cleanup: remove the shallow clone now that synthesis is done -----
|
|
574
535
|
if (cleanupCloneDir) {
|
|
@@ -578,14 +539,14 @@ async function runArcheologist(opts) {
|
|
|
578
539
|
catch { /* leave on disk — non-fatal, just a tmpdir entry */ }
|
|
579
540
|
}
|
|
580
541
|
const totalDurationMs = Date.now() - startedAt.getTime();
|
|
581
|
-
progress(`◆ done ${runId} in ${(totalDurationMs / 1000).toFixed(1)}s — ${totalInputTokens} in / ${totalOutputTokens} out tokens, $${roundUsd(totalCostUsd)} | sources=${rankedSources.length}
|
|
542
|
+
progress(`◆ done ${runId} in ${(totalDurationMs / 1000).toFixed(1)}s — ${totalInputTokens} in / ${totalOutputTokens} out tokens, $${roundUsd(totalCostUsd)} | sources=${rankedSources.length} | artifact=${path.relative(opts.meshDir, artifactPath)} (synthesis is the assignee's job)`);
|
|
582
543
|
return {
|
|
583
544
|
run_id: runId,
|
|
584
545
|
topic: brief.topic,
|
|
585
546
|
artifact_path: artifactPath,
|
|
586
547
|
audit_log_path: emitter.path,
|
|
587
548
|
chain_root_hash: complete.outcome.chain_root_hash,
|
|
588
|
-
|
|
549
|
+
issue_body_path: issueBodyPath,
|
|
589
550
|
total_input_tokens: totalInputTokens,
|
|
590
551
|
total_output_tokens: totalOutputTokens,
|
|
591
552
|
total_cost_usd: roundUsd(totalCostUsd),
|
|
@@ -594,52 +555,8 @@ async function runArcheologist(opts) {
|
|
|
594
555
|
gap_analysis_ran: gapAnalysisRan,
|
|
595
556
|
/** archaeology path only — undefined for research runs */
|
|
596
557
|
archaeology_gap_count: archaeologyGaps.length || undefined,
|
|
597
|
-
conclusion_count: synthesis.citation_stats.conclusion_count,
|
|
598
|
-
recommendation_count: synthesis.citation_stats.recommendation_count,
|
|
599
558
|
};
|
|
600
559
|
}
|
|
601
|
-
/**
|
|
602
|
-
* Compose the published artifact. The preamble differs by path:
|
|
603
|
-
* research: <metadata> + <mesh context> + <Query Plan table>
|
|
604
|
-
* archaeology: <metadata> + <mesh context> + <Target Repo Profile>
|
|
605
|
-
* The synthesis body owns every H2 from the canonical section list onward.
|
|
606
|
-
* The Hatter's Tag is appended separately by the PR-body path.
|
|
607
|
-
*/
|
|
608
|
-
function buildResearchDoc(opts) {
|
|
609
|
-
const lines = [];
|
|
610
|
-
lines.push(`# ${opts.brief.topic}`);
|
|
611
|
-
lines.push('');
|
|
612
|
-
lines.push(`- **Run id:** \`${opts.runId}\``);
|
|
613
|
-
lines.push(`- **Mesh sha:** \`${opts.meshSha.slice(0, 12)}\``);
|
|
614
|
-
lines.push(`- **Path:** ${opts.brief.path}${opts.brief.target_repo ? ` (\`${opts.brief.target_repo}\`)` : ''}`);
|
|
615
|
-
lines.push(`- **Scope:** ${opts.brief.scope.level}${opts.brief.scope.id ? ` / ${opts.brief.scope.id}` : ''}`);
|
|
616
|
-
lines.push('');
|
|
617
|
-
lines.push('## Run Metadata');
|
|
618
|
-
lines.push('');
|
|
619
|
-
lines.push(`Scope resolved to: ${opts.meshSummary}.`);
|
|
620
|
-
lines.push('');
|
|
621
|
-
if (opts.queryPlan) {
|
|
622
|
-
lines.push('### Query Plan (per-provider, LLM-generated)');
|
|
623
|
-
lines.push('');
|
|
624
|
-
lines.push('| Provider | Queries |');
|
|
625
|
-
lines.push('|---|---|');
|
|
626
|
-
lines.push(`| **web** (Tavily) | ${opts.queryPlan.web.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
|
|
627
|
-
lines.push(`| **arxiv** | ${opts.queryPlan.arxiv.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
|
|
628
|
-
lines.push(`| **patent** (USPTO) | ${opts.queryPlan.patent.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
|
|
629
|
-
lines.push(`| **community** (HN) | ${opts.queryPlan.community.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
|
|
630
|
-
lines.push('');
|
|
631
|
-
}
|
|
632
|
-
if (opts.archaeologySummary) {
|
|
633
|
-
lines.push('### Target Repository Profile (analyze_architecture)');
|
|
634
|
-
lines.push('');
|
|
635
|
-
lines.push(opts.archaeologySummary);
|
|
636
|
-
lines.push('');
|
|
637
|
-
}
|
|
638
|
-
// The synthesis body owns every H2 from the canonical section list onward.
|
|
639
|
-
lines.push(opts.synthesisBody.trim());
|
|
640
|
-
lines.push('');
|
|
641
|
-
return lines.join('\n');
|
|
642
|
-
}
|
|
643
560
|
/**
 * Rounds a USD amount to four decimal places.
 *
 * @param n Amount in US dollars.
 * @returns `n` rounded to the nearest 0.0001.
 */
function roundUsd(n) {
    const scale = 10000;
    const scaled = Math.round(n * scale);
    return scaled / scale;
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* format-for-human — composes the issue-update markdown that
|
|
3
|
+
* `research-runner archeologist` posts back to the originating
|
|
4
|
+
* research-request issue.
|
|
5
|
+
*
|
|
6
|
+
* The runner deliberately stops at this point in the pipeline. The
|
|
7
|
+
* comment we emit here gives a human reviewer (or an assigned
|
|
8
|
+
* Copilot/Claude agent) everything needed to write a synthesis:
|
|
9
|
+
*
|
|
10
|
+
* - the brief + scope
|
|
11
|
+
* - mesh context (impacted BARs, prior PRDs, ADRs)
|
|
12
|
+
* - the LLM-generated query plan
|
|
13
|
+
* - top-ranked sources grouped by provider, with abstracts
|
|
14
|
+
* - the gap analysis (Jobs-to-be-Done style)
|
|
15
|
+
* - the synthesis instructions for the assignee
|
|
16
|
+
*
|
|
17
|
+
* No LLM call here — this is pure formatting. The audit_emitter still
|
|
18
|
+
* gets a `pure` event so the chain stays intact.
|
|
19
|
+
*/
|
|
20
|
+
import type { MeshContext, QueryPlan, RankedSource, ResearchBrief } from '../../schemas';
|
|
21
|
+
/** Inputs consumed by `formatForHuman` when composing the issue-update markdown. */
export interface FormatForHumanOpts {
    /** The research brief the run was started from. */
    brief: ResearchBrief;
    /** Identifier of this runner invocation. */
    runId: string;
    /** Mesh context resolved for the brief's scope. */
    meshContext: MeshContext;
    /** Per-provider query plan, when one exists for this run. */
    queryPlan?: QueryPlan;
    /** Ranked sources to list in the comment. */
    rankedSources: RankedSource[];
    /** Coverage-gap signals; only each signal's `kind` is declared here. */
    gapSignals: readonly {
        kind: string;
    }[];
    /** Follow-up queries derived from the gap analysis. */
    gapFollowUpQueries: ReadonlyArray<string>;
    /** Result counts keyed by provider name. */
    providerResultCounts: Record<string, number>;
    /** Total wall-clock for the runner's data-collection phase, ms. */
    totalDurationMs: number;
}
|
|
35
|
+
/** Value returned by `formatForHuman`. */
export interface FormatForHumanResult {
    /**
     * Markdown text intended to be posted as an issue comment
     * (or used as the body of a new issue).
     */
    body: string;
}
|
|
39
|
+
export declare function formatForHuman(opts: FormatForHumanOpts): FormatForHumanResult;
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.formatForHuman = formatForHuman;
|
|
4
|
+
/**
 * Truncates an excerpt for display in the issue comment. Sources can
 * carry up to 500 chars; we shorten to ~280 so the issue stays readable.
 *
 * Runs of whitespace (including newlines) are collapsed to a single space
 * and the text is trimmed before measuring. Text longer than `n` UTF-16
 * code units is cut and suffixed with '…', so for `n >= 1` the result is
 * never longer than `n` code units.
 *
 * @param s Raw excerpt. `null`/`undefined` is treated as an empty string
 *          so one source without an excerpt cannot abort the whole comment.
 * @param n Maximum length of the returned string (default 280).
 * @returns The cleaned, possibly truncated excerpt.
 */
function shortExcerpt(s, n = 280) {
    const cleaned = String(s ?? '').replace(/\s+/g, ' ').trim();
    if (cleaned.length <= n) {
        return cleaned;
    }
    // Reserve one code unit for the ellipsis. Clamp at 0 so a non-positive
    // `n` cannot become a negative (count-from-the-end) slice index.
    let cut = Math.max(0, n - 1);
    // If the cut lands between the two halves of a surrogate pair (emoji,
    // some CJK), back off one unit rather than emit a lone high surrogate.
    if (cut > 0) {
        const lastUnit = cleaned.charCodeAt(cut - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            cut -= 1;
        }
    }
    return cleaned.slice(0, cut) + '…';
}
|
|
12
|
+
/**
 * Builds the one-line markdown summary of the scope the mesh context
 * resolved to. The most specific scope present wins: BAR, then platform,
 * then the portfolio-wide fallback.
 *
 * Array fields that are absent are counted as empty instead of throwing,
 * matching how `siblingBarTable` treats optional arrays.
 *
 * @param meshContext Mesh context; `bar` and `platform` are optional.
 * @returns A single line of markdown describing the scope.
 */
function meshSummary(meshContext) {
    const bar = meshContext.bar;
    if (bar) {
        const gapList = bar.mesh_gaps ?? [];
        const gaps = gapList.length > 0 ? gapList.join(', ') : '_none_';
        const adrCount = bar.adrs?.length ?? 0;
        const priorCount = bar.related_research?.length ?? 0;
        return `BAR **${bar.name}** (\`${bar.bar_id}\`) — ${adrCount} ADR(s), ${priorCount} prior research doc(s), mesh gaps: ${gaps}`;
    }
    const platform = meshContext.platform;
    if (platform) {
        const siblingCount = platform.sibling_bars?.length ?? 0;
        return `Platform **${platform.platform_id}** — ${siblingCount} BAR(s) in scope`;
    }
    return `Portfolio (no platform/BAR scope — broad research)`;
}
|
|
25
|
+
/**
 * Renders the platform's sibling BARs as the lines of a markdown table
 * (header row, separator row, one row per BAR).
 *
 * @param meshContext Mesh context; only `platform.sibling_bars` is read.
 * @returns The table lines, or an empty array when there is no platform
 *          or it has no sibling BARs.
 */
function siblingBarTable(meshContext) {
    const siblings = meshContext.platform?.sibling_bars ?? [];
    if (siblings.length === 0) {
        return [];
    }
    // A literal `|` ends a table cell (even inside a code span) and a newline
    // ends the row, so neutralise both before interpolating free text.
    const cell = (value) => String(value ?? '').replace(/\s+/g, ' ').trim().replace(/\|/g, '\\|');
    const lines = [
        '| BAR | Name | CALM nodes | Threats |',
        '|---|---|---|---|',
    ];
    for (const sb of siblings) {
        const calmCount = sb.calm_node_ids?.length ?? 0;
        const threatCount = sb.threat_ids?.length ?? 0;
        lines.push(`| \`${cell(sb.bar_id)}\` | ${cell(sb.name)} | ${calmCount} | ${threatCount} |`);
    }
    return lines;
}
|
|
39
|
+
/**
 * Renders one provider's slice of the ranked sources as markdown lines:
 * an H3 heading followed by one bullet (linked title, score, optional
 * date and authors) plus a quoted excerpt per source.
 *
 * @param label      Heading text for the section.
 * @param emoji      Emoji placed before the heading text.
 * @param provider   Provider name used in the "no results" sentence.
 * @param sources    Ranked sources belonging to this provider.
 * @param totalCount Raw result count reported for this provider.
 * @returns Markdown lines for the section, ending with a blank line.
 */
function providerSection(label, emoji, provider, sources, totalCount) {
    if (sources.length === 0) {
        return [`### ${emoji} ${label}`, '', `_No ${provider} results in the top-ranked set (raw count: ${totalCount})._`, ''];
    }
    // Titles and URLs come from external search results. Backslash-escape
    // brackets so an unbalanced `]` cannot end the link text early, and keep
    // the title on one line so it cannot break out of the list item.
    const linkText = (title) => String(title ?? '').replace(/\s+/g, ' ').trim().replace(/[\\\[\]]/g, '\\$&');
    // Percent-encode the characters that would terminate or corrupt an
    // inline link destination: whitespace, parentheses and angle brackets.
    const linkUrl = (url) => String(url ?? '').trim()
        .replace(/\s/g, '%20')
        .replace(/\(/g, '%28')
        .replace(/\)/g, '%29')
        .replace(/</g, '%3C')
        .replace(/>/g, '%3E');
    const lines = [];
    lines.push(`### ${emoji} ${label} (${sources.length} of ${totalCount} ranked)`);
    lines.push('');
    for (const s of sources) {
        // At most three authors are shown; longer lists get "et al.".
        const authors = s.authors && s.authors.length > 0 ? ` — _${s.authors.slice(0, 3).join(', ')}${s.authors.length > 3 ? ' et al.' : ''}_` : '';
        // Only the first ten characters of the date are shown — presumably
        // the YYYY-MM-DD prefix of an ISO timestamp; confirm against the schema.
        const date = s.published_at ? ` _(${s.published_at.slice(0, 10)})_` : '';
        lines.push(`- **[\`${s.id}\`] [${linkText(s.title)}](${linkUrl(s.url)})** — score ${s.salience_score.toFixed(2)}${date}${authors}`);
        // A source without an excerpt yields an empty quote instead of throwing.
        lines.push(`  > ${shortExcerpt(s.excerpt ?? '')}`);
    }
    lines.push('');
    return lines;
}
|
|
55
|
+
/**
 * Composes the issue-update markdown: brief, mesh context, query plan,
 * source-coverage table, ranked sources grouped by provider, gap analysis
 * and the synthesis instructions for the assignee. Pure formatting — the
 * function only builds and returns a string.
 *
 * @param opts Formatting inputs (see `FormatForHumanOpts`). An optional
 *             string `opts.agentVersion` is shown in the footer when set;
 *             when absent the footer carries no version at all (it used to
 *             print the literal placeholder "vpkg").
 * @returns `{ body }` where `body` is the complete markdown document.
 */
function formatForHuman(opts) {
    const { brief, runId, meshContext, queryPlan, rankedSources, gapSignals, gapFollowUpQueries, providerResultCounts, totalDurationMs } = opts;
    const agentVersion = typeof opts.agentVersion === 'string' && opts.agentVersion.trim() !== ''
        ? opts.agentVersion.trim()
        : undefined;
    // Providers with a dedicated heading, in display order. The Hacker News
    // emoji is written with an explicit zero-width joiner so it renders as a
    // single "technologist" glyph.
    const knownProviders = [
        { key: 'tavily', coverage: 'Tavily', heading: 'Tavily — web search', emoji: '🌐' },
        { key: 'arxiv', coverage: 'arXiv', heading: 'arXiv — academic papers', emoji: '📚' },
        { key: 'hackernews', coverage: 'Hacker News', heading: 'Hacker News — community signal', emoji: '🧑\u200D💻' },
        { key: 'uspto', coverage: 'USPTO', heading: 'USPTO — patent landscape', emoji: '📜' },
    ];
    // Wraps text in a code span; embedded backticks would close the span early.
    const inlineCode = (text) => '`' + String(text).replace(/`/g, "'") + '`';
    // One table cell listing queries; `|` must be escaped even inside code spans.
    const queryCell = (queries) => (queries ?? []).map(inlineCode).join(' · ').replace(/\|/g, '\\|');
    const rawCount = (key) => providerResultCounts[key] ?? 0;
    const byProvider = { tavily: [], arxiv: [], hackernews: [], uspto: [] };
    for (const r of rankedSources) {
        (byProvider[r.provider] ??= []).push(r);
    }
    // Sources from a provider without a dedicated heading are still counted in
    // the intro line, so they get a generic row/section rather than vanishing.
    const extraProviders = Object.keys(byProvider).filter(key => !knownProviders.some(p => p.key === key));
    const lines = [];
    lines.push(`# 🔍 Research data collected — ready for synthesis`);
    lines.push('');
    lines.push(`> The Archeologist runner gathered ${rankedSources.length} ranked sources across ${Object.values(providerResultCounts).reduce((a, b) => a + b, 0)} raw results, ran a Jobs-to-be-Done gap analysis, and assembled the mesh context below. **Synthesis is your next step.**`);
    lines.push('');
    lines.push('## Brief');
    lines.push('');
    lines.push(`**Topic.** ${brief.topic}`);
    lines.push('');
    lines.push(`- **Scope:** ${brief.scope.level}${brief.scope.id ? ` / \`${brief.scope.id}\`` : ''}`);
    lines.push(`- **Path:** ${brief.path}${brief.target_repo ? ` (target repo: \`${brief.target_repo}\`)` : ''}`);
    lines.push(`- **Guardrails:** ${brief.guardrails}`);
    lines.push(`- **Run id:** \`${runId}\``);
    lines.push(`- **Mesh sha:** \`${meshContext.mesh_sha.slice(0, 12)}\``);
    lines.push(`- **Data-collection wall-clock:** ${(totalDurationMs / 1000).toFixed(1)}s`);
    lines.push('');
    lines.push('## Mesh context');
    lines.push('');
    lines.push(meshSummary(meshContext));
    lines.push('');
    const siblingLines = siblingBarTable(meshContext);
    if (siblingLines.length > 0) {
        lines.push('### BARs in scope');
        lines.push('');
        lines.push(...siblingLines);
        lines.push('');
    }
    // Tolerate a mesh context without a portfolio block; at most five prior
    // research entries are listed.
    const priorResearch = meshContext.portfolio?.related_research_summaries ?? [];
    if (priorResearch.length > 0) {
        lines.push('### Prior research in this scope');
        lines.push('');
        for (const r of priorResearch.slice(0, 5)) {
            const published = r.published_at ? ` _(${String(r.published_at).slice(0, 10)})_` : '';
            lines.push(`- \`${r.research_id}\` — ${r.topic}${published}`);
        }
        lines.push('');
    }
    if (queryPlan) {
        lines.push('## LLM-generated query plan');
        lines.push('');
        lines.push('| Provider | Queries |');
        lines.push('|---|---|');
        lines.push(`| **Tavily (web)** | ${queryCell(queryPlan.web)} |`);
        lines.push(`| **arXiv** | ${queryCell(queryPlan.arxiv)} |`);
        lines.push(`| **USPTO (patents)** | ${queryCell(queryPlan.patent)} |`);
        lines.push(`| **Hacker News** | ${queryCell(queryPlan.community)} |`);
        lines.push('');
    }
    lines.push('## Source coverage');
    lines.push('');
    lines.push('| Provider | Ranked (top-N) | Raw |');
    lines.push('|---|---:|---:|');
    for (const p of knownProviders) {
        lines.push(`| ${p.coverage} | ${byProvider[p.key].length} | ${rawCount(p.key)} |`);
    }
    for (const key of extraProviders) {
        lines.push(`| ${inlineCode(key).replace(/\|/g, '\\|')} | ${byProvider[key].length} | ${rawCount(key)} |`);
    }
    lines.push('');
    lines.push('## Top-ranked sources');
    lines.push('');
    lines.push('Each source is tagged with a stable `S[N]` id. Use these in the synthesis: every claim should cite at least one, every Conclusion ≥2 (≥1 if confidence LOW), every Recommendation should cite at least one Conclusion `C[N]`.');
    lines.push('');
    for (const p of knownProviders) {
        lines.push(...providerSection(p.heading, p.emoji, p.key, byProvider[p.key], rawCount(p.key)));
    }
    for (const key of extraProviders) {
        lines.push(...providerSection(key, '🔎', key, byProvider[key], rawCount(key)));
    }
    lines.push('## Jobs-to-be-Done / Gap analysis');
    lines.push('');
    if (gapSignals.length === 0) {
        lines.push('_No coverage gaps detected — the search results adequately cover the brief._');
        lines.push('');
    }
    else {
        lines.push(`The runner detected the following coverage gaps:`);
        lines.push('');
        for (const sig of gapSignals) {
            lines.push(`- \`${sig.kind}\``);
        }
        lines.push('');
        if (gapFollowUpQueries.length > 0) {
            lines.push('LLM-derived follow-up queries (already executed against Tavily, results merged above):');
            lines.push('');
            for (const q of gapFollowUpQueries) {
                // Same backtick sanitising as the query-plan table above.
                lines.push(`- ${inlineCode(q)}`);
            }
            lines.push('');
        }
    }
    // NOTE(review): these instructions are identical for every `brief.path`;
    // confirm the prompt file and ten-section list also apply to non-research
    // (archaeology) runs.
    lines.push('## ✍️ Synthesis instructions — for the assignee');
    lines.push('');
    lines.push('Assign this issue to `@github-copilot` (or another agent) to produce the synthesis. The assignee should:');
    lines.push('');
    lines.push('1. **Read every source** above. The `S[N]` ids are how you cite them.');
    lines.push('2. **Open a PR** with a new file under `research/` containing the synthesis markdown.');
    lines.push('3. **Follow the canonical structure** — see `.caterpillar/prompts/research/synthesis.md` in this mesh. The synthesis must have:');
    lines.push('   - 10 H2 sections in the order: `Executive Summary`, `Cross-Source Analysis`, `Jobs-to-be-Done Analysis`, `Whitespace Analysis`, `Patent Landscape`, `Community Signal`, `Academic Foundation`, `Conclusions`, `Recommendations`, `Open Questions`.');
    lines.push('   - Every claim cites at least one `S[N]`.');
    lines.push('   - Every Conclusion `C[N]` cites ≥2 sources (≥1 if confidence is LOW).');
    lines.push('   - Every Recommendation references at least one `C[N]`.');
    lines.push('4. **PR labels** to apply: `research-synthesis`, `ai-assisted`.');
    lines.push('5. **Once merged**, the PRD agent will read your synthesis + the mesh + impacted code repos to produce per-repo landing issues.');
    lines.push('');
    lines.push('---');
    lines.push('');
    // Only print a version when the caller supplied a real one.
    const versionSuffix = agentVersion ? ` v${agentVersion}` : '';
    lines.push(`🤖 Generated by \`research-runner archeologist\`${versionSuffix}. Run id: \`${runId}\`.`);
    return { body: lines.join('\n') };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@maintainabilityai/research-runner",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.10",
|
|
4
4
|
"description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "MaintainabilityAI",
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* synthesis-archaeology-validator — structural validator for the
|
|
3
|
-
* archaeology-path synthesis body.
|
|
4
|
-
*
|
|
5
|
-
* Mirrors synthesis-validator's shape (ValidationReport with citation_stats)
|
|
6
|
-
* but enforces the 9 canonical sections from
|
|
7
|
-
* `.caterpillar/prompts/research/synthesis-archaeology.md`:
|
|
8
|
-
*
|
|
9
|
-
* 1. Executive Summary
|
|
10
|
-
* 2. Repository Profile
|
|
11
|
-
* 3. Current Architecture
|
|
12
|
-
* 4. Gap Analysis (G[N] entries with severity)
|
|
13
|
-
* 5. External Research Findings
|
|
14
|
-
* 6. Recommendations (each cites ≥1 G[N] AND ≥1 grounding token)
|
|
15
|
-
* 7. Implementation Roadmap
|
|
16
|
-
* 8. Risk Factors
|
|
17
|
-
* 9. Untraced items (REQUIRED — may say "None.")
|
|
18
|
-
*/
|
|
19
|
-
import type { ValidationReport } from './synthesis-validator';
|
|
20
|
-
export declare const CANONICAL_ARCHAEOLOGY_SECTIONS: readonly ["Executive Summary", "Repository Profile", "Current Architecture", "Gap Analysis", "External Research Findings", "Recommendations", "Implementation Roadmap", "Risk Factors", "Untraced items"];
|
|
21
|
-
export type CanonicalArchaeologySection = typeof CANONICAL_ARCHAEOLOGY_SECTIONS[number];
|
|
22
|
-
export declare function validateArchaeologySynthesis(body: string): ValidationReport;
|
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.CANONICAL_ARCHAEOLOGY_SECTIONS = void 0;
|
|
4
|
-
exports.validateArchaeologySynthesis = validateArchaeologySynthesis;
|
|
5
|
-
exports.CANONICAL_ARCHAEOLOGY_SECTIONS = [
|
|
6
|
-
'Executive Summary',
|
|
7
|
-
'Repository Profile',
|
|
8
|
-
'Current Architecture',
|
|
9
|
-
'Gap Analysis',
|
|
10
|
-
'External Research Findings',
|
|
11
|
-
'Recommendations',
|
|
12
|
-
'Implementation Roadmap',
|
|
13
|
-
'Risk Factors',
|
|
14
|
-
'Untraced items',
|
|
15
|
-
];
|
|
16
|
-
function validateArchaeologySynthesis(body) {
|
|
17
|
-
const errors = [];
|
|
18
|
-
const sectionsFound = extractH2Sections(body);
|
|
19
|
-
// Sections present in canonical order
|
|
20
|
-
for (let i = 0; i < exports.CANONICAL_ARCHAEOLOGY_SECTIONS.length; i++) {
|
|
21
|
-
const expected = exports.CANONICAL_ARCHAEOLOGY_SECTIONS[i];
|
|
22
|
-
if (sectionsFound[i] !== expected) {
|
|
23
|
-
errors.push(`Section #${i + 1} expected "## ${expected}" but found ${sectionsFound[i] ? `"## ${sectionsFound[i]}"` : '(missing)'}.`);
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
// Gap Analysis: at least one G[N] entry with severity
|
|
27
|
-
const gapBlock = extractSection(body, 'Gap Analysis');
|
|
28
|
-
const gapEntries = splitOnGapMarkers(gapBlock);
|
|
29
|
-
const gapIds = gapEntries.map(g => g.id);
|
|
30
|
-
for (const g of gapEntries) {
|
|
31
|
-
// `\b\*\*` fails between space and `*` (both non-word) — drop the boundary
|
|
32
|
-
// before `**` and require the inner word boundary instead.
|
|
33
|
-
if (!/\bSEVERITY\s*[:=]\s*(HIGH|MEDIUM|LOW)\b|\*\*(HIGH|MEDIUM|LOW)\*\*/i.test(g.body)) {
|
|
34
|
-
errors.push(`Gap G${g.id} is missing a severity tag (HIGH / MEDIUM / LOW).`);
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
if (gapEntries.length === 0 && sectionsFound.includes('Gap Analysis')) {
|
|
38
|
-
errors.push('Gap Analysis section has no `G[N]` entries.');
|
|
39
|
-
}
|
|
40
|
-
// Recommendations: each cites ≥1 G[N]
|
|
41
|
-
const recsBlock = extractSection(body, 'Recommendations');
|
|
42
|
-
const recLines = recsBlock.split('\n').filter(l => /^\s*(?:[-*]|\d+\.)\s+/.test(l));
|
|
43
|
-
let untracedRecommendations = 0;
|
|
44
|
-
for (const rec of recLines) {
|
|
45
|
-
if (!/\bG\d+\b/.test(rec)) {
|
|
46
|
-
untracedRecommendations += 1;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
if (recLines.length > 0 && untracedRecommendations === recLines.length) {
|
|
50
|
-
errors.push(`All ${recLines.length} Recommendation(s) lack G[N] traceability.`);
|
|
51
|
-
}
|
|
52
|
-
else if (untracedRecommendations > 0) {
|
|
53
|
-
errors.push(`${untracedRecommendations} of ${recLines.length} Recommendation(s) lack G[N] traceability.`);
|
|
54
|
-
}
|
|
55
|
-
// Untraced items REQUIRED — even if empty (must say "None." or similar)
|
|
56
|
-
const untracedBlock = extractSection(body, 'Untraced items').trim();
|
|
57
|
-
if (untracedBlock.length === 0) {
|
|
58
|
-
errors.push('Untraced items section is empty — must explicitly say "None." when there are none.');
|
|
59
|
-
}
|
|
60
|
-
// Citation stats
|
|
61
|
-
// For archaeology, source_count = unique S[N] across External Research Findings + Risk Factors.
|
|
62
|
-
// The synthesis prompt also asks the LLM to cite OA[<file>] / OA[<module>] in narrative
|
|
63
|
-
// sections; we don't try to enforce those at the validator level (heuristic untraced count
|
|
64
|
-
// would be too noisy across short body paragraphs).
|
|
65
|
-
const sourceCitations = new Set([...body.matchAll(/\bS(\d+)\b/g)].map(m => m[1]));
|
|
66
|
-
const citation_stats = {
|
|
67
|
-
source_count: sourceCitations.size,
|
|
68
|
-
conclusion_count: 0, // archaeology synthesis doesn't have C[N]
|
|
69
|
-
recommendation_count: recLines.length,
|
|
70
|
-
underCitedConclusions: 0,
|
|
71
|
-
untracedRecommendations,
|
|
72
|
-
untraced_claims: 0,
|
|
73
|
-
};
|
|
74
|
-
return {
|
|
75
|
-
valid: errors.length === 0,
|
|
76
|
-
errors,
|
|
77
|
-
sectionsFound,
|
|
78
|
-
citation_stats,
|
|
79
|
-
// Expose archaeology-specific data for the orchestrator's audit + Hatter's Tag
|
|
80
|
-
...(gapIds.length > 0 ? { archaeology: { gap_count: gapIds.length } } : {}),
|
|
81
|
-
};
|
|
82
|
-
}
|
|
83
|
-
// ============================================================================
|
|
84
|
-
// Helpers (copy of the research-side helpers — kept local to avoid coupling)
|
|
85
|
-
// ============================================================================
|
|
86
|
-
/**
 * Lists the titles of every level-2 markdown heading (`## Title`) in `body`,
 * in document order. Titles are trimmed; deeper headings (`###`) and lines
 * without whitespace after `##` are ignored.
 */
function extractH2Sections(body) {
    return body
        .split('\n')
        .map(row => /^##\s+(.+?)\s*$/.exec(row))
        .filter(hit => hit !== null)
        .map(hit => hit[1].trim());
}
|
|
96
|
-
/**
 * Returns the raw lines that sit under the `## <sectionName>` heading,
 * joined with newlines. The heading line itself is not included.
 *
 * Capture stops at the first H2 heading with a different title. A repeated
 * heading with the same title does not stop capture (its line is skipped and
 * collection continues). Returns '' when the section is absent.
 */
function extractSection(body, sectionName) {
    const kept = [];
    let capturing = false;
    for (const row of body.split('\n')) {
        const heading = /^##\s+(.+?)\s*$/.exec(row);
        const isHeading = heading !== null;
        if (isHeading && heading[1].trim() === sectionName) {
            // Entering (or re-entering) the requested section; drop the heading line.
            capturing = true;
            continue;
        }
        if (!capturing) {
            continue;
        }
        if (isHeading) {
            // A different H2 closes the section.
            break;
        }
        kept.push(row);
    }
    return kept.join('\n');
}
|
|
117
|
-
/**
 * Splits a Gap Analysis block into one entry per gap marker.
 *
 * A marker is a line starting (after optional whitespace) with `**G<n>**` or
 * `### G<n>`, followed by whitespace or end of line. Each entry carries the
 * numeric id as a string and the marker line plus every following line up to
 * the next marker. Lines before the first marker are discarded.
 */
function splitOnGapMarkers(block) {
    const gapHeading = /^\s*(?:\*\*G(\d+)\*\*|###\s+G(\d+))(?=\s|$)/;
    const grouped = block.split('\n').reduce((acc, row) => {
        const hit = gapHeading.exec(row);
        if (hit !== null) {
            acc.push({ id: hit[1] ?? hit[2], rows: [row] });
        }
        else if (acc.length > 0) {
            acc[acc.length - 1].rows.push(row);
        }
        return acc;
    }, []);
    return grouped.map(({ id, rows }) => ({ id, body: rows.join('\n') }));
}
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* synthesize_report — LLM node.
|
|
3
|
-
*
|
|
4
|
-
* Second LLM hop in the archeologist research path. Loads
|
|
5
|
-
* `.caterpillar/prompts/research/synthesis.md`, fills it with the brief +
|
|
6
|
-
* mesh context + ranked sources + gap_analysis flag, calls the configured LLM provider
|
|
7
|
-
* (sonnet by default — synthesis is more demanding than planning),
|
|
8
|
-
* runs the structural validator on the body, and either returns the
|
|
9
|
-
* validated body or retries once with feedback.
|
|
10
|
-
*
|
|
11
|
-
* Returns the synthesised body, the prompt-pack telemetry (path + sha256),
|
|
12
|
-
* LLM token/cost totals, and the citation_stats the audit log + Hatter's
|
|
13
|
-
* Tag both consume.
|
|
14
|
-
*/
|
|
15
|
-
import type { ArchaeologyGap, LlmProvider, MeshContext, ObservedArchitecture, RankedSource, ResearchBrief, ResearchPath } from '../../schemas';
|
|
16
|
-
import { type LoadedPrompt } from '../../mesh/prompt-loader';
|
|
17
|
-
import { type CitationStats, type ValidationReport } from './synthesis-validator';
|
|
18
|
-
/** Inputs accepted by `synthesizeReport`. */
export interface SynthesizeReportOpts {
    /** Mesh directory handed to the prompt loader when resolving the prompt pack. */
    meshDir: string;
    /** Research brief; supplies the default provider and path. */
    brief: ResearchBrief;
    /** Mesh context made available to the prompt. */
    meshContext: MeshContext;
    /** Ranked sources rendered into the prompt's source list. */
    rankedSources: RankedSource[];
    /** Provider routing — comes from brief.llm_provider unless overridden. */
    provider?: LlmProvider;
    /** Required when provider === 'anthropic'. */
    anthropicApiKey?: string;
    /** Required when provider === 'github-models'. */
    githubToken?: string;
    /** Flipped true by the orchestrator after gap-analysis fires. */
    gapAnalysisRan?: boolean;
    /** Defaults to brief.path. Overrideable for tests. */
    path?: ResearchPath;
    /** Archaeology-path only: observed architecture extracted from the target repo. */
    observedArchitecture?: ObservedArchitecture;
    /** Archaeology-path only: gaps identified by identify_gaps. */
    archaeologyGaps?: ArchaeologyGap[];
    /** Optional fetch implementation forwarded to the LLM call. */
    fetchImpl?: typeof fetch;
}
|
|
39
|
-
/** Value resolved by `synthesizeReport` once a response passes structural validation. */
export interface SynthesizeReportResult {
    /** The synthesised markdown body (code fences stripped). */
    body_md: string;
    /** The loaded prompt that was sent to the model. */
    prompt: LoadedPrompt;
    /** Validation report for the accepted body. */
    validation: ValidationReport;
    /** Citation statistics taken from the validation report. */
    citation_stats: CitationStats;
    /** LLM telemetry; token and cost figures are summed across all attempts. */
    llm: {
        provider: LlmProvider;
        /** Model reported by the last LLM call. */
        model: string;
        inputTokens: number;
        outputTokens: number;
        costUsd: number;
        /** Number of LLM calls made (1 or 2). */
        attempts: number;
    };
}
|
|
53
|
-
/**
 * Runs the synthesis LLM hop and resolves with the validated report body,
 * prompt, validation report, citation stats and LLM telemetry.
 * Rejects with an Error when structural validation still fails after the
 * second attempt.
 */
export declare function synthesizeReport(opts: SynthesizeReportOpts): Promise<SynthesizeReportResult>;
|
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.synthesizeReport = synthesizeReport;
|
|
4
|
-
const llm_router_1 = require("../../llm/llm-router");
|
|
5
|
-
const prompt_loader_1 = require("../../mesh/prompt-loader");
|
|
6
|
-
const synthesis_validator_1 = require("./synthesis-validator");
|
|
7
|
-
const synthesis_archaeology_validator_1 = require("./synthesis-archaeology-validator");
|
|
8
|
-
const MAX_TOKENS = 8000;
|
|
9
|
-
/**
 * synthesize_report — LLM node.
 *
 * Loads the prompt pack for the active path (`research/synthesis` or
 * `research/synthesis-archaeology`), fills it with the path-specific context,
 * calls the LLM router on the `synth` tier and runs the matching structural
 * validator on the response. If validation fails, the call is retried once
 * with the validator errors and path-specific guidance appended to the prompt.
 *
 * @param opts See SynthesizeReportOpts. `provider` and `path` fall back to
 *   `brief.llm_provider` and `brief.path`.
 * @returns The validated body, the loaded prompt, the validation report, its
 *   citation stats, and LLM telemetry summed over all attempts.
 * @throws Error when the response still fails structural validation after
 *   the second attempt.
 */
async function synthesizeReport(opts) {
    const provider = opts.provider ?? opts.brief.llm_provider;
    const path = opts.path ?? opts.brief.path;
    const isArchaeology = path === 'archaeology';
    // Two different prompt packs + validators per path. Same LLM router, same
    // retry-with-feedback loop — the pack name, the validator and the retry
    // guidance differ.
    const packId = isArchaeology ? 'research/synthesis-archaeology' : 'research/synthesis';
    const validate = isArchaeology ? synthesis_archaeology_validator_1.validateArchaeologySynthesis : synthesis_validator_1.validateSynthesis;
    const promptContext = isArchaeology
        ? buildArchaeologyPromptContext(opts.brief, opts.meshContext, opts.rankedSources, opts.observedArchitecture, opts.archaeologyGaps ?? [])
        : buildPromptContext(opts.brief, opts.meshContext, opts.rankedSources, opts.gapAnalysisRan ?? false);
    const prompt = (0, prompt_loader_1.loadPrompt)({
        meshDir: opts.meshDir,
        packId,
        context: promptContext,
    });
    const system = isArchaeology
        ? 'You write structured markdown architecture-archaeology reports with strict section discipline. Every gap (G[N]) carries a severity. Every Recommendation traces to a G[N] and cites at least one grounding token (S[N] or OA[…]). The 9 H2 sections appear in the exact order requested. No prose before the first `##` heading.'
        : 'You write structured markdown documents with strict section + citation discipline. Every claim has an S[N] citation; every C[N] cites ≥2 sources; every Recommendation traces to a C[N]. Headings appear in the exact order requested. No prose before the first `##` heading.';
    // The retry reminder must restate the rules of the path being validated;
    // quoting the research rules (10 sections, C[N]) on the archaeology path
    // would contradict both the system prompt and the validator.
    const retryGuidance = isArchaeology
        ? 'The 9 H2 sections must appear in the exact order specified; every gap (G[N]) must carry a severity (HIGH / MEDIUM / LOW); every Recommendation must reference at least one G[N]; the Untraced items section must not be empty (write "None." when there are none).'
        : 'The 10 H2 sections must appear in the exact order specified; every C[N] must cite ≥2 S[N] (or ≥1 if confidence is LOW); every Recommendation must reference at least one C[N].';
    let lastReport = null;
    let totalInput = 0;
    let totalOutput = 0;
    let totalCost = 0;
    let lastModel = '';
    for (let attempt = 1; attempt <= 2; attempt++) {
        // Attempt 2 re-sends the filled prompt plus the validator's complaints.
        const userPrompt = attempt === 1
            ? prompt.filled
            : `${prompt.filled}\n\n---\n\nYour previous response failed structural validation:\n${lastReport.errors.map(e => `- ${e}`).join('\n')}\n\nRewrite the document and fix EVERY error above. ${retryGuidance}`;
        const result = await (0, llm_router_1.callLlm)({
            provider,
            tier: 'synth',
            anthropicApiKey: opts.anthropicApiKey,
            githubToken: opts.githubToken,
            system,
            prompt: userPrompt,
            maxTokens: MAX_TOKENS,
            fetchImpl: opts.fetchImpl,
        });
        // Telemetry accumulates across attempts so the caller sees total spend.
        totalInput += result.inputTokens;
        totalOutput += result.outputTokens;
        totalCost += result.costUsd;
        lastModel = result.model;
        const body = stripFences(result.text);
        const report = validate(body);
        if (report.valid) {
            return {
                body_md: body,
                prompt,
                validation: report,
                citation_stats: report.citation_stats,
                llm: { provider, model: lastModel, inputTokens: totalInput, outputTokens: totalOutput, costUsd: totalCost, attempts: attempt },
            };
        }
        lastReport = report;
    }
    throw new Error(`synthesize_report: structural validation failed after 2 attempts. Last errors: ${lastReport.errors.join('; ')}`);
}
|
|
65
|
-
/**
 * Unwraps a response that is entirely enclosed in a ``` / ```markdown / ```md
 * fence and returns the trimmed inner text. Anything else is returned trimmed
 * but otherwise untouched.
 */
function stripFences(raw) {
    const text = raw.trim();
    const fenced = /^```(?:markdown|md)?\s*([\s\S]*?)```\s*$/.exec(text);
    if (fenced === null) {
        return text;
    }
    return fenced[1].trim();
}
|
|
71
|
-
/**
 * Assembles the context object for the research synthesis prompt: brief
 * topic and scope level, a mesh summary, the formatted ranked sources (or a
 * placeholder when there are none) and the gap-analysis flag.
 */
function buildPromptContext(brief, mesh, rankedSources, gapAnalysisRan) {
    const briefPart = { topic: brief.topic, scope_level: brief.scope.level };
    const meshPart = { context_summary: summarizeMeshContext(mesh) };
    let sourcesText;
    if (rankedSources.length === 0) {
        sourcesText = '(no sources retrieved)';
    }
    else {
        sourcesText = rankedSources.map(formatRankedSource).join('\n\n');
    }
    return {
        brief: briefPart,
        mesh: meshPart,
        ranked_sources: sourcesText,
        gap_analysis_ran: gapAnalysisRan,
    };
}
|
|
87
|
-
/**
 * Renders the mesh context as a newline-separated summary: the portfolio
 * name, up to five related research topics, the platform (when present) with
 * its sibling BAR count, and the BAR (when present) with its counts, mesh
 * gaps and up to six threats (when `threats` is an array).
 */
function summarizeMeshContext(mesh) {
    const { portfolio, platform, bar } = mesh;
    const out = [`Portfolio: ${portfolio.name}`];
    const summaries = portfolio.related_research_summaries;
    if (summaries.length > 0) {
        const topics = summaries.map(r => r.topic).slice(0, 5).join('; ');
        out.push(`Portfolio research (${summaries.length}): ${topics}`);
    }
    if (platform) {
        const siblingCount = platform.sibling_bars.length;
        const plural = siblingCount === 1 ? '' : 's';
        out.push(`Platform: ${platform.platform_id} (${siblingCount} sibling BAR${plural})`);
    }
    if (bar) {
        const gapList = bar.mesh_gaps.join(', ') || 'none';
        out.push(`BAR: ${bar.name} (${bar.bar_id}); tier=${bar.tier}; ADRs=${bar.adrs.length}; prior research=${bar.related_research.length}; prior PRDs=${bar.related_prds.length}; mesh gaps: ${gapList}`);
        if (Array.isArray(bar.threats)) {
            const labels = bar.threats.map(t => `${t.id}/${t.category}`).slice(0, 6).join('; ');
            out.push(`STRIDE threats (${bar.threats.length}): ${labels}`);
        }
    }
    return out.join('\n');
}
|
|
105
|
-
/**
 * Formats one ranked source as a markdown bullet with URL and retrieval
 * lines, plus optional publication and excerpt lines. Excerpts longer than
 * 280 characters are cut at 280 and suffixed with an ellipsis.
 */
function formatRankedSource(s) {
    const header = `- **${s.id}** "${s.title}" (${s.provider}, salience ${s.salience_score})`;
    const publishedRows = s.published_at ? [` Published: ${s.published_at}`] : [];
    let excerptRows = [];
    if (s.excerpt) {
        const ellipsis = s.excerpt.length > 280 ? '…' : '';
        excerptRows = [` Excerpt: ${s.excerpt.slice(0, 280)}${ellipsis}`];
    }
    return [
        header,
        ` URL: ${s.url}`,
        ` Retrieved: ${s.retrieved_at}`,
        ...publishedRows,
        ...excerptRows,
    ].join('\n');
}
|
|
119
|
-
/**
 * Assembles the context object for the archaeology synthesis prompt: target
 * repo, formatted observed architecture, CALM and threat summaries from the
 * BAR, the gap signal list and the formatted ranked sources. Each missing
 * input is replaced by a parenthesised placeholder string.
 */
function buildArchaeologyPromptContext(brief, mesh, rankedSources, observed, gaps) {
    const targetRepo = brief.target_repo ?? '(unknown target)';
    const observedText = observed ? formatObservedArchitecture(observed) : '(analyzer did not run)';
    const calmSummary = mesh.bar?.calm_model
        ? summarizeCalmModelArchaeology(mesh.bar.calm_model)
        : '(no CALM model loaded)';
    const threatsSummary = mesh.bar?.threats
        ? summarizeThreatsArchaeology(mesh.bar.threats)
        : '(no threat model on file)';
    let gapSignals;
    if (gaps.length === 0) {
        gapSignals = '(no structural gaps detected)';
    }
    else {
        gapSignals = gaps.map(g => `- **${g.id}** [${g.severity}] ${g.kind}: ${g.summary}`).join('\n');
    }
    let sourcesText;
    if (rankedSources.length === 0) {
        sourcesText = '(no web sources retrieved)';
    }
    else {
        sourcesText = rankedSources.map(formatRankedSource).join('\n\n');
    }
    return {
        target_repo: targetRepo,
        observed_architecture: observedText,
        mesh: {
            bar: {
                calm_summary: calmSummary,
                threats_summary: threatsSummary,
            },
        },
        gap_signals: gapSignals,
        ranked_sources: sourcesText,
    };
}
|
|
138
|
-
/**
 * Renders the observed architecture as plain text: repo profile lines, the
 * first 12 modules, then (when non-empty) the first 15 endpoints and the
 * first 25 direct dependencies, with a trailing ellipsis when more exist.
 */
function formatObservedArchitecture(o) {
    const { profile, modules, endpoints, dependencies } = o;
    const listOr = (items, fallback) => items.join(', ') || fallback;
    const out = [
        `Repo: ${profile.slug} @ ${profile.cloneSha.slice(0, 12)}`,
        `Languages: ${listOr(profile.languages, '(none detected)')}`,
        `Frameworks: ${listOr(profile.frameworks, '(none detected)')}`,
        `Manifests: ${listOr(profile.manifests, '(none)')}`,
        `Files: ${profile.totalFiles} totalling ${profile.totalBytes} bytes`,
        '',
        'Modules (top 12 by file count):',
        ...modules.slice(0, 12).map(m => ` - OA[${m.name}] layer=${m.layer} files=${m.fileCount} endpoints=${m.endpointCount}`),
    ];
    if (endpoints.length > 0) {
        out.push('', 'Endpoints (sample):', ...endpoints.slice(0, 15).map(e => ` - ${e.method} ${e.path} (${e.framework}) — ${e.file}`));
    }
    if (dependencies.length > 0) {
        const shown = dependencies.slice(0, 25).join(', ');
        const more = dependencies.length > 25 ? ', …' : '';
        out.push('', `Direct dependencies (${dependencies.length}): ${shown}${more}`);
    }
    return out.join('\n');
}
|
|
163
|
-
/**
 * Summarises a CALM model as a node/relationship count line followed by one
 * line per node (first 10 only), showing the node's `unique-id` (falling
 * back to `name`) and its `node-type`.
 *
 * @param calm Parsed CALM model; anything that is not a non-null object
 *   yields the "(no CALM model loaded)" placeholder.
 * @returns Newline-separated summary text.
 */
function summarizeCalmModelArchaeology(calm) {
    if (!calm || typeof calm !== 'object') {
        return '(no CALM model loaded)';
    }
    // Non-array `nodes` / `relationships` are treated as empty.
    const nodes = Array.isArray(calm.nodes) ? calm.nodes : [];
    const relationships = Array.isArray(calm.relationships) ? calm.relationships : [];
    const lines = [`${nodes.length} node(s), ${relationships.length} relationship(s)`];
    for (const node of nodes.slice(0, 10)) {
        // Optional chaining keeps a null/undefined array entry from throwing;
        // such entries are reported as unknown instead of aborting the summary.
        const label = node?.['unique-id'] ?? node?.name ?? 'unknown';
        const kind = node?.['node-type'] ?? 'unknown';
        lines.push(` - ${label} (${kind})`);
    }
    return lines.join('\n');
}
|
|
178
|
-
/**
 * Counts threats per category and renders them as "<category> × <count>"
 * pairs joined by ", ". Threats without a category are counted as "unknown".
 *
 * @param threats Threat list; a non-array or empty value yields "(no threats)".
 * @returns Comma-separated category counts.
 */
function summarizeThreatsArchaeology(threats) {
    if (!Array.isArray(threats) || threats.length === 0) {
        return '(no threats)';
    }
    // Null-prototype accumulator: with a plain `{}` a category such as
    // "constructor" or "toString" would read an inherited function instead of
    // undefined and corrupt the count via string concatenation.
    const byCategory = Object.create(null);
    for (const t of threats) {
        // `?.` tolerates null/undefined entries in the list.
        const cat = t?.category || 'unknown';
        byCategory[cat] = (byCategory[cat] || 0) + 1;
    }
    return Object.entries(byCategory).map(([c, n]) => `${c} × ${n}`).join(', ');
}
|