npm - @maintainabilityai/research-runner - Versions diffs - 0.1.9 → 0.1.11 - Mend

@maintainabilityai/research-runner 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/dist/cli.js +22 -3
package/dist/runner/archeologist.d.ts +4 -5
package/dist/runner/archeologist.js +34 -117
package/dist/runner/nodes/format-for-human.d.ts +39 -0
package/dist/runner/nodes/format-for-human.js +164 -0
package/package.json +1 -1
package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +0 -22
package/dist/runner/nodes/synthesis-archaeology-validator.js +0 -131
package/dist/runner/nodes/synthesize-report.d.ts +0 -53
package/dist/runner/nodes/synthesize-report.js +0 -188

package/dist/cli.js CHANGED Viewed

@@ -69,11 +69,30 @@ function parseFlags(argv) {
 }
 function emitGithubOutput(outputs) { // Appends every entry of outputs to the file named by GITHUB_OUTPUT, using the heredoc form.
     // When run inside GitHub Actions, write to GITHUB_OUTPUT so `steps.<id>.outputs.*` works.
+    //
+    // GH Actions output file format:
+    //   single-line:  key=value
+    //   multi-line:   key<<EOF\nvalue\nEOF
+    //
+    // A research brief can be multi-line markdown (the wizard appends a
+    // "## Run metadata" footer to it), so naive `key=value` produced
+    // `Error: Unable to process file command 'output' successfully.` when
+    // the topic carried newlines. Switch every value to the heredoc form
+    // — works for both single- and multi-line values.
     const githubOutput = process.env.GITHUB_OUTPUT;
     if (!githubOutput) { // GITHUB_OUTPUT unset or empty: nothing to write.
         return;
     }
-    const lines = Object.entries(outputs).map(([k, v]) => `${k}=${v}`);
+    const lines = [];
+    // Use a pseudo-random delimiter (pid + timestamp + Math.random digits) to
+    // avoid collisions with content that happens to contain a literal "EOF" line.
+    const delimiter = `gho_${process.pid}_${Date.now()}_${Math.random().toString(36).slice(2, 10)}`; // NOTE(review): crypto.randomUUID is not used; presumably to avoid a Node version requirement - confirm.
+    for (const [k, v] of Object.entries(outputs)) { // The same delimiter is reused for every key written by this call.
+        const value = String(v); // Non-string values are coerced with String().
+        lines.push(`${k}<<${delimiter}`);
+        lines.push(value);
+        lines.push(delimiter);
+    }
     fs.appendFileSync(githubOutput, lines.join('\n') + '\n', 'utf8'); // One append for all keys; a newline follows the last delimiter.
 }
 function abort(msg, code = 1) {
@@ -111,7 +130,7 @@ async function archeologistCmd(argv) {
         meshDir: flags.mesh ? path.resolve(flags.mesh) : process.cwd(),
         outputDir: flags.output || 'research',
         auditDir: flags.audit || '.research-audit',
-        emitPrBodyPath: flags.emit_pr_body,
+        emitIssueBodyPath: flags.emit_issue_body,
         agentVersion: PKG.version,
     });
     process.stdout.write(JSON.stringify(result, null, 2) + '\n');
@@ -120,7 +139,7 @@ async function archeologistCmd(argv) {
         topic: result.topic,
         artifact_path: result.artifact_path,
         chain_root_hash: result.chain_root_hash,
-        pr_body_path: result.pr_body_path || '',
+        issue_body_path: result.issue_body_path || '',
     });
 }
 async function prdCmd(argv) {

package/dist/runner/archeologist.d.ts CHANGED Viewed

@@ -3,7 +3,7 @@ export interface ArcheologistOptions {
     meshDir: string;
     outputDir: string;
     auditDir: string;
-    emitPrBodyPath?: string;
+    emitIssueBodyPath?: string;
     agentVersion: string;
     /** Provider keys — supply only the one your brief.llm_provider needs. Default from process.env. */
     anthropicApiKey?: string;
@@ -18,10 +18,12 @@ export interface ArcheologistOptions {
 export interface ArcheologistResult {
     run_id: string;
     topic: string;
+    /** Path to the issue-update markdown the runner wrote to outputDir. */
     artifact_path: string;
     audit_log_path: string;
     chain_root_hash: string;
-    pr_body_path: string | null;
+    /** Path to the wrapped issue-body markdown (data + Hatter's Tag). Only set when --emit-issue-body was passed. */
+    issue_body_path: string | null;
     total_input_tokens: number;
     total_output_tokens: number;
     total_cost_usd: number;
@@ -32,8 +34,5 @@ export interface ArcheologistResult {
     gap_analysis_ran: boolean;
     /** Number of archaeology gaps identified. Undefined on research-path runs. */
     archaeology_gap_count?: number;
-    /** Synthesis structural validator outputs — quick reviewer signal. */
-    conclusion_count: number;
-    recommendation_count: number;
 }
 export declare function runArcheologist(opts: ArcheologistOptions): Promise<ArcheologistResult>;

package/dist/runner/archeologist.js CHANGED Viewed

@@ -72,7 +72,7 @@ const uspto_search_1 = require("./nodes/uspto-search");
 const hackernews_search_1 = require("./nodes/hackernews-search");
 const dedupe_and_rank_1 = require("./nodes/dedupe-and-rank");
 const gap_analysis_1 = require("./nodes/gap-analysis");
-const synthesize_report_1 = require("./nodes/synthesize-report");
+const format_for_human_1 = require("./nodes/format-for-human");
 const clone_and_index_1 = require("./nodes/clone-and-index");
 const analyze_architecture_1 = require("./nodes/analyze-architecture");
 const identify_gaps_1 = require("./nodes/identify-gaps");
@@ -456,77 +456,38 @@ async function runArcheologist(opts) {
             gapAnalysisRan = true;
         }
     } // end research-path else branch
-    // ----- synthesize_report (LLM) -----
-    progress(`◐ synthesize_report — calling LLM (provider hint=${brief.llm_provider ?? 'anthropic'}, sources=${rankedSources.length}); hybrid routing will pick anthropic for synth if anthropic key is set…`);
-    const synthStart = Date.now();
-    const synthesis = await (0, synthesize_report_1.synthesizeReport)({
-        meshDir: opts.meshDir,
+    // ----- format_for_human (pure) -----
+    //
+    // The runner stops here. Composes the markdown comment that the
+    // workflow posts back to the originating research-request issue.
+    // Synthesis is now produced by the assigned agent (Copilot/Claude),
+    // not by the runner.
+    progress(`◐ format_for_human — composing issue-update markdown for ${rankedSources.length} ranked sources…`);
+    const formatStart = Date.now();
+    const formatted = (0, format_for_human_1.formatForHuman)({
         brief,
+        runId,
         meshContext,
+        queryPlan: researchQueryPlan,
         rankedSources,
-        provider: brief.llm_provider,
-        anthropicApiKey,
-        githubToken,
-        gapAnalysisRan,
-        path: brief.path,
-        observedArchitecture,
-        archaeologyGaps,
-        fetchImpl: opts.fetchImpl,
-    });
-    totalInputTokens += synthesis.llm.inputTokens;
-    totalOutputTokens += synthesis.llm.outputTokens;
-    totalCostUsd += synthesis.llm.costUsd;
-    progress(`✓ synthesize_report (${synthesis.llm.provider} ${synthesis.llm.model}) in ${Date.now() - synthStart}ms — ${synthesis.llm.inputTokens} in / ${synthesis.llm.outputTokens} out tokens, ${synthesis.llm.attempts} attempt${synthesis.llm.attempts !== 1 ? 's' : ''}`);
-    emitter.emit({
-        node_kind: 'llm',
-        node_name: 'synthesize_report',
-        duration_ms: Date.now() - synthStart,
-        llm: {
-            provider: synthesis.llm.provider,
-            model: synthesis.llm.model,
-            prompt_pack: { path: synthesis.prompt.packPath, sha256: synthesis.prompt.packSha256 },
-            input_tokens: synthesis.llm.inputTokens,
-            output_tokens: synthesis.llm.outputTokens,
-            cost_usd: synthesis.llm.costUsd,
-            guardrails: { mode: brief.guardrails, pre: 'PASS', post: 'PASS' },
-        },
+        gapSignals: (0, gap_analysis_1.detectGapSignals)({ brief, rankedSources }),
+        gapFollowUpQueries: [], // already merged into rankedSources during the search loop above
+        providerResultCounts,
+        totalDurationMs: Date.now() - startedAt.getTime(),
     });
-    // ----- publish (pure) -----
-    const today = startedAt.toISOString().slice(0, 10);
-    const fileSlug = brief.topic
-        .toLowerCase()
-        .replace(/[^a-z0-9]+/g, '-')
-        .replace(/^-|-$/g, '')
-        .slice(0, 60) || 'research';
-    const artifactName = `${fileSlug}-${today}.md`;
+    const artifactName = `issue-update-${runId}.md`;
     const artifactPath = path.join(absoluteOutputDir, artifactName);
-    const meshSummary = meshContext.bar
-        ? `bar **${meshContext.bar.name}** (\`${meshContext.bar.bar_id}\`), ${meshContext.bar.adrs.length} ADR(s), ${meshContext.bar.related_research.length} prior research doc(s), mesh gaps: ${meshContext.bar.mesh_gaps.join(', ') || '_none_'}`
-        : meshContext.platform
-            ? `platform **${meshContext.platform.platform_id}** (${meshContext.platform.sibling_bars.length} sibling BAR(s))`
-            : `portfolio **${meshContext.portfolio.name}** (${meshContext.portfolio.related_research_summaries.length} prior research doc(s))`;
-    const bodyMd = buildResearchDoc({
-        brief,
-        runId,
-        meshSummary,
-        meshSha: meshContext.mesh_sha,
-        queryPlan: researchQueryPlan,
-        archaeologySummary: observedArchitecture
-            ? `Cloned \`${observedArchitecture.profile.slug}\` @ \`${observedArchitecture.profile.cloneSha.slice(0, 12)}\`. ${observedArchitecture.profile.totalFiles} files; languages: ${observedArchitecture.profile.languages.join(', ') || 'n/a'}; frameworks: ${observedArchitecture.profile.frameworks.join(', ') || 'n/a'}; ${observedArchitecture.modules.length} modules; ${observedArchitecture.endpoints.length} endpoints; ${archaeologyGaps.length} structural gaps identified.`
-            : undefined,
-        synthesisBody: synthesis.body_md,
-    });
-    const writeStart = Date.now();
-    fs.writeFileSync(artifactPath, bodyMd, 'utf8');
+    fs.writeFileSync(artifactPath, formatted.body, 'utf8');
     emitter.emit({
         node_kind: 'pure',
-        node_name: 'publish',
-        duration_ms: Date.now() - writeStart,
+        node_name: 'format_for_human',
+        duration_ms: Date.now() - formatStart,
         pure: {
-            inputs_summary: `wrote ${artifactPath}`,
-            outputs_summary: `${bodyMd.length} bytes; ${rankedSources.length} citations`,
+            inputs_summary: `ranked_sources=${rankedSources.length}; mesh_sha=${meshContext.mesh_sha.slice(0, 7)}`,
+            outputs_summary: `wrote ${path.relative(opts.meshDir, artifactPath)} (${formatted.body.length} bytes)`,
         },
     });
+    progress(`✓ format_for_human — ${formatted.body.length} bytes written to ${path.relative(opts.meshDir, artifactPath)}`);
     // ----- run_complete -----
     const complete = emitter.emitRunComplete({
         node_kind: 'run_complete',
@@ -541,9 +502,9 @@ async function runArcheologist(opts) {
             artifact_paths: [path.relative(opts.meshDir, artifactPath)],
         },
     });
-    // ----- Optionally append a PR body that wraps the artifact + Hatter's Tag -----
-    let prBodyPath = null;
-    if (opts.emitPrBodyPath) {
+    // ----- Optionally emit an issue-body markdown wrapping the artifact + Hatter's Tag -----
+    let issueBodyPath = null;
+    if (opts.emitIssueBodyPath) {
         const hattersTag = (0, hatters_tag_builder_1.buildHattersTag)({
             run_id: runId,
             mesh_sha: meshContext.mesh_sha,
@@ -552,9 +513,9 @@ async function runArcheologist(opts) {
             published_at: new Date().toISOString(),
             llm: {
                 provider: brief.llm_provider,
-                // synthesis runs on both paths; archaeology runs skip plan_queries so we
-                // use the synthesis model id as the "primary" model for the Hatter's Tag.
-                model: synthesis.llm.model,
+                // plan_queries is the only LLM hop we run now (synth handed off
+                // to the assigned agent). Surface that model in the Hatter's Tag.
+                model: 'openai/gpt-4o-mini',
                 input_tokens: totalInputTokens,
                 output_tokens: totalOutputTokens,
                 cost_usd: roundUsd(totalCostUsd),
@@ -566,9 +527,9 @@ async function runArcheologist(opts) {
                 audit_log_path: path.relative(opts.meshDir, emitter.path),
             },
         });
-        const prBody = [bodyMd, '', hattersTag].join('\n');
-        fs.writeFileSync(opts.emitPrBodyPath, prBody, 'utf8');
-        prBodyPath = opts.emitPrBodyPath;
+        const issueBody = [formatted.body, '', hattersTag].join('\n');
+        fs.writeFileSync(opts.emitIssueBodyPath, issueBody, 'utf8');
+        issueBodyPath = opts.emitIssueBodyPath;
     }
     // ----- archaeology cleanup: remove the shallow clone now that the run artifacts are written -----
     if (cleanupCloneDir) {
@@ -578,14 +539,14 @@ async function runArcheologist(opts) {
         catch { /* leave on disk — non-fatal, just a tmpdir entry */ }
     }
     const totalDurationMs = Date.now() - startedAt.getTime();
-    progress(`◆ done ${runId} in ${(totalDurationMs / 1000).toFixed(1)}s — ${totalInputTokens} in / ${totalOutputTokens} out tokens, $${roundUsd(totalCostUsd)} | sources=${rankedSources.length} conclusions=${synthesis.citation_stats.conclusion_count} recs=${synthesis.citation_stats.recommendation_count} | artifact=${artifactPath}`);
+    progress(`◆ done ${runId} in ${(totalDurationMs / 1000).toFixed(1)}s — ${totalInputTokens} in / ${totalOutputTokens} out tokens, $${roundUsd(totalCostUsd)} | sources=${rankedSources.length} | artifact=${path.relative(opts.meshDir, artifactPath)} (synthesis is the assignee's job)`);
     return {
         run_id: runId,
         topic: brief.topic,
         artifact_path: artifactPath,
         audit_log_path: emitter.path,
         chain_root_hash: complete.outcome.chain_root_hash,
-        pr_body_path: prBodyPath,
+        issue_body_path: issueBodyPath,
         total_input_tokens: totalInputTokens,
         total_output_tokens: totalOutputTokens,
         total_cost_usd: roundUsd(totalCostUsd),
@@ -594,52 +555,8 @@ async function runArcheologist(opts) {
         gap_analysis_ran: gapAnalysisRan,
         /** archaeology path only — undefined for research runs */
         archaeology_gap_count: archaeologyGaps.length || undefined,
-        conclusion_count: synthesis.citation_stats.conclusion_count,
-        recommendation_count: synthesis.citation_stats.recommendation_count,
     };
 }
-/**
- * Compose the published artifact. The preamble differs by path:
- *   research:    <metadata> + <mesh context> + <Query Plan table>
- *   archaeology: <metadata> + <mesh context> + <Target Repo Profile>
- * The synthesis body owns every H2 from the canonical section list onward.
- * The Hatter's Tag is appended separately by the PR-body path.
- */
-function buildResearchDoc(opts) {
-    const lines = [];
-    lines.push(`# ${opts.brief.topic}`);
-    lines.push('');
-    lines.push(`- **Run id:** \`${opts.runId}\``);
-    lines.push(`- **Mesh sha:** \`${opts.meshSha.slice(0, 12)}\``);
-    lines.push(`- **Path:** ${opts.brief.path}${opts.brief.target_repo ? ` (\`${opts.brief.target_repo}\`)` : ''}`);
-    lines.push(`- **Scope:** ${opts.brief.scope.level}${opts.brief.scope.id ? ` / ${opts.brief.scope.id}` : ''}`);
-    lines.push('');
-    lines.push('## Run Metadata');
-    lines.push('');
-    lines.push(`Scope resolved to: ${opts.meshSummary}.`);
-    lines.push('');
-    if (opts.queryPlan) {
-        lines.push('### Query Plan (per-provider, LLM-generated)');
-        lines.push('');
-        lines.push('| Provider | Queries |');
-        lines.push('|---|---|');
-        lines.push(`| **web** (Tavily) | ${opts.queryPlan.web.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
-        lines.push(`| **arxiv** | ${opts.queryPlan.arxiv.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
-        lines.push(`| **patent** (USPTO) | ${opts.queryPlan.patent.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
-        lines.push(`| **community** (HN) | ${opts.queryPlan.community.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
-        lines.push('');
-    }
-    if (opts.archaeologySummary) {
-        lines.push('### Target Repository Profile (analyze_architecture)');
-        lines.push('');
-        lines.push(opts.archaeologySummary);
-        lines.push('');
-    }
-    // The synthesis body owns every H2 from the canonical section list onward.
-    lines.push(opts.synthesisBody.trim());
-    lines.push('');
-    return lines.join('\n');
-}
 function roundUsd(n) { // Rounds n to four decimal places.
     return Math.round(n * 10000) / 10000; // Scale up by 10^4, round to the nearest integer, scale back down.
 }

package/dist/runner/nodes/format-for-human.d.ts ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * format-for-human — composes the issue-update markdown that
+ * `research-runner archeologist` posts back to the originating
+ * research-request issue.
+ *
+ * The runner deliberately stops at this point in the pipeline. The
+ * comment we emit here gives a human reviewer (or an assigned
+ * Copilot/Claude agent) everything needed to write a synthesis:
+ *
+ *   - the brief + scope
+ *   - mesh context (impacted BARs, prior PRDs, ADRs)
+ *   - the LLM-generated query plan
+ *   - top-ranked sources grouped by provider, with abstracts
+ *   - the gap analysis (Jobs-to-be-Done style)
+ *   - the synthesis instructions for the assignee
+ *
+ * No LLM call here — this is pure formatting. The audit_emitter still
+ * gets a `pure` event so the chain stays intact.
+ */
+import type { MeshContext, QueryPlan, RankedSource, ResearchBrief } from '../../schemas';
+export interface FormatForHumanOpts { // Inputs consumed by formatForHuman to build the issue-update markdown.
+    brief: ResearchBrief; // topic, scope, path, target_repo and guardrails are printed in the Brief section.
+    runId: string; // Printed in the Brief section and again in the footer line.
+    meshContext: MeshContext; // Supplies mesh_sha plus the bar / platform / portfolio data for the Mesh context section.
+    queryPlan?: QueryPlan; // Optional: when absent the query plan table is omitted.
+    rankedSources: RankedSource[]; // Grouped by their provider field before rendering.
+    gapSignals: ReadonlyArray<{
+        kind: string;
+    }>; // Each kind becomes one bullet; an empty array renders a no-gaps message instead.
+    gapFollowUpQueries: readonly string[]; // Listed only when gapSignals is non-empty and this array is non-empty.
+    providerResultCounts: Record<string, number>; // Raw result counts keyed by provider; all values are summed for the intro line.
+    /** Total wall-clock for the runner's data-collection phase, ms. */
+    totalDurationMs: number;
+}
+export interface FormatForHumanResult { // Return shape of formatForHuman.
+    /** The markdown body to post as an issue comment (or new issue body). */
+    body: string; // Built by joining the rendered lines with newlines; no trailing newline is appended.
+}
+export declare function formatForHuman(opts: FormatForHumanOpts): FormatForHumanResult; // Synchronous formatting step: builds markdown only, no LLM call.

package/dist/runner/nodes/format-for-human.js ADDED Viewed

@@ -0,0 +1,164 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.formatForHuman = formatForHuman;
+/**
+ * Truncates an excerpt for display in the issue comment. Sources can
+ * carry up to 500 chars; we shorten to ~280 so the issue stays readable.
+ *
+ * Whitespace runs (including newlines) are first collapsed to single
+ * spaces and the ends trimmed, so the result is always a single line.
+ *
+ * @param s Raw excerpt text. Must be a string: `s.replace` is called
+ *          without a null/undefined guard.
+ * @param n Maximum length of the returned string, counted in UTF-16
+ *          code units. Defaults to 280.
+ * @returns The cleaned text unchanged when it fits within `n`, otherwise
+ *          its first `n - 1` code units followed by a single `…`.
+ *
+ * NOTE(review): `slice` counts UTF-16 code units, so a cut can land in
+ * the middle of a surrogate pair (e.g. an emoji).
+ */
+function shortExcerpt(s, n = 280) {
+    const cleaned = s.replace(/\s+/g, ' ').trim();
+    return cleaned.length <= n ? cleaned : cleaned.slice(0, n - 1) + '…';
+}
+function meshSummary(meshContext) { // Returns the one-line markdown summary shown under the Mesh context heading.
+    if (meshContext.bar) { // BAR scope is checked first and wins over platform scope.
+        const b = meshContext.bar;
+        const gaps = b.mesh_gaps.length > 0 ? b.mesh_gaps.join(', ') : '_none_'; // Empty gap list renders as an italic none marker.
+        return `BAR **${b.name}** (\`${b.bar_id}\`) — ${b.adrs.length} ADR(s), ${b.related_research.length} prior research doc(s), mesh gaps: ${gaps}`;
+    }
+    if (meshContext.platform) { // Platform scope: only the id and the sibling BAR count are reported.
+        const p = meshContext.platform;
+        const sibs = p.sibling_bars.length;
+        return `Platform **${p.platform_id}** — ${sibs} BAR(s) in scope`;
+    }
+    return `Portfolio (no platform/BAR scope — broad research)`; // Fallback when neither bar nor platform is set; no portfolio fields are read.
+}
+function siblingBarTable(meshContext) { // Returns the markdown table lines for the platform sibling BARs, or an empty array.
+    if (!meshContext.platform || meshContext.platform.sibling_bars.length === 0) { // No platform scope or no siblings: caller skips the section.
+        return [];
+    }
+    const lines = [];
+    lines.push('| BAR | Name | CALM nodes | Threats |');
+    lines.push('|---|---|---|---|');
+    for (const sb of meshContext.platform.sibling_bars) { // One table row per sibling BAR.
+        const calmCount = sb.calm_node_ids?.length ?? 0; // Missing id lists count as zero.
+        const threatCount = sb.threat_ids?.length ?? 0;
+        lines.push(`| \`${sb.bar_id}\` | ${sb.name} | ${calmCount} | ${threatCount} |`); // NOTE(review): sb.name is inserted unescaped; a pipe character in it would break the row.
+    }
+    return lines;
+}
+function providerSection(label, emoji, provider, sources, totalCount) { // Returns the markdown lines of one provider section: an H3 heading plus one bullet and quoted excerpt per source.
+    if (sources.length === 0) { // Nothing ranked for this provider: heading plus a placeholder line that still reports the raw count.
+        return [`### ${emoji} ${label}`, '', `_No ${provider} results in the top-ranked set (raw count: ${totalCount})._`, ''];
+    }
+    const lines = [];
+    lines.push(`### ${emoji} ${label} (${sources.length} of ${totalCount} ranked)`); // totalCount is the raw result count passed in by the caller.
+    lines.push('');
+    for (const s of sources) {
+        const authors = s.authors && s.authors.length > 0 ? ` — _${s.authors.slice(0, 3).join(', ')}${s.authors.length > 3 ? ' et al.' : ''}_` : ''; // At most three authors are named; more are folded into et al.
+        const date = s.published_at ? ` _(${s.published_at.slice(0, 10)})_` : ''; // First 10 chars only; presumably the YYYY-MM-DD part of an ISO timestamp - confirm.
+        lines.push(`- **[\`${s.id}\`] [${s.title}](${s.url})** — score ${s.salience_score.toFixed(2)}${date}${authors}`); // NOTE(review): title and url are inserted without markdown escaping.
+        lines.push(`  > ${shortExcerpt(s.excerpt)}`); // Excerpt is collapsed to one line and capped by shortExcerpt.
+    }
+    lines.push('');
+    return lines;
+}
+function formatForHuman(opts) { // Builds the complete issue-update markdown from opts and returns it as { body }; nothing is written to disk here.
+    const { brief, runId, meshContext, queryPlan, rankedSources, gapSignals, gapFollowUpQueries, providerResultCounts, totalDurationMs } = opts;
+    const byProvider = { tavily: [], arxiv: [], hackernews: [], uspto: [] }; // Buckets for the four providers that get a section below.
+    for (const r of rankedSources) {
+        (byProvider[r.provider] ??= []).push(r); // Any other provider value gets its own bucket, but no section below renders it.
+    }
+    const lines = [];
+    lines.push(`# 🔍 Research data collected — ready for synthesis`);
+    lines.push('');
+    lines.push(`> The Archeologist runner gathered ${rankedSources.length} ranked sources across ${Object.values(providerResultCounts).reduce((a, b) => a + b, 0)} raw results, ran a Jobs-to-be-Done gap analysis, and assembled the mesh context below. **Synthesis is your next step.**`); // Raw total is the sum of every value in providerResultCounts.
+    lines.push('');
+    lines.push('## Brief');
+    lines.push('');
+    lines.push(`**Topic.** ${brief.topic}`);
+    lines.push('');
+    lines.push(`- **Scope:** ${brief.scope.level}${brief.scope.id ? ` / \`${brief.scope.id}\`` : ''}`);
+    lines.push(`- **Path:** ${brief.path}${brief.target_repo ? ` (target repo: \`${brief.target_repo}\`)` : ''}`);
+    lines.push(`- **Guardrails:** ${brief.guardrails}`);
+    lines.push(`- **Run id:** \`${runId}\``);
+    lines.push(`- **Mesh sha:** \`${meshContext.mesh_sha.slice(0, 12)}\``); // Sha is shortened to its first 12 characters.
+    lines.push(`- **Data-collection wall-clock:** ${(totalDurationMs / 1000).toFixed(1)}s`); // Milliseconds converted to seconds with one decimal.
+    lines.push('');
+    lines.push('## Mesh context');
+    lines.push('');
+    lines.push(meshSummary(meshContext));
+    lines.push('');
+    const siblingLines = siblingBarTable(meshContext);
+    if (siblingLines.length > 0) { // Table is only present for platform scope with at least one sibling BAR.
+        lines.push('### BARs in scope');
+        lines.push('');
+        lines.push(...siblingLines);
+        lines.push('');
+    }
+    if (meshContext.portfolio.related_research_summaries.length > 0) { // meshContext.portfolio is read without a guard, so it must always be present.
+        lines.push('### Prior research in this scope');
+        lines.push('');
+        for (const r of meshContext.portfolio.related_research_summaries.slice(0, 5)) { // Only the first five entries are listed.
+            lines.push(`- \`${r.research_id}\` — ${r.topic} _(${r.published_at.slice(0, 10)})_`);
+        }
+        lines.push('');
+    }
+    if (queryPlan) { // Optional input: the table is skipped when no plan was passed.
+        lines.push('## LLM-generated query plan');
+        lines.push('');
+        lines.push('| Provider | Queries |');
+        lines.push('|---|---|');
+        lines.push(`| **Tavily (web)** | ${queryPlan.web.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`); // Backticks inside a query are replaced so the inline code span stays intact.
+        lines.push(`| **arXiv** | ${queryPlan.arxiv.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
+        lines.push(`| **USPTO (patents)** | ${queryPlan.patent.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
+        lines.push(`| **Hacker News** | ${queryPlan.community.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
+        lines.push('');
+    }
+    lines.push('## Source coverage');
+    lines.push('');
+    lines.push('| Provider | Ranked (top-N) | Raw |');
+    lines.push('|---|---:|---:|');
+    lines.push(`| Tavily | ${byProvider.tavily.length} | ${providerResultCounts.tavily ?? 0} |`); // A provider missing from providerResultCounts is shown as 0 raw results.
+    lines.push(`| arXiv | ${byProvider.arxiv.length} | ${providerResultCounts.arxiv ?? 0} |`);
+    lines.push(`| Hacker News | ${byProvider.hackernews.length} | ${providerResultCounts.hackernews ?? 0} |`);
+    lines.push(`| USPTO | ${byProvider.uspto.length} | ${providerResultCounts.uspto ?? 0} |`);
+    lines.push('');
+    lines.push('## Top-ranked sources');
+    lines.push('');
+    lines.push('Each source is tagged with a stable `S[N]` id. Use these in the synthesis: every claim should cite at least one, every Conclusion ≥2 (≥1 if confidence LOW), every Recommendation should cite at least one Conclusion `C[N]`.');
+    lines.push('');
+    lines.push(...providerSection('Tavily — web search', '🌐', 'tavily', byProvider.tavily, providerResultCounts.tavily ?? 0)); // Sections are always emitted in this fixed provider order, even when empty.
+    lines.push(...providerSection('arXiv — academic papers', '📚', 'arxiv', byProvider.arxiv, providerResultCounts.arxiv ?? 0));
+    lines.push(...providerSection('Hacker News — community signal', '🧑‍💻', 'hackernews', byProvider.hackernews, providerResultCounts.hackernews ?? 0));
+    lines.push(...providerSection('USPTO — patent landscape', '📜', 'uspto', byProvider.uspto, providerResultCounts.uspto ?? 0));
+    lines.push('## Jobs-to-be-Done / Gap analysis');
+    lines.push('');
+    if (gapSignals.length === 0) { // No signals: emit a single no-gaps line.
+        lines.push('_No coverage gaps detected — the search results adequately cover the brief._');
+        lines.push('');
+    }
+    else {
+        lines.push(`The runner detected the following coverage gaps:`);
+        lines.push('');
+        for (const sig of gapSignals) { // Only the kind field of each signal is rendered.
+            lines.push(`- \`${sig.kind}\``);
+        }
+        lines.push('');
+        if (gapFollowUpQueries.length > 0) { // Follow-up queries appear only when at least one gap signal exists.
+            lines.push('LLM-derived follow-up queries (already executed against Tavily, results merged above):');
+            lines.push('');
+            for (const q of gapFollowUpQueries) {
+                lines.push(`- \`${q}\``); // NOTE(review): unlike the query plan table, backticks inside q are not replaced here.
+            }
+            lines.push('');
+        }
+    }
+    lines.push('## ✍️ Synthesis instructions — for the assignee');
+    lines.push('');
+    lines.push('Assign this issue to `@github-copilot` (or another agent) to produce the synthesis. The assignee should:');
+    lines.push('');
+    lines.push('1. **Read every source** above. The `S[N]` ids are how you cite them.');
+    lines.push('2. **Open a PR** with a new file under `research/` containing the synthesis markdown.');
+    lines.push('3. **Follow the canonical structure** — see `.caterpillar/prompts/research/synthesis.md` in this mesh. The synthesis must have:');
+    lines.push('   - 10 H2 sections in the order: `Executive Summary`, `Cross-Source Analysis`, `Jobs-to-be-Done Analysis`, `Whitespace Analysis`, `Patent Landscape`, `Community Signal`, `Academic Foundation`, `Conclusions`, `Recommendations`, `Open Questions`.');
+    lines.push('   - Every claim cites at least one `S[N]`.');
+    lines.push('   - Every Conclusion `C[N]` cites ≥2 sources (≥1 if confidence is LOW).');
+    lines.push('   - Every Recommendation references at least one `C[N]`.');
+    lines.push('4. **PR labels** to apply: `research-synthesis`, `ai-assisted`.');
+    lines.push('5. **Once merged**, the PRD agent will read your synthesis + the mesh + impacted code repos to produce per-repo landing issues.');
+    lines.push('');
+    lines.push('---');
+    lines.push('');
+    lines.push(`🤖 Generated by \`research-runner archeologist\` v${'pkg'}. Run id: \`${runId}\`.`); // NOTE(review): the interpolated value is the string literal pkg, so the footer always reads vpkg rather than a real version - looks like an unfilled placeholder.
+    return { body: lines.join('\n') }; // Lines are joined with newlines; no trailing newline is appended.
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@maintainabilityai/research-runner",
-  "version": "0.1.9",
+  "version": "0.1.11",
   "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
   "license": "MIT",
   "author": "MaintainabilityAI",

package/dist/runner/nodes/synthesis-archaeology-validator.d.ts DELETED Viewed

@@ -1,22 +0,0 @@
-/**
- * synthesis-archaeology-validator — structural validator for the
- * archaeology-path synthesis body.
- *
- * Mirrors synthesis-validator's shape (ValidationReport with citation_stats)
- * but enforces the 9 canonical sections from
- * `.caterpillar/prompts/research/synthesis-archaeology.md`:
- *
- *   1. Executive Summary
- *   2. Repository Profile
- *   3. Current Architecture
- *   4. Gap Analysis            (G[N] entries with severity)
- *   5. External Research Findings
- *   6. Recommendations         (each cites ≥1 G[N] AND ≥1 grounding token)
- *   7. Implementation Roadmap
- *   8. Risk Factors
- *   9. Untraced items          (REQUIRED — may say "None.")
- */
-import type { ValidationReport } from './synthesis-validator';
-export declare const CANONICAL_ARCHAEOLOGY_SECTIONS: readonly ["Executive Summary", "Repository Profile", "Current Architecture", "Gap Analysis", "External Research Findings", "Recommendations", "Implementation Roadmap", "Risk Factors", "Untraced items"];
-export type CanonicalArchaeologySection = typeof CANONICAL_ARCHAEOLOGY_SECTIONS[number];
-export declare function validateArchaeologySynthesis(body: string): ValidationReport;

package/dist/runner/nodes/synthesis-archaeology-validator.js DELETED Viewed

@@ -1,131 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.CANONICAL_ARCHAEOLOGY_SECTIONS = void 0;
-exports.validateArchaeologySynthesis = validateArchaeologySynthesis;
-exports.CANONICAL_ARCHAEOLOGY_SECTIONS = [ // Order matters: validateArchaeologySynthesis compares found H2 headings to this list by position.
-    'Executive Summary',
-    'Repository Profile',
-    'Current Architecture',
-    'Gap Analysis',
-    'External Research Findings',
-    'Recommendations',
-    'Implementation Roadmap',
-    'Risk Factors',
-    'Untraced items',
-];
-function validateArchaeologySynthesis(body) { // Checks the markdown body and returns { valid, errors, sectionsFound, citation_stats }, plus archaeology.gap_count when at least one gap entry was parsed.
-    const errors = []; // Human-readable error strings; valid is true only when this stays empty.
-    const sectionsFound = extractH2Sections(body);
-    // Sections present in canonical order
-    for (let i = 0; i < exports.CANONICAL_ARCHAEOLOGY_SECTIONS.length; i++) { // Positional check: the i-th H2 found must equal the i-th canonical heading.
-        const expected = exports.CANONICAL_ARCHAEOLOGY_SECTIONS[i];
-        if (sectionsFound[i] !== expected) {
-            errors.push(`Section #${i + 1} expected "## ${expected}" but found ${sectionsFound[i] ? `"## ${sectionsFound[i]}"` : '(missing)'}.`);
-        }
-    } // H2 headings after the ninth position are not inspected by this loop.
-    // Gap Analysis: at least one G[N] entry with severity
-    const gapBlock = extractSection(body, 'Gap Analysis');
-    const gapEntries = splitOnGapMarkers(gapBlock);
-    const gapIds = gapEntries.map(g => g.id); // Only the length of this list is used, for archaeology.gap_count in the return value.
-    for (const g of gapEntries) {
-        // `\b\*\*` fails between space and `*` (both non-word) — drop the boundary
-        // before `**` and require the inner word boundary instead.
-        if (!/\bSEVERITY\s*[:=]\s*(HIGH|MEDIUM|LOW)\b|\*\*(HIGH|MEDIUM|LOW)\*\*/i.test(g.body)) {
-            errors.push(`Gap G${g.id} is missing a severity tag (HIGH / MEDIUM / LOW).`);
-        }
-    }
-    if (gapEntries.length === 0 && sectionsFound.includes('Gap Analysis')) { // Reported only when the heading exists; a missing heading is already caught by the positional check.
-        errors.push('Gap Analysis section has no `G[N]` entries.');
-    }
-    // Recommendations: each cites ≥1 G[N]
-    const recsBlock = extractSection(body, 'Recommendations');
-    const recLines = recsBlock.split('\n').filter(l => /^\s*(?:[-*]|\d+\.)\s+/.test(l)); // Every bullet or numbered list line counts as one recommendation, including indented ones.
-    let untracedRecommendations = 0;
-    for (const rec of recLines) { // NOTE(review): only a G-number reference is checked; the grounding-token requirement listed in the declaration header is not enforced here.
-        if (!/\bG\d+\b/.test(rec)) {
-            untracedRecommendations += 1;
-        }
-    }
-    if (recLines.length > 0 && untracedRecommendations === recLines.length) { // Two message variants: all recommendations untraced vs. some untraced.
-        errors.push(`All ${recLines.length} Recommendation(s) lack G[N] traceability.`);
-    }
-    else if (untracedRecommendations > 0) {
-        errors.push(`${untracedRecommendations} of ${recLines.length} Recommendation(s) lack G[N] traceability.`);
-    }
-    // Untraced items REQUIRED — even if empty (must say "None." or similar)
-    const untracedBlock = extractSection(body, 'Untraced items').trim();
-    if (untracedBlock.length === 0) { // Also true when the heading itself is absent, because extractSection then returns an empty string.
-        errors.push('Untraced items section is empty — must explicitly say "None." when there are none.');
-    }
-    // Citation stats
-    // For archaeology, source_count = unique S[N] across External Research Findings + Risk Factors.
-    // The synthesis prompt also asks the LLM to cite OA[<file>] / OA[<module>] in narrative
-    // sections; we don't try to enforce those at the validator level (heuristic untraced count
-    // would be too noisy across short body paragraphs).
-    const sourceCitations = new Set([...body.matchAll(/\bS(\d+)\b/g)].map(m => m[1])); // NOTE(review): this scans the whole body, not only the two sections named in the comment above.
-    const citation_stats = {
-        source_count: sourceCitations.size,
-        conclusion_count: 0, // archaeology synthesis doesn't have C[N]
-        recommendation_count: recLines.length,
-        underCitedConclusions: 0, // Hard-coded zero on this path.
-        untracedRecommendations,
-        untraced_claims: 0, // Hard-coded zero on this path.
-    };
-    return {
-        valid: errors.length === 0,
-        errors,
-        sectionsFound,
-        citation_stats,
-        // Expose archaeology-specific data for the orchestrator's audit + Hatter's Tag
-        ...(gapIds.length > 0 ? { archaeology: { gap_count: gapIds.length } } : {}),
-    };
-}
-// ============================================================================
-// Helpers (copy of the research-side helpers — kept local to avoid coupling)
-// ============================================================================
-function extractH2Sections(body) { // Returns the heading text of every H2 line in body, in document order.
-    const out = [];
-    for (const line of body.split('\n')) { // No fence tracking: a line starting with two hashes inside a code fence is collected too.
-        const m = line.match(/^##\s+(.+?)\s*$/);
-        if (m) { // The pattern needs exactly two hashes followed by whitespace, so H1 and H3+ lines are skipped.
-            out.push(m[1].trim());
-        }
-    }
-    return out;
-}
-function extractSection(body, sectionName) { // Returns the lines under the H2 heading named sectionName, joined with newlines; empty string when the heading is absent.
-    const lines = body.split('\n');
-    let inSection = false;
-    const collected = [];
-    for (const line of lines) {
-        const h2 = line.match(/^##\s+(.+?)\s*$/);
-        if (h2) {
-            if (h2[1].trim() === sectionName) { // Exact, case-sensitive match on the trimmed heading text.
-                inSection = true;
-                continue; // The heading line itself is not part of the result.
-            }
-            if (inSection) { // Any other H2 ends the section.
-                break;
-            }
-        }
-        if (inSection) {
-            collected.push(line);
-        }
-    }
-    return collected.join('\n');
-}
-function splitOnGapMarkers(block) { // Splits a section into gap entries; returns [{ id, body }] with id as the digit string and body as the marker line plus the lines up to the next marker.
-    const markerRe = /^\s*(?:\*\*G(\d+)\*\*|###\s+G(\d+))(?=\s|$)/;
-    const lines = block.split('\n'); // A marker is a line starting with a bold G-number or an H3 G-number, followed by whitespace or end of line.
-    const entries = [];
-    for (const line of lines) {
-        const m = line.match(markerRe);
-        if (m) {
-            entries.push({ id: m[1] ?? m[2], body: [line] }); // m[1] is set for the bold form, m[2] for the H3 form.
-        }
-        else if (entries.length > 0) { // Lines before the first marker are dropped.
-            entries[entries.length - 1].body.push(line);
-        }
-    }
-    return entries.map(e => ({ id: e.id, body: e.body.join('\n') }));
-}

package/dist/runner/nodes/synthesize-report.d.ts DELETED Viewed

@@ -1,53 +0,0 @@
-/**
- * synthesize_report — LLM node.
- *
- * Second LLM hop in the archeologist research path. Loads
- * `.caterpillar/prompts/research/synthesis.md`, fills it with the brief +
- * mesh context + ranked sources + gap_analysis flag, calls Anthropic
- * (sonnet by default — synthesis is more demanding than planning),
- * runs the structural validator on the body, and either returns the
- * validated body or retries once with feedback.
- *
- * Returns the synthesised body, the prompt-pack telemetry (path + sha256),
- * LLM token/cost totals, and the citation_stats the audit log + Hatter's
- * Tag both consume.
- */
-import type { ArchaeologyGap, LlmProvider, MeshContext, ObservedArchitecture, RankedSource, ResearchBrief, ResearchPath } from '../../schemas';
-import { type LoadedPrompt } from '../../mesh/prompt-loader';
-import { type CitationStats, type ValidationReport } from './synthesis-validator';
-export interface SynthesizeReportOpts { // Input options for synthesizeReport.
-    meshDir: string; // Forwarded to loadPrompt as meshDir.
-    brief: ResearchBrief; // Supplies the default provider (llm_provider) and path, plus fields rendered into the prompt context.
-    meshContext: MeshContext; // Summarised into the prompt context.
-    rankedSources: RankedSource[]; // Rendered into the prompt context; an empty list yields a placeholder string.
-    /** Provider routing — comes from brief.llm_provider unless overridden. */
-    provider?: LlmProvider;
-    /** Required when provider === 'anthropic'. */
-    anthropicApiKey?: string;
-    /** Required when provider === 'github-models'. */
-    githubToken?: string;
-    /** Flipped true by the orchestrator after gap-analysis fires. */
-    gapAnalysisRan?: boolean;
-    /** Defaults to brief.path. Overrideable for tests. */
-    path?: ResearchPath;
-    /** Archaeology-path only: observed architecture extracted from the target repo. */
-    observedArchitecture?: ObservedArchitecture;
-    /** Archaeology-path only: gaps identified by identify_gaps. */
-    archaeologyGaps?: ArchaeologyGap[];
-    fetchImpl?: typeof fetch; // Forwarded to callLlm as fetchImpl.
-}
-export interface SynthesizeReportResult { // Value returned by synthesizeReport when an attempt passes validation.
-    body_md: string; // Model output after fence stripping.
-    prompt: LoadedPrompt; // The object returned by loadPrompt for the selected pack.
-    validation: ValidationReport; // The passing validation report.
-    citation_stats: CitationStats; // Same object as validation.citation_stats.
-    llm: {
-        provider: LlmProvider;
-        model: string; // Model reported by the most recent call.
-        inputTokens: number; // Summed over all attempts made.
-        outputTokens: number; // Summed over all attempts made.
-        costUsd: number; // Summed over all attempts made.
-        attempts: number; // 1 or 2: the attempt that passed validation.
-    };
-}
-export declare function synthesizeReport(opts: SynthesizeReportOpts): Promise<SynthesizeReportResult>; // Rejects with an Error when both attempts fail structural validation.

package/dist/runner/nodes/synthesize-report.js DELETED Viewed

@@ -1,188 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.synthesizeReport = synthesizeReport;
-const llm_router_1 = require("../../llm/llm-router");
-const prompt_loader_1 = require("../../mesh/prompt-loader");
-const synthesis_validator_1 = require("./synthesis-validator");
-const synthesis_archaeology_validator_1 = require("./synthesis-archaeology-validator");
-const MAX_TOKENS = 8000; // Passed as maxTokens on every callLlm request made by synthesizeReport.
-async function synthesizeReport(opts) { // Makes up to two LLM calls and returns the first body that passes structural validation; throws after the second failure.
-    const provider = opts.provider ?? opts.brief.llm_provider;
-    const path = opts.path ?? opts.brief.path;
-    // Two different prompt packs + validators per path. Same LLM router, same
-    // retry-with-feedback loop — only the pack name + the validator differ.
-    const packId = path === 'archaeology' ? 'research/synthesis-archaeology' : 'research/synthesis';
-    const validate = path === 'archaeology' ? synthesis_archaeology_validator_1.validateArchaeologySynthesis : synthesis_validator_1.validateSynthesis;
-    const promptContext = path === 'archaeology'
-        ? buildArchaeologyPromptContext(opts.brief, opts.meshContext, opts.rankedSources, opts.observedArchitecture, opts.archaeologyGaps ?? [])
-        : buildPromptContext(opts.brief, opts.meshContext, opts.rankedSources, opts.gapAnalysisRan ?? false);
-    const prompt = (0, prompt_loader_1.loadPrompt)({ // The filled template is read from prompt.filled below.
-        meshDir: opts.meshDir,
-        packId,
-        context: promptContext,
-    });
-    const system = path === 'archaeology'
-        ? 'You write structured markdown architecture-archaeology reports with strict section discipline. Every gap (G[N]) carries a severity. Every Recommendation traces to a G[N] and cites at least one grounding token (S[N] or OA[…]). The 9 H2 sections appear in the exact order requested. No prose before the first `##` heading.'
-        : 'You write structured markdown documents with strict section + citation discipline. Every claim has an S[N] citation; every C[N] cites ≥2 sources; every Recommendation traces to a C[N]. Headings appear in the exact order requested. No prose before the first `##` heading.';
-    let lastReport = null; // Validation report of the most recent failed attempt.
-    let totalInput = 0; // Token and cost totals accumulate across attempts.
-    let totalOutput = 0;
-    let totalCost = 0;
-    let lastModel = '';
-    for (let attempt = 1; attempt <= 2; attempt++) { // Attempt 2 re-sends the filled prompt with the validator errors appended.
-        const userPrompt = attempt === 1 // NOTE(review): the retry text below mentions 10 H2 sections and C-number rules on both paths, while the archaeology system prompt asks for 9 sections and G-number traceability; confirm the archaeology wording is intended.
-            ? prompt.filled
-            : `${prompt.filled}\n\n---\n\nYour previous response failed structural validation:\n${lastReport.errors.map(e => `- ${e}`).join('\n')}\n\nRewrite the document and fix EVERY error above. The 10 H2 sections must appear in the exact order specified; every C[N] must cite ≥2 S[N] (or ≥1 if confidence is LOW); every Recommendation must reference at least one C[N].`;
-        const result = await (0, llm_router_1.callLlm)({
-            provider,
-            tier: 'synth',
-            anthropicApiKey: opts.anthropicApiKey,
-            githubToken: opts.githubToken,
-            system,
-            prompt: userPrompt,
-            maxTokens: MAX_TOKENS,
-            fetchImpl: opts.fetchImpl,
-        });
-        totalInput += result.inputTokens;
-        totalOutput += result.outputTokens;
-        totalCost += result.costUsd;
-        lastModel = result.model;
-        const body = stripFences(result.text); // Validation runs on the unwrapped text.
-        const report = validate(body);
-        if (report.valid) { // First passing attempt wins; attempts records which one it was.
-            return {
-                body_md: body,
-                prompt,
-                validation: report,
-                citation_stats: report.citation_stats,
-                llm: { provider, model: lastModel, inputTokens: totalInput, outputTokens: totalOutput, costUsd: totalCost, attempts: attempt },
-            };
-        }
-        lastReport = report; // Feeds the retry prompt and the final error message.
-    }
-    throw new Error(`synthesize_report: structural validation failed after 2 attempts. Last errors: ${lastReport.errors.join('; ')}`);
-}
-/** If the model wraps the doc in ```markdown … ``` fences, unwrap. Otherwise pass through. */
-function stripFences(raw) { // Returns the trimmed input, or the trimmed fence contents when the whole input is a single fenced block.
-    const trimmed = raw.trim();
-    const fenceMatch = trimmed.match(/^```(?:markdown|md)?\s*([\s\S]*?)```\s*$/);
-    return fenceMatch ? fenceMatch[1].trim() : trimmed; // The pattern is anchored at both ends, so a fence with text before or after it is left as is.
-}
-/** Build the dotted-key context the synthesis prompt asks for. */
-function buildPromptContext(brief, mesh, rankedSources, gapAnalysisRan) { // Used for the non-archaeology path; returns a nested plain object.
-    return {
-        brief: {
-            topic: brief.topic,
-            scope_level: brief.scope.level,
-        },
-        mesh: {
-            context_summary: summarizeMeshContext(mesh),
-        },
-        ranked_sources: rankedSources.length === 0 // Placeholder text when nothing was retrieved, otherwise one formatted entry per source separated by blank lines.
-            ? '(no sources retrieved)'
-            : rankedSources.map(formatRankedSource).join('\n\n'),
-        gap_analysis_ran: gapAnalysisRan,
-    };
-}
-function summarizeMeshContext(mesh) { // Returns a newline-joined text summary of the portfolio and, when present, the platform and BAR.
-    const parts = [];
-    parts.push(`Portfolio: ${mesh.portfolio.name}`);
-    if (mesh.portfolio.related_research_summaries.length > 0) { // Shows the total count but lists at most the first five topics.
-        parts.push(`Portfolio research (${mesh.portfolio.related_research_summaries.length}): ${mesh.portfolio.related_research_summaries.map(r => r.topic).slice(0, 5).join('; ')}`);
-    }
-    if (mesh.platform) {
-        parts.push(`Platform: ${mesh.platform.platform_id} (${mesh.platform.sibling_bars.length} sibling BAR${mesh.platform.sibling_bars.length === 1 ? '' : 's'})`);
-    }
-    if (mesh.bar) {
-        parts.push(`BAR: ${mesh.bar.name} (${mesh.bar.bar_id}); tier=${mesh.bar.tier}; ADRs=${mesh.bar.adrs.length}; prior research=${mesh.bar.related_research.length}; prior PRDs=${mesh.bar.related_prds.length}; mesh gaps: ${mesh.bar.mesh_gaps.join(', ') || 'none'}`);
-        if (Array.isArray(mesh.bar.threats)) { // Threat line is emitted only when threats is an array; at most the first six are listed.
-            const ts = mesh.bar.threats;
-            parts.push(`STRIDE threats (${ts.length}): ${ts.map(t => `${t.id}/${t.category}`).slice(0, 6).join('; ')}`);
-        }
-    }
-    return parts.join('\n');
-}
-function formatRankedSource(s) { // Renders one ranked source as a markdown bullet with indented detail lines.
-    const lines = [
-        `- **${s.id}** "${s.title}" (${s.provider}, salience ${s.salience_score})`,
-        `  URL: ${s.url}`,
-        `  Retrieved: ${s.retrieved_at}`,
-    ];
-    if (s.published_at) { // Optional line, omitted when the value is falsy.
-        lines.push(`  Published: ${s.published_at}`);
-    }
-    if (s.excerpt) { // Excerpt is cut to 280 characters, with an ellipsis appended when it was longer.
-        lines.push(`  Excerpt: ${s.excerpt.slice(0, 280)}${s.excerpt.length > 280 ? '…' : ''}`);
-    }
-    return lines.join('\n');
-}
-/** Build the dotted-key context the archaeology synthesis prompt asks for. */
-function buildArchaeologyPromptContext(brief, mesh, rankedSources, observed, gaps) { // Every missing input is replaced by a parenthesised placeholder string rather than left undefined.
-    return {
-        target_repo: brief.target_repo ?? '(unknown target)',
-        observed_architecture: observed
-            ? formatObservedArchitecture(observed)
-            : '(analyzer did not run)',
-        mesh: {
-            bar: {
-                calm_summary: mesh.bar?.calm_model ? summarizeCalmModelArchaeology(mesh.bar.calm_model) : '(no CALM model loaded)',
-                threats_summary: mesh.bar?.threats ? summarizeThreatsArchaeology(mesh.bar.threats) : '(no threat model on file)',
-            },
-        },
-        gap_signals: gaps.length === 0 ? '(no structural gaps detected)' : gaps.map(g => `- **${g.id}** [${g.severity}] ${g.kind}: ${g.summary}`).join('\n'), // One bullet per gap: id, severity, kind, summary.
-        ranked_sources: rankedSources.length === 0
-            ? '(no web sources retrieved)'
-            : rankedSources.map(formatRankedSource).join('\n\n'),
-    };
-}
-function formatObservedArchitecture(o) { // Renders the observed architecture as newline-joined text: profile lines, then modules, endpoints and dependencies.
-    const lines = [];
-    lines.push(`Repo: ${o.profile.slug} @ ${o.profile.cloneSha.slice(0, 12)}`); // Clone SHA is shortened to its first 12 characters.
-    lines.push(`Languages: ${o.profile.languages.join(', ') || '(none detected)'}`);
-    lines.push(`Frameworks: ${o.profile.frameworks.join(', ') || '(none detected)'}`);
-    lines.push(`Manifests: ${o.profile.manifests.join(', ') || '(none)'}`);
-    lines.push(`Files: ${o.profile.totalFiles} totalling ${o.profile.totalBytes} bytes`);
-    lines.push('');
-    lines.push('Modules (top 12 by file count):');
-    for (const m of o.modules.slice(0, 12)) { // NOTE(review): takes the first 12 modules as given; no sort happens here, so the ordering by file count is presumably done upstream. Confirm.
-        lines.push(`  - OA[${m.name}] layer=${m.layer} files=${m.fileCount} endpoints=${m.endpointCount}`);
-    }
-    if (o.endpoints.length > 0) { // Section is omitted entirely when there are no endpoints; otherwise the first 15 are listed.
-        lines.push('');
-        lines.push('Endpoints (sample):');
-        for (const e of o.endpoints.slice(0, 15)) {
-            lines.push(`  - ${e.method} ${e.path} (${e.framework}) — ${e.file}`);
-        }
-    }
-    if (o.dependencies.length > 0) { // Shows the total count, the first 25 names, and an ellipsis when more exist.
-        lines.push('');
-        lines.push(`Direct dependencies (${o.dependencies.length}): ${o.dependencies.slice(0, 25).join(', ')}${o.dependencies.length > 25 ? ', …' : ''}`);
-    }
-    return lines.join('\n');
-}
-function summarizeCalmModelArchaeology(calm) { // Returns a count line followed by up to ten node lines, or a placeholder when calm is not an object.
-    if (!calm || typeof calm !== 'object') {
-        return '(no CALM model loaded)';
-    }
-    const obj = calm;
-    const nodes = Array.isArray(obj.nodes) ? obj.nodes : []; // Non-array values are treated as empty.
-    const relationships = Array.isArray(obj.relationships) ? obj.relationships : [];
-    const lines = [];
-    lines.push(`${nodes.length} node(s), ${relationships.length} relationship(s)`);
-    for (const n of nodes.slice(0, 10)) { // Label falls back from unique-id to name to the literal unknown.
-        const o = n;
-        lines.push(`  - ${o['unique-id'] ?? o.name ?? 'unknown'} (${o['node-type'] ?? 'unknown'})`);
-    }
-    return lines.join('\n');
-}
-function summarizeThreatsArchaeology(threats) { // Returns a comma-separated tally of threats per category, or a placeholder for a missing or empty list.
-    if (!Array.isArray(threats) || threats.length === 0) {
-        return '(no threats)';
-    }
-    const byCategory = {}; // Maps category name to occurrence count.
-    for (const t of threats) {
-        const cat = t.category || 'unknown'; // Falsy categories are grouped under the literal unknown.
-        byCategory[cat] = (byCategory[cat] || 0) + 1;
-    }
-    return Object.entries(byCategory).map(([c, n]) => `${c} × ${n}`).join(', ');
-}