npm - @maintainabilityai/research-runner - Versions diffs - 0.1.1 - Mend

@maintainabilityai/research-runner 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (102) hide show

package/LICENSE +21 -0
package/README.md +82 -0
package/bin/research-runner.js +2 -0
package/dist/cli.d.ts +1 -0
package/dist/cli.js +209 -0
package/dist/llm/anthropic-client.d.ts +39 -0
package/dist/llm/anthropic-client.js +74 -0
package/dist/llm/github-models-client.d.ts +46 -0
package/dist/llm/github-models-client.js +78 -0
package/dist/llm/llm-router.d.ts +46 -0
package/dist/llm/llm-router.js +60 -0
package/dist/mesh/get-mesh-sha.d.ts +1 -0
package/dist/mesh/get-mesh-sha.js +27 -0
package/dist/mesh/mesh-reader.d.ts +14 -0
package/dist/mesh/mesh-reader.js +392 -0
package/dist/mesh/prompt-loader.d.ts +22 -0
package/dist/mesh/prompt-loader.js +119 -0
package/dist/mesh/threat-model-reader.d.ts +33 -0
package/dist/mesh/threat-model-reader.js +123 -0
package/dist/runner/archeologist.d.ts +39 -0
package/dist/runner/archeologist.js +620 -0
package/dist/runner/audit-emitter.d.ts +62 -0
package/dist/runner/audit-emitter.js +210 -0
package/dist/runner/hatters-tag-builder.d.ts +52 -0
package/dist/runner/hatters-tag-builder.js +40 -0
package/dist/runner/nodes/analyze-architecture.d.ts +10 -0
package/dist/runner/nodes/analyze-architecture.js +447 -0
package/dist/runner/nodes/arxiv-search.d.ts +12 -0
package/dist/runner/nodes/arxiv-search.js +52 -0
package/dist/runner/nodes/clone-and-index.d.ts +32 -0
package/dist/runner/nodes/clone-and-index.js +158 -0
package/dist/runner/nodes/dedupe-and-rank.d.ts +27 -0
package/dist/runner/nodes/dedupe-and-rank.js +98 -0
package/dist/runner/nodes/deterministic-review.d.ts +55 -0
package/dist/runner/nodes/deterministic-review.js +206 -0
package/dist/runner/nodes/expert-review.d.ts +68 -0
package/dist/runner/nodes/expert-review.js +197 -0
package/dist/runner/nodes/gap-analysis.d.ts +48 -0
package/dist/runner/nodes/gap-analysis.js +153 -0
package/dist/runner/nodes/generate-prd-manifest.d.ts +53 -0
package/dist/runner/nodes/generate-prd-manifest.js +209 -0
package/dist/runner/nodes/hackernews-search.d.ts +12 -0
package/dist/runner/nodes/hackernews-search.js +63 -0
package/dist/runner/nodes/identify-gaps.d.ts +33 -0
package/dist/runner/nodes/identify-gaps.js +185 -0
package/dist/runner/nodes/plan-queries.d.ts +28 -0
package/dist/runner/nodes/plan-queries.js +120 -0
package/dist/runner/nodes/prd-validator.d.ts +51 -0
package/dist/runner/nodes/prd-validator.js +203 -0
package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +22 -0
package/dist/runner/nodes/synthesis-archaeology-validator.js +131 -0
package/dist/runner/nodes/synthesis-validator.d.ts +51 -0
package/dist/runner/nodes/synthesis-validator.js +185 -0
package/dist/runner/nodes/synthesize-prd.d.ts +84 -0
package/dist/runner/nodes/synthesize-prd.js +202 -0
package/dist/runner/nodes/synthesize-report.d.ts +53 -0
package/dist/runner/nodes/synthesize-report.js +188 -0
package/dist/runner/nodes/tavily-search.d.ts +21 -0
package/dist/runner/nodes/tavily-search.js +57 -0
package/dist/runner/nodes/uspto-search.d.ts +13 -0
package/dist/runner/nodes/uspto-search.js +62 -0
package/dist/runner/nodes/verify-grounding.d.ts +54 -0
package/dist/runner/nodes/verify-grounding.js +134 -0
package/dist/runner/prd.d.ts +28 -0
package/dist/runner/prd.js +494 -0
package/dist/schemas/audit-event.d.ts +1151 -0
package/dist/schemas/audit-event.js +141 -0
package/dist/schemas/index.d.ts +17 -0
package/dist/schemas/index.js +33 -0
package/dist/schemas/mesh-context.d.ts +415 -0
package/dist/schemas/mesh-context.js +95 -0
package/dist/schemas/observed-architecture.d.ts +262 -0
package/dist/schemas/observed-architecture.js +90 -0
package/dist/schemas/prd-brief.d.ts +111 -0
package/dist/schemas/prd-brief.js +37 -0
package/dist/schemas/prd-doc.d.ts +249 -0
package/dist/schemas/prd-doc.js +42 -0
package/dist/schemas/prd-manifest.d.ts +171 -0
package/dist/schemas/prd-manifest.js +73 -0
package/dist/schemas/primitives.d.ts +47 -0
package/dist/schemas/primitives.js +41 -0
package/dist/schemas/query-plan.d.ts +33 -0
package/dist/schemas/query-plan.js +25 -0
package/dist/schemas/ranked-source.d.ts +82 -0
package/dist/schemas/ranked-source.js +29 -0
package/dist/schemas/research-brief.d.ts +114 -0
package/dist/schemas/research-brief.js +49 -0
package/dist/schemas/research-doc.d.ts +104 -0
package/dist/schemas/research-doc.js +37 -0
package/dist/search/arxiv-client.d.ts +41 -0
package/dist/search/arxiv-client.js +88 -0
package/dist/search/hackernews-client.d.ts +33 -0
package/dist/search/hackernews-client.js +44 -0
package/dist/search/provider-result.d.ts +25 -0
package/dist/search/provider-result.js +2 -0
package/dist/search/tavily-client.d.ts +38 -0
package/dist/search/tavily-client.js +53 -0
package/dist/search/uspto-client.d.ts +50 -0
package/dist/search/uspto-client.js +112 -0
package/dist/utils/run-id.d.ts +2 -0
package/dist/utils/run-id.js +22 -0
package/package.json +53 -0

package/dist/runner/archeologist.js ADDED Viewed

@@ -0,0 +1,620 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Reuses an existing this.__createBinding if present; otherwise defines a helper that exposes m[k] on o under the name k2. NOTE(review): looks like the standard TypeScript-emitted import helper — confirm before hand-editing.
+    if (k2 === undefined) k2 = k; // destination name defaults to the source name
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // replace the descriptor when it is missing, is an accessor on a module not flagged __esModule, or is a writable/configurable data property
+      desc = { enumerable: true, get: function() { return m[k]; } }; // enumerable getter, so o[k2] reads m[k] at access time instead of holding a copy
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { // branch taken when Object.create is falsy: plain assignment of the current value
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // Reuses an existing this.__setModuleDefault if present; otherwise defines a helper that stores v as o["default"].
+    Object.defineProperty(o, "default", { enumerable: true, value: v }); // enumerable data property; writable/configurable are not specified here
+}) : function(o, v) { // branch taken when Object.create is falsy: plain assignment
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { // Reuses an existing this.__importStar if present; otherwise builds the wrapper returned below, which is applied to the require() results for node:fs and node:path in this file.
+    var ownKeys = function(o) { // self-replacing function: the first call picks an implementation, later calls go straight to it
+        ownKeys = Object.getOwnPropertyNames || function (o) { // prefer Object.getOwnPropertyNames; otherwise collect own keys with a for-in loop
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; // hasOwnProperty filter skips inherited keys
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; // modules already flagged __esModule are returned unchanged
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // bind every own key except "default" onto the wrapper
+        __setModuleDefault(result, mod); // the original module object itself becomes result.default
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true }); // flags this module's exports with __esModule = true (the same flag __importStar checks on imported modules)
+exports.runArcheologist = runArcheologist; // the only export assigned in the visible part of this file; the function declaration appears further down
+/**
+ * Archeologist pipeline orchestrator — Phase 2d.
+ *
+ * Wires nodes for the research path:
+ *   validate_brief         (pure)
+ *   gather_mesh_context    (pure)
+ *   plan_queries           (LLM)
+ *   tavily_search × 5      (pure_api)
+ *   arxiv_search × 3       (pure_api)              ← phase 2d
+ *   uspto_search × 3       (pure_api, optional)    ← phase 2d
+ *   hackernews_search × 3  (pure_api)              ← phase 2d
+ *   dedupe_and_rank        (pure)
+ *   [gap_analysis          (pure trigger + LLM)    ← phase 2d, optional]
+ *   [tavily_search × 3     (pure_api, follow-up)   ← phase 2d, optional]
+ *   [dedupe_and_rank       (pure, re-rank)         ← phase 2d, optional]
+ *   synthesize_report      (LLM)
+ *   publish                (pure)
+ *   verify_and_trigger     (run_complete)
+ *
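+ * Search runs across all 4 providers in parallel. uspto is skipped (logged
+ * as node_error envelope) when USPTO_API_KEY is absent — coverage gap, not
+ * a run failure. Gap-analysis is bounded one-shot: at most one follow-up
+ * round of tavily queries before synthesis.
+ *
+ * When brief.path is 'archaeology', the plan_queries / four-provider search /
+ * gap_analysis stages listed above are replaced by clone_and_index →
+ * analyze_architecture → identify_gaps → tavily_search (gap-derived queries
+ * only) → dedupe_and_rank. The tavily_search and dedupe_and_rank steps run
+ * only when a Tavily API key is configured; otherwise a node_error is logged
+ * and synthesis proceeds from the identified gaps alone. synthesize_report,
+ * publish, and the run_complete event are shared by both paths.
+ */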
+const fs = __importStar(require("node:fs"));
+const path = __importStar(require("node:path"));
+const schemas_1 = require("../schemas");
+const mesh_reader_1 = require("../mesh/mesh-reader");
+const run_id_1 = require("../utils/run-id");
+const audit_emitter_1 = require("./audit-emitter");
+const hatters_tag_builder_1 = require("./hatters-tag-builder");
+const plan_queries_1 = require("./nodes/plan-queries");
+const tavily_search_1 = require("./nodes/tavily-search");
+const arxiv_search_1 = require("./nodes/arxiv-search");
+const uspto_search_1 = require("./nodes/uspto-search");
+const hackernews_search_1 = require("./nodes/hackernews-search");
+const dedupe_and_rank_1 = require("./nodes/dedupe-and-rank");
+const gap_analysis_1 = require("./nodes/gap-analysis");
+const synthesize_report_1 = require("./nodes/synthesize-report");
+const clone_and_index_1 = require("./nodes/clone-and-index");
+const analyze_architecture_1 = require("./nodes/analyze-architecture");
+const identify_gaps_1 = require("./nodes/identify-gaps");
+async function runArcheologist(opts) {
+    // ----- validate_brief (pure) -----
+    const briefParsed = schemas_1.ResearchBrief.safeParse(opts.brief);
+    if (!briefParsed.success) {
+        throw new Error(`Invalid research brief: ${briefParsed.error.message}`);
+    }
+    const brief = briefParsed.data;
+    const runId = (0, run_id_1.generateRunId)('RES');
+    const startedAt = new Date();
+    const anthropicApiKey = opts.anthropicApiKey ?? process.env.ANTHROPIC_API_KEY ?? '';
+    const githubToken = opts.githubToken ?? process.env.GITHUB_TOKEN ?? '';
+    const tavilyApiKey = opts.tavilyApiKey ?? process.env.TAVILY_API_KEY ?? '';
+    const usptoApiKey = opts.usptoApiKey ?? process.env.USPTO_API_KEY ?? '';
+    const absoluteAuditDir = path.resolve(opts.meshDir, opts.auditDir);
+    const absoluteOutputDir = path.resolve(opts.meshDir, opts.outputDir);
+    fs.mkdirSync(absoluteOutputDir, { recursive: true });
+    const emitter = new audit_emitter_1.AuditEmitter(absoluteAuditDir, runId);
+    emitter.emit({
+        node_kind: 'pure',
+        node_name: 'validate_brief',
+        duration_ms: 0,
+        pure: {
+            inputs_summary: `topic="${brief.topic.slice(0, 80)}"; scope=${brief.scope.level}${brief.scope.id ? `(${brief.scope.id})` : ''}; path=${brief.path}`,
+            outputs_summary: 'ResearchBrief validated',
+        },
+    });
+    // ----- gather_mesh_context (pure) -----
+    const meshStart = Date.now();
+    const meshContext = (0, mesh_reader_1.gatherMeshContext)({
+        meshDir: opts.meshDir,
+        scope: { level: brief.scope.level, id: brief.scope.id },
+    });
+    emitter.emit({
+        node_kind: 'pure',
+        node_name: 'gather_mesh_context',
+        duration_ms: Date.now() - meshStart,
+        pure: {
+            inputs_summary: `scope=${meshContext.scope.level}${meshContext.scope.bar_id ? `(${meshContext.scope.bar_id})` : ''}; mesh_sha=${meshContext.mesh_sha.slice(0, 7)}`,
+            outputs_summary: `portfolio.related_research=${meshContext.portfolio.related_research_summaries.length}; bar_loaded=${!!meshContext.bar}; mesh_gaps=${meshContext.bar?.mesh_gaps.join(',') || 'n/a'}; adrs=${meshContext.bar?.adrs.length ?? 0}; prior_prds=${meshContext.bar?.related_prds.length ?? 0}`,
+        },
+    });
+    // Path-conditional outputs the synthesis + publish blocks consume below.
+    let totalInputTokens = 0;
+    let totalOutputTokens = 0;
+    let totalCostUsd = 0;
+    let rankedSources = [];
+    const providerResultCounts = { tavily: 0, arxiv: 0, uspto: 0, hackernews: 0 };
+    let gapAnalysisRan = false;
+    let observedArchitecture;
+    let archaeologyGaps = [];
+    let cleanupCloneDir = null;
+    let researchQueryPlan;
+    if (brief.path === 'archaeology') {
+        // ============================================================================
+        // ARCHAEOLOGY PATH — replaces plan_queries + 4-provider search + gap-analysis
+        // with clone → analyze → identify-gaps → web-research (tavily only).
+        // ============================================================================
+        if (!brief.target_repo) {
+            throw new Error('Archaeology path requires brief.target_repo (owner/repo)');
+        }
+        // 1. clone_and_index (pure)
+        const cloneStart = Date.now();
+        const clone = (0, clone_and_index_1.cloneAndIndex)({ targetRepo: brief.target_repo });
+        cleanupCloneDir = clone.cloneDir;
+        emitter.emit({
+            node_kind: 'pure',
+            node_name: 'clone_and_index',
+            duration_ms: Date.now() - cloneStart,
+            pure: {
+                inputs_summary: `target=${brief.target_repo}`,
+                outputs_summary: `clone_sha=${clone.cloneSha.slice(0, 12)}; files=${clone.inventory.totalFiles}; bytes=${clone.inventory.totalBytes}; manifests=${clone.inventory.rootManifests.join(',') || 'none'}`,
+            },
+        });
+        // 2. analyze_architecture (pure, file-based)
+        const analyzeStart = Date.now();
+        observedArchitecture = (0, analyze_architecture_1.analyzeArchitecture)({
+            cloneDir: clone.cloneDir,
+            targetRepo: brief.target_repo,
+            cloneSha: clone.cloneSha,
+            inventory: clone.inventory,
+        });
+        emitter.emit({
+            node_kind: 'pure',
+            node_name: 'analyze_architecture',
+            duration_ms: Date.now() - analyzeStart,
+            pure: {
+                inputs_summary: `clone_sha=${clone.cloneSha.slice(0, 12)}; analyzer=${analyze_architecture_1.ANALYZER_VERSION}`,
+                outputs_summary: `languages=${observedArchitecture.profile.languages.join(',')}; frameworks=${observedArchitecture.profile.frameworks.join(',') || 'none'}; modules=${observedArchitecture.modules.length}; endpoints=${observedArchitecture.endpoints.length}`,
+            },
+        });
+        // 3. identify_gaps (pure, comparison) → derives 3 web queries
+        const gapsStart = Date.now();
+        const gapsResult = (0, identify_gaps_1.identifyGaps)({ observed: observedArchitecture, meshContext });
+        archaeologyGaps = gapsResult.gaps;
+        emitter.emit({
+            node_kind: 'pure',
+            node_name: 'identify_gaps',
+            duration_ms: Date.now() - gapsStart,
+            pure: {
+                inputs_summary: `observed_modules=${observedArchitecture.modules.length}; calm_nodes=${(meshContext.bar?.calm_model && Array.isArray(meshContext.bar.calm_model.nodes)) ? meshContext.bar.calm_model.nodes.length : 0}`,
+                outputs_summary: `gaps=${archaeologyGaps.length} (${archaeologyGaps.filter(g => g.severity === 'HIGH').length} HIGH); web_queries=${gapsResult.webQueries.length}`,
+            },
+        });
+        // 4. web_research via tavily (gap-derived queries, no other providers)
+        if (tavilyApiKey) {
+            const webStart = Date.now();
+            const web = await (0, tavily_search_1.runTavilySearch)({
+                apiKey: tavilyApiKey,
+                queries: gapsResult.webQueries,
+                fetchImpl: opts.fetchImpl,
+            });
+            const perQueryMs = Math.round((Date.now() - webStart) / Math.max(1, web.envelopes.length));
+            for (const envelope of web.envelopes) {
+                if (envelope.error) {
+                    emitter.emit({
+                        node_kind: 'node_error',
+                        node_name: 'tavily_search',
+                        duration_ms: 0,
+                        error: { message: `gap-derived query="${envelope.query.slice(0, 80)}": ${envelope.error}`, retryable: true },
+                    });
+                }
+                else {
+                    emitter.emit({
+                        node_kind: 'pure_api',
+                        node_name: 'tavily_search',
+                        duration_ms: perQueryMs,
+                        api: {
+                            provider: 'tavily',
+                            endpoint: 'POST /search (archaeology gap-derived)',
+                            request_summary: `query="${envelope.query.slice(0, 120)}"`,
+                            http_status: envelope.httpStatus,
+                            response_byte_count: envelope.responseBytes,
+                        },
+                    });
+                }
+            }
+            providerResultCounts.tavily = web.results.length;
+            // dedupe (smaller pool — just the gap-derived web results)
+            const dedupeStart = Date.now();
+            rankedSources = (0, dedupe_and_rank_1.dedupeAndRank)({ results: web.results, topN: 15 });
+            emitter.emit({
+                node_kind: 'pure',
+                node_name: 'dedupe_and_rank',
+                duration_ms: Date.now() - dedupeStart,
+                pure: {
+                    inputs_summary: `raw_results=${web.results.length}; queries=${web.envelopes.length} (gap-derived)`,
+                    outputs_summary: `ranked_sources=${rankedSources.length}; top_score=${rankedSources[0]?.salience_score ?? 0}`,
+                },
+            });
+        }
+        else {
+            // No tavily key — synthesise without external grounding (still useful from the gaps alone)
+            emitter.emit({
+                node_kind: 'node_error',
+                node_name: 'tavily_search',
+                duration_ms: 0,
+                error: { message: 'TAVILY_API_KEY not configured — archaeology synthesis will lack external research grounding', retryable: false },
+            });
+        }
+    }
+    else {
+        // ============================================================================
+        // RESEARCH PATH (existing): plan_queries → 4 providers → dedupe → gap-analysis
+        // ============================================================================
+        const planStart = Date.now();
+        const plan = await (0, plan_queries_1.planQueries)({
+            meshDir: opts.meshDir,
+            brief,
+            meshContext,
+            provider: brief.llm_provider,
+            anthropicApiKey,
+            githubToken,
+            fetchImpl: opts.fetchImpl,
+        });
+        researchQueryPlan = plan.queryPlan;
+        totalInputTokens += plan.llm.inputTokens;
+        totalOutputTokens += plan.llm.outputTokens;
+        totalCostUsd += plan.llm.costUsd;
+        emitter.emit({
+            node_kind: 'llm',
+            node_name: 'plan_queries',
+            duration_ms: Date.now() - planStart,
+            llm: {
+                provider: plan.llm.provider,
+                model: plan.llm.model,
+                prompt_pack: { path: plan.prompt.packPath, sha256: plan.prompt.packSha256 },
+                input_tokens: plan.llm.inputTokens,
+                output_tokens: plan.llm.outputTokens,
+                cost_usd: plan.llm.costUsd,
+                guardrails: { mode: brief.guardrails, pre: 'PASS', post: 'PASS' },
+            },
+        });
+        // ----- four-provider search (pure_api each, parallel across providers) -----
+        // We run all four providers concurrently with Promise.allSettled so a
+        // provider-level failure (e.g. PatentsView outage) doesn't block the rest.
+        const searchStart = Date.now();
+        const [tavily, arxiv, hn, uspto] = await Promise.allSettled([
+            (0, tavily_search_1.runTavilySearch)({ apiKey: tavilyApiKey, queries: plan.queryPlan.web, fetchImpl: opts.fetchImpl }),
+            (0, arxiv_search_1.runArxivSearch)({ queries: plan.queryPlan.arxiv, fetchImpl: opts.fetchImpl }),
+            (0, hackernews_search_1.runHackerNewsSearch)({ queries: plan.queryPlan.community, fetchImpl: opts.fetchImpl }),
+            usptoApiKey
+                ? (0, uspto_search_1.runUsptoSearch)({ apiKey: usptoApiKey, queries: plan.queryPlan.patent, fetchImpl: opts.fetchImpl })
+                : Promise.reject(new Error('USPTO_API_KEY not configured — patent coverage skipped')),
+        ]);
+        const searchDuration = Date.now() - searchStart;
+        // Record per-provider envelopes (audit log) + collect ProviderResult[] (dedupe input).
+        // providerResultCounts is declared at the top of runArcheologist so the
+        // archaeology branch can populate it too.
+        const allProviderResults = [];
+        // Helper: emit per-query envelopes (or one node_error per provider-level failure)
+        const handleProvider = (settled, nodeName, providerLabel, endpoint) => {
+            if (settled.status === 'rejected') {
+                const msg = settled.reason instanceof Error ? settled.reason.message : String(settled.reason);
+                emitter.emit({
+                    node_kind: 'node_error',
+                    node_name: nodeName,
+                    duration_ms: 0,
+                    error: { message: msg, retryable: false },
+                });
+                return;
+            }
+            const { envelopes, results } = settled.value;
+            const perQueryMs = Math.round(searchDuration / Math.max(1, envelopes.length));
+            for (const envelope of envelopes) {
+                if (envelope.error) {
+                    emitter.emit({
+                        node_kind: 'node_error',
+                        node_name: nodeName,
+                        duration_ms: 0,
+                        error: { message: `query="${envelope.query.slice(0, 80)}": ${envelope.error}`, retryable: true },
+                    });
+                }
+                else {
+                    emitter.emit({
+                        node_kind: 'pure_api',
+                        node_name: nodeName,
+                        duration_ms: perQueryMs,
+                        api: {
+                            provider: providerLabel,
+                            endpoint,
+                            request_summary: `query="${envelope.query.slice(0, 120)}"`,
+                            http_status: envelope.httpStatus,
+                            response_byte_count: envelope.responseBytes,
+                        },
+                    });
+                }
+            }
+            providerResultCounts[providerLabel] = results.length;
+            allProviderResults.push(...results);
+        };
+        handleProvider(tavily, 'tavily_search', 'tavily', 'POST /search');
+        handleProvider(arxiv, 'arxiv_search', 'arxiv', 'GET /api/query');
+        handleProvider(hn, 'hackernews_search', 'hackernews', 'GET /api/v1/search');
+        handleProvider(uspto, 'uspto_search', 'uspto', 'POST /api/v1/patent/');
+        // ----- dedupe_and_rank (pure) — first pass -----
+        let dedupeStart = Date.now();
+        rankedSources = (0, dedupe_and_rank_1.dedupeAndRank)({ results: allProviderResults, topN: 20 });
+        emitter.emit({
+            node_kind: 'pure',
+            node_name: 'dedupe_and_rank',
+            duration_ms: Date.now() - dedupeStart,
+            pure: {
+                inputs_summary: `raw_results=${allProviderResults.length}; providers=tavily(${providerResultCounts.tavily})+arxiv(${providerResultCounts.arxiv})+hn(${providerResultCounts.hackernews})+uspto(${providerResultCounts.uspto})`,
+                outputs_summary: `ranked_sources=${rankedSources.length}; top_score=${rankedSources[0]?.salience_score ?? 0}`,
+            },
+        });
+        // ----- gap_analysis (optional, bounded one-shot) -----
+        const gapSignals = (0, gap_analysis_1.detectGapSignals)({ brief, rankedSources });
+        if (gapSignals.length > 0) {
+            emitter.emit({
+                node_kind: 'pure',
+                node_name: 'gap_analysis_trigger',
+                duration_ms: 0,
+                pure: {
+                    inputs_summary: `ranked_sources=${rankedSources.length}; providers=${Object.entries(providerResultCounts).filter(([, n]) => n > 0).map(([p, n]) => `${p}(${n})`).join('+')}`,
+                    outputs_summary: `signals=${gapSignals.map(s => s.kind).join(',')}`,
+                },
+            });
+            const gapStart = Date.now();
+            const gap = await (0, gap_analysis_1.runGapAnalysis)({
+                meshDir: opts.meshDir,
+                brief,
+                rankedSources,
+                signals: gapSignals,
+                provider: brief.llm_provider,
+                anthropicApiKey,
+                githubToken,
+                fetchImpl: opts.fetchImpl,
+            });
+            totalInputTokens += gap.llm.inputTokens;
+            totalOutputTokens += gap.llm.outputTokens;
+            totalCostUsd += gap.llm.costUsd;
+            emitter.emit({
+                node_kind: 'llm',
+                node_name: 'gap_analysis',
+                duration_ms: Date.now() - gapStart,
+                llm: {
+                    provider: gap.llm.provider,
+                    model: gap.llm.model,
+                    prompt_pack: { path: gap.prompt.packPath, sha256: gap.prompt.packSha256 },
+                    input_tokens: gap.llm.inputTokens,
+                    output_tokens: gap.llm.outputTokens,
+                    cost_usd: gap.llm.costUsd,
+                    guardrails: { mode: brief.guardrails, pre: 'PASS', post: 'PASS' },
+                },
+            });
+            // Bounded follow-up: one extra round of tavily, then re-dedupe.
+            if (tavilyApiKey) {
+                const followStart = Date.now();
+                const followUp = await (0, tavily_search_1.runTavilySearch)({
+                    apiKey: tavilyApiKey,
+                    queries: gap.followUpQueries,
+                    fetchImpl: opts.fetchImpl,
+                });
+                const followDuration = Date.now() - followStart;
+                const followPerQueryMs = Math.round(followDuration / Math.max(1, followUp.envelopes.length));
+                for (const envelope of followUp.envelopes) {
+                    if (envelope.error) {
+                        emitter.emit({
+                            node_kind: 'node_error',
+                            node_name: 'tavily_search',
+                            duration_ms: 0,
+                            error: { message: `gap-followup query="${envelope.query.slice(0, 80)}": ${envelope.error}`, retryable: true },
+                        });
+                    }
+                    else {
+                        emitter.emit({
+                            node_kind: 'pure_api',
+                            node_name: 'tavily_search',
+                            duration_ms: followPerQueryMs,
+                            api: {
+                                provider: 'tavily',
+                                endpoint: 'POST /search (gap-followup)',
+                                request_summary: `query="${envelope.query.slice(0, 120)}"`,
+                                http_status: envelope.httpStatus,
+                                response_byte_count: envelope.responseBytes,
+                            },
+                        });
+                    }
+                }
+                allProviderResults.push(...followUp.results);
+                providerResultCounts.tavily += followUp.results.length;
+                // Re-dedupe with the expanded result pool — emits a second dedupe event so
+                // the audit log clearly shows the loop happened.
+                dedupeStart = Date.now();
+                rankedSources = (0, dedupe_and_rank_1.dedupeAndRank)({ results: allProviderResults, topN: 20 });
+                emitter.emit({
+                    node_kind: 'pure',
+                    node_name: 'dedupe_and_rank',
+                    duration_ms: Date.now() - dedupeStart,
+                    pure: {
+                        inputs_summary: `raw_results=${allProviderResults.length} (post gap-followup)`,
+                        outputs_summary: `ranked_sources=${rankedSources.length}; top_score=${rankedSources[0]?.salience_score ?? 0}`,
+                    },
+                });
+            }
+            gapAnalysisRan = true;
+        }
+    } // end research-path else branch
+    // ----- synthesize_report (LLM) -----
+    const synthStart = Date.now();
+    const synthesis = await (0, synthesize_report_1.synthesizeReport)({
+        meshDir: opts.meshDir,
+        brief,
+        meshContext,
+        rankedSources,
+        provider: brief.llm_provider,
+        anthropicApiKey,
+        githubToken,
+        gapAnalysisRan,
+        path: brief.path,
+        observedArchitecture,
+        archaeologyGaps,
+        fetchImpl: opts.fetchImpl,
+    });
+    totalInputTokens += synthesis.llm.inputTokens;
+    totalOutputTokens += synthesis.llm.outputTokens;
+    totalCostUsd += synthesis.llm.costUsd;
+    emitter.emit({
+        node_kind: 'llm',
+        node_name: 'synthesize_report',
+        duration_ms: Date.now() - synthStart,
+        llm: {
+            provider: synthesis.llm.provider,
+            model: synthesis.llm.model,
+            prompt_pack: { path: synthesis.prompt.packPath, sha256: synthesis.prompt.packSha256 },
+            input_tokens: synthesis.llm.inputTokens,
+            output_tokens: synthesis.llm.outputTokens,
+            cost_usd: synthesis.llm.costUsd,
+            guardrails: { mode: brief.guardrails, pre: 'PASS', post: 'PASS' },
+        },
+    });
+    // ----- publish (pure) -----
+    const today = startedAt.toISOString().slice(0, 10);
+    const fileSlug = brief.topic
+        .toLowerCase()
+        .replace(/[^a-z0-9]+/g, '-')
+        .replace(/^-|-$/g, '')
+        .slice(0, 60) || 'research';
+    const artifactName = `${fileSlug}-${today}.md`;
+    const artifactPath = path.join(absoluteOutputDir, artifactName);
+    const meshSummary = meshContext.bar
+        ? `bar **${meshContext.bar.name}** (\`${meshContext.bar.bar_id}\`), ${meshContext.bar.adrs.length} ADR(s), ${meshContext.bar.related_research.length} prior research doc(s), mesh gaps: ${meshContext.bar.mesh_gaps.join(', ') || '_none_'}`
+        : meshContext.platform
+            ? `platform **${meshContext.platform.platform_id}** (${meshContext.platform.sibling_bars.length} sibling BAR(s))`
+            : `portfolio **${meshContext.portfolio.name}** (${meshContext.portfolio.related_research_summaries.length} prior research doc(s))`;
+    const bodyMd = buildResearchDoc({
+        brief,
+        runId,
+        meshSummary,
+        meshSha: meshContext.mesh_sha,
+        queryPlan: researchQueryPlan,
+        archaeologySummary: observedArchitecture
+            ? `Cloned \`${observedArchitecture.profile.slug}\` @ \`${observedArchitecture.profile.cloneSha.slice(0, 12)}\`. ${observedArchitecture.profile.totalFiles} files; languages: ${observedArchitecture.profile.languages.join(', ') || 'n/a'}; frameworks: ${observedArchitecture.profile.frameworks.join(', ') || 'n/a'}; ${observedArchitecture.modules.length} modules; ${observedArchitecture.endpoints.length} endpoints; ${archaeologyGaps.length} structural gaps identified.`
+            : undefined,
+        synthesisBody: synthesis.body_md,
+    });
+    const writeStart = Date.now();
+    fs.writeFileSync(artifactPath, bodyMd, 'utf8');
+    emitter.emit({
+        node_kind: 'pure',
+        node_name: 'publish',
+        duration_ms: Date.now() - writeStart,
+        pure: {
+            inputs_summary: `wrote ${artifactPath}`,
+            outputs_summary: `${bodyMd.length} bytes; ${rankedSources.length} citations`,
+        },
+    });
+    // ----- run_complete -----
+    const complete = emitter.emitRunComplete({
+        node_kind: 'run_complete',
+        node_name: 'verify_and_trigger',
+        duration_ms: Date.now() - startedAt.getTime(),
+        outcome: {
+            status: 'ok',
+            mesh_sha: meshContext.mesh_sha,
+            total_input_tokens: totalInputTokens,
+            total_output_tokens: totalOutputTokens,
+            total_cost_usd: roundUsd(totalCostUsd),
+            artifact_paths: [path.relative(opts.meshDir, artifactPath)],
+        },
+    });
+    // ----- Optionally append a PR body that wraps the artifact + Hatter's Tag -----
+    let prBodyPath = null;
+    if (opts.emitPrBodyPath) {
+        const hattersTag = (0, hatters_tag_builder_1.buildHattersTag)({
+            run_id: runId,
+            mesh_sha: meshContext.mesh_sha,
+            prompt_library_version: 'phase3a',
+            agent_version: opts.agentVersion,
+            published_at: new Date().toISOString(),
+            llm: {
+                provider: brief.llm_provider,
+                // synthesis runs on both paths; archaeology runs skip plan_queries so we
+                // use the synthesis model id as the "primary" model for the Hatter's Tag.
+                model: synthesis.llm.model,
+                input_tokens: totalInputTokens,
+                output_tokens: totalOutputTokens,
+                cost_usd: roundUsd(totalCostUsd),
+            },
+            guardrails: { mode: brief.guardrails, blocks: 0, warns: 0 },
+            audit: {
+                event_count: complete.event_id,
+                chain_root_hash: complete.outcome.chain_root_hash,
+                audit_log_path: path.relative(opts.meshDir, emitter.path),
+            },
+        });
+        const prBody = [bodyMd, '', hattersTag].join('\n');
+        fs.writeFileSync(opts.emitPrBodyPath, prBody, 'utf8');
+        prBodyPath = opts.emitPrBodyPath;
+    }
+    // ----- archaeology cleanup: remove the shallow clone now that synthesis is done -----
+    if (cleanupCloneDir) {
+        try {
+            fs.rmSync(cleanupCloneDir, { recursive: true, force: true });
+        }
+        catch { /* leave on disk — non-fatal, just a tmpdir entry */ }
+    }
+    return {
+        run_id: runId,
+        topic: brief.topic,
+        artifact_path: artifactPath,
+        audit_log_path: emitter.path,
+        chain_root_hash: complete.outcome.chain_root_hash,
+        pr_body_path: prBodyPath,
+        total_input_tokens: totalInputTokens,
+        total_output_tokens: totalOutputTokens,
+        total_cost_usd: roundUsd(totalCostUsd),
+        source_count: rankedSources.length,
+        provider_result_counts: providerResultCounts,
+        gap_analysis_ran: gapAnalysisRan,
+        /** archaeology path only — undefined for research runs */
+        archaeology_gap_count: archaeologyGaps.length || undefined,
+        conclusion_count: synthesis.citation_stats.conclusion_count,
+        recommendation_count: synthesis.citation_stats.recommendation_count,
+    };
+}
+/**
+ * Compose the published artifact. The preamble differs by path:
+ *   research:    <metadata> + <mesh context> + <Query Plan table>
+ *   archaeology: <metadata> + <mesh context> + <Target Repo Profile>
+ * The synthesis body owns every H2 from the canonical section list onward.
+ * The Hatter's Tag is appended separately by the PR-body path.
+ */
+function buildResearchDoc(opts) {
+    const lines = [];
+    lines.push(`# ${opts.brief.topic}`);
+    lines.push('');
+    lines.push(`- **Run id:** \`${opts.runId}\``);
+    lines.push(`- **Mesh sha:** \`${opts.meshSha.slice(0, 12)}\``);
+    lines.push(`- **Path:** ${opts.brief.path}${opts.brief.target_repo ? ` (\`${opts.brief.target_repo}\`)` : ''}`);
+    lines.push(`- **Scope:** ${opts.brief.scope.level}${opts.brief.scope.id ? ` / ${opts.brief.scope.id}` : ''}`);
+    lines.push('');
+    lines.push('## Run Metadata');
+    lines.push('');
+    lines.push(`Scope resolved to: ${opts.meshSummary}.`);
+    lines.push('');
+    if (opts.queryPlan) {
+        lines.push('### Query Plan (per-provider, LLM-generated)');
+        lines.push('');
+        lines.push('| Provider | Queries |');
+        lines.push('|---|---|');
+        lines.push(`| **web** (Tavily) | ${opts.queryPlan.web.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
+        lines.push(`| **arxiv** | ${opts.queryPlan.arxiv.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
+        lines.push(`| **patent** (USPTO) | ${opts.queryPlan.patent.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
+        lines.push(`| **community** (HN) | ${opts.queryPlan.community.map(q => `\`${q.replace(/`/g, "'")}\``).join(' · ')} |`);
+        lines.push('');
+    }
+    if (opts.archaeologySummary) {
+        lines.push('### Target Repository Profile (analyze_architecture)');
+        lines.push('');
+        lines.push(opts.archaeologySummary);
+        lines.push('');
+    }
+    // The synthesis body owns every H2 from the canonical section list onward.
+    lines.push(opts.synthesisBody.trim());
+    lines.push('');
+    return lines.join('\n');
+}
+function roundUsd(n) {
+    return Math.round(n * 10000) / 10000;
+}

package/dist/runner/audit-emitter.d.ts ADDED Viewed

@@ -0,0 +1,62 @@
+import { type AuditEvent as AuditEventType, type RunCompleteEvent } from '../schemas';
+/**
+ * Distributive Omit — preserves the discriminated union when stripping the
+ * envelope fields the emitter fills in itself.
+ *
+ * The `T extends unknown` condition is always true; it is there only so the
+ * conditional type distributes over each member of a union `T`, applying
+ * `Omit` to every variant separately instead of to the union as a whole.
+ */
+type DistributiveOmit<T, K extends PropertyKey> = T extends unknown ? Omit<T, K> : never;
+/**
+ * Caller-supplied partial event — the emitter fills in `run_id`, `event_id`,
+ * `ts`, `prev_event_hash`, and `event_hash`. Per-variant payload fields
+ * (`pure`, `llm`, `api`, `outcome`, `error`) come from the node.
+ *
+ * `ts` is stripped from every variant and then re-added as an optional
+ * property, so a caller may pin the timestamp but is never required to.
+ */
+export type EventInput = DistributiveOmit<AuditEventType, 'run_id' | 'event_id' | 'ts' | 'prev_event_hash' | 'event_hash'> & {
+    /** Optional ISO timestamp override for tests; defaults to "now". */
+    ts?: string;
+};
+/**
+ * Helper input for `emitRunComplete` — same shape as EventInput restricted to
+ * run_complete, minus chain_root_hash.
+ *
+ * `outcome` is removed together with the envelope fields and then re-declared
+ * as the event's outcome without `chain_root_hash`, so that field is not part
+ * of the input type.
+ */
+export type RunCompleteInput = DistributiveOmit<RunCompleteEvent, 'run_id' | 'event_id' | 'ts' | 'prev_event_hash' | 'event_hash' | 'outcome'> & {
+    ts?: string;
+    outcome: Omit<RunCompleteEvent['outcome'], 'chain_root_hash'>;
+};
+/**
+ * Emits the audit events of one run to a JSONL file.
+ *
+ * Callers pass events without the envelope fields (`run_id`, `event_id`,
+ * `ts`, `prev_event_hash`, `event_hash`); the emitter fills those in.
+ */
+export declare class AuditEmitter {
+    // NOTE(review): private members carry no types in a declaration file; their
+    // roles are presumed from their names only — confirm against audit-emitter.js.
+    private readonly runId;
+    private readonly filePath;
+    private nextEventId;
+    private prevEventHash;
+    private rootHash;
+    private closed;
+    /**
+     * @param auditDir   target directory (created on demand)
+     * @param runId      the run id; becomes `<runId>.jsonl`
+     */
+    constructor(auditDir: string, runId: string);
+    /**
+     * Emit one event. Returns the canonical serialized form (useful for tests).
+     * Validates against the AuditEvent schema before writing.
+     *
+     * Note that the declared return type is the event object (`AuditEvent`),
+     * not a string.
+     *
+     * @param input  the event without its envelope fields; `ts` may be supplied
+     *               to override the timestamp
+     */
+    emit(input: EventInput): AuditEventType;
+    /**
+     * Emit a `run_complete` event. The emitter computes `chain_root_hash` itself
+     * (the hash of the run_complete event), so callers leave that field blank
+     * (or omit it) — it's filled in here.
+     *
+     * @param input  run_complete payload whose `outcome` type excludes
+     *               `chain_root_hash`
+     * @returns the `run_complete` event, typed as `RunCompleteEvent`
+     */
+    emitRunComplete(input: RunCompleteInput): RunCompleteEvent;
+    /**
+     * SHA-256 of the most recent event — equal to `chain_root_hash` after run_complete.
+     * NOTE(review): presumably `null` until the first event has been emitted — confirm.
+     */
+    get currentRootHash(): string | null;
+    /** Absolute path to the JSONL file this emitter writes to. */
+    get path(): string;
+}
+/**
+ * Parse a JSONL audit file back into typed events. Re-validates every event
+ * against the schema; returns null on malformed input.
+ *
+ * @param filePath  path of the JSONL audit file to read
+ * @returns the parsed events, or `null` when the input is malformed
+ */
+export declare function readAuditLog(filePath: string): AuditEventType[] | null;
+/**
+ * Verify the hash chain of a sequence of events.
+ *   - Each event's prev_event_hash must match the previous event's event_hash.
+ *   - Each event's event_hash must match a recomputation against the line.
+ *   - The first event must have prev_event_hash === null.
+ * Returns the chain root hash (= final event_hash) on success, null on any failure.
+ *
+ * @param events  the events to check, in the order they were emitted
+ * @returns the final `event_hash` when every check passes, otherwise `null`
+ */
+export declare function verifyChain(events: AuditEventType[]): string | null;
+export {};