npm - @maintainabilityai/research-runner - Versions diffs - 0.1.1 - Mend

@maintainabilityai/research-runner 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (102) hide show

package/LICENSE +21 -0
package/README.md +82 -0
package/bin/research-runner.js +2 -0
package/dist/cli.d.ts +1 -0
package/dist/cli.js +209 -0
package/dist/llm/anthropic-client.d.ts +39 -0
package/dist/llm/anthropic-client.js +74 -0
package/dist/llm/github-models-client.d.ts +46 -0
package/dist/llm/github-models-client.js +78 -0
package/dist/llm/llm-router.d.ts +46 -0
package/dist/llm/llm-router.js +60 -0
package/dist/mesh/get-mesh-sha.d.ts +1 -0
package/dist/mesh/get-mesh-sha.js +27 -0
package/dist/mesh/mesh-reader.d.ts +14 -0
package/dist/mesh/mesh-reader.js +392 -0
package/dist/mesh/prompt-loader.d.ts +22 -0
package/dist/mesh/prompt-loader.js +119 -0
package/dist/mesh/threat-model-reader.d.ts +33 -0
package/dist/mesh/threat-model-reader.js +123 -0
package/dist/runner/archeologist.d.ts +39 -0
package/dist/runner/archeologist.js +620 -0
package/dist/runner/audit-emitter.d.ts +62 -0
package/dist/runner/audit-emitter.js +210 -0
package/dist/runner/hatters-tag-builder.d.ts +52 -0
package/dist/runner/hatters-tag-builder.js +40 -0
package/dist/runner/nodes/analyze-architecture.d.ts +10 -0
package/dist/runner/nodes/analyze-architecture.js +447 -0
package/dist/runner/nodes/arxiv-search.d.ts +12 -0
package/dist/runner/nodes/arxiv-search.js +52 -0
package/dist/runner/nodes/clone-and-index.d.ts +32 -0
package/dist/runner/nodes/clone-and-index.js +158 -0
package/dist/runner/nodes/dedupe-and-rank.d.ts +27 -0
package/dist/runner/nodes/dedupe-and-rank.js +98 -0
package/dist/runner/nodes/deterministic-review.d.ts +55 -0
package/dist/runner/nodes/deterministic-review.js +206 -0
package/dist/runner/nodes/expert-review.d.ts +68 -0
package/dist/runner/nodes/expert-review.js +197 -0
package/dist/runner/nodes/gap-analysis.d.ts +48 -0
package/dist/runner/nodes/gap-analysis.js +153 -0
package/dist/runner/nodes/generate-prd-manifest.d.ts +53 -0
package/dist/runner/nodes/generate-prd-manifest.js +209 -0
package/dist/runner/nodes/hackernews-search.d.ts +12 -0
package/dist/runner/nodes/hackernews-search.js +63 -0
package/dist/runner/nodes/identify-gaps.d.ts +33 -0
package/dist/runner/nodes/identify-gaps.js +185 -0
package/dist/runner/nodes/plan-queries.d.ts +28 -0
package/dist/runner/nodes/plan-queries.js +120 -0
package/dist/runner/nodes/prd-validator.d.ts +51 -0
package/dist/runner/nodes/prd-validator.js +203 -0
package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +22 -0
package/dist/runner/nodes/synthesis-archaeology-validator.js +131 -0
package/dist/runner/nodes/synthesis-validator.d.ts +51 -0
package/dist/runner/nodes/synthesis-validator.js +185 -0
package/dist/runner/nodes/synthesize-prd.d.ts +84 -0
package/dist/runner/nodes/synthesize-prd.js +202 -0
package/dist/runner/nodes/synthesize-report.d.ts +53 -0
package/dist/runner/nodes/synthesize-report.js +188 -0
package/dist/runner/nodes/tavily-search.d.ts +21 -0
package/dist/runner/nodes/tavily-search.js +57 -0
package/dist/runner/nodes/uspto-search.d.ts +13 -0
package/dist/runner/nodes/uspto-search.js +62 -0
package/dist/runner/nodes/verify-grounding.d.ts +54 -0
package/dist/runner/nodes/verify-grounding.js +134 -0
package/dist/runner/prd.d.ts +28 -0
package/dist/runner/prd.js +494 -0
package/dist/schemas/audit-event.d.ts +1151 -0
package/dist/schemas/audit-event.js +141 -0
package/dist/schemas/index.d.ts +17 -0
package/dist/schemas/index.js +33 -0
package/dist/schemas/mesh-context.d.ts +415 -0
package/dist/schemas/mesh-context.js +95 -0
package/dist/schemas/observed-architecture.d.ts +262 -0
package/dist/schemas/observed-architecture.js +90 -0
package/dist/schemas/prd-brief.d.ts +111 -0
package/dist/schemas/prd-brief.js +37 -0
package/dist/schemas/prd-doc.d.ts +249 -0
package/dist/schemas/prd-doc.js +42 -0
package/dist/schemas/prd-manifest.d.ts +171 -0
package/dist/schemas/prd-manifest.js +73 -0
package/dist/schemas/primitives.d.ts +47 -0
package/dist/schemas/primitives.js +41 -0
package/dist/schemas/query-plan.d.ts +33 -0
package/dist/schemas/query-plan.js +25 -0
package/dist/schemas/ranked-source.d.ts +82 -0
package/dist/schemas/ranked-source.js +29 -0
package/dist/schemas/research-brief.d.ts +114 -0
package/dist/schemas/research-brief.js +49 -0
package/dist/schemas/research-doc.d.ts +104 -0
package/dist/schemas/research-doc.js +37 -0
package/dist/search/arxiv-client.d.ts +41 -0
package/dist/search/arxiv-client.js +88 -0
package/dist/search/hackernews-client.d.ts +33 -0
package/dist/search/hackernews-client.js +44 -0
package/dist/search/provider-result.d.ts +25 -0
package/dist/search/provider-result.js +2 -0
package/dist/search/tavily-client.d.ts +38 -0
package/dist/search/tavily-client.js +53 -0
package/dist/search/uspto-client.d.ts +50 -0
package/dist/search/uspto-client.js +112 -0
package/dist/utils/run-id.d.ts +2 -0
package/dist/utils/run-id.js +22 -0
package/package.json +53 -0

package/dist/runner/nodes/hackernews-search.js ADDED Viewed

@@ -0,0 +1,63 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.runHackerNewsSearch = runHackerNewsSearch;
+/**
+ * hackernews_search — pure_api node.
+ *
+ * Runs each community query through Algolia's HN Search. Salience score
+ * derived from HN's `points` field with a soft cap: log(1+points)/8 →
+ * a 100-point story scores ~0.58, a 1000-point story scores ~0.86,
+ * everything ≥ 5000 saturates at 1.0.
+ */
+const hackernews_client_1 = require("../../search/hackernews-client");
/**
 * Run every query through the HN search client concurrently and flatten the
 * outcomes into per-query envelopes plus one combined result list.
 *
 * A rejected query never aborts the batch: it is recorded as an envelope
 * with `httpStatus: 0`, zero counts and the failure message in `error`.
 *
 * @param opts `queries` to run, an optional `hitsPerQuery` (default 5) and an
 *   optional `fetchImpl` that is forwarded to the client untouched.
 * @returns `{ envelopes, results }` — one envelope per query, in query
 *   order, and one result per hit that carries a usable URL.
 */
async function runHackerNewsSearch(opts) {
    const pending = opts.queries.map((query) => (0, hackernews_client_1.hackerNewsSearch)({
        query,
        hitsPerPage: opts.hitsPerQuery ?? 5,
        fetchImpl: opts.fetchImpl,
    }));
    const outcomes = await Promise.allSettled(pending);
    const envelopes = [];
    const results = [];
    for (let idx = 0; idx < opts.queries.length; idx += 1) {
        const query = opts.queries[idx];
        const outcome = outcomes[idx];
        if (outcome.status !== 'fulfilled') {
            // Keep the failure visible to the caller instead of throwing.
            const { reason } = outcome;
            const message = reason instanceof Error ? reason.message : String(reason);
            envelopes.push({ query, httpStatus: 0, responseBytes: 0, resultCount: 0, error: message });
            continue;
        }
        const response = outcome.value;
        envelopes.push({
            query,
            httpStatus: response.httpStatus,
            responseBytes: response.responseBytes,
            resultCount: response.results.length,
        });
        for (const hit of response.results) {
            // External article URL wins; the HN discussion thread is the fallback.
            const url = hit.url || hit.hnUrl;
            if (url) {
                results.push({
                    provider: 'hackernews',
                    fromQuery: query,
                    title: hit.title,
                    url,
                    content: '', // HN search returns no abstract
                    score: pointsToScore(hit.points),
                    publishedDate: hit.createdAt || undefined,
                    authors: hit.author ? [hit.author] : undefined,
                });
            }
        }
    }
    return { envelopes, results };
}
/**
 * Map an HN `points` value onto a salience score in (0, 1].
 *
 * Missing, non-numeric, zero or negative points get the neutral floor 0.3.
 * Everything else is scored as `log(1 + points) / 8`, capped at 1
 * (100 points → ~0.58, 1000 → ~0.86; the cap is reached just below 3000).
 *
 * @param points Raw points value; coerced with `Number()` so numeric strings
 *   are scored like numbers instead of being string-concatenated.
 * @returns Score between 0.3 (floor) and 1 (cap); never NaN.
 */
function pointsToScore(points) {
    const value = Number(points);
    // NaN (e.g. `undefined` points) would otherwise leak through Math.log as a NaN score.
    if (Number.isNaN(value) || value <= 0) {
        return 0.3;
    }
    return Math.min(1, Math.log(1 + value) / 8);
}

package/dist/runner/nodes/identify-gaps.d.ts ADDED Viewed

@@ -0,0 +1,33 @@
+/**
+ * identify_gaps — pure node (archaeology path).
+ *
+ * Compares the ObservedArchitecture from analyze_architecture against the
+ * MeshContext.bar.calm_model and produces:
+ *   - Gap[]   — structured discrepancies tagged with severity + evidence
+ *   - 3 web queries — used by web_research to ground the synthesis in
+ *     external best-practice guidance for the most significant gaps
+ *
+ * Phase 3a uses a conservative, heuristic comparison. CALM nodes are
+ * matched against observed modules by case-insensitive substring on
+ * either `unique-id` or `name`. Endpoints are matched against CALM
+ * `interface` declarations when present; otherwise flagged as
+ * "endpoint_not_in_calm" and downgraded to LOW severity to avoid
+ * crying wolf when the CALM model is just terse.
+ *
+ * Phase 3b (with tree-sitter) will tighten the matching with import-graph
+ * reachability and control-flow analysis. For now the synthesis prompt
+ * knows the gaps are heuristic; reviewers triage.
+ */
+import type { ArchaeologyGap, MeshContext, ObservedArchitecture } from '../../schemas';
/** Inputs accepted by `identifyGaps`. */
export interface IdentifyGapsOpts {
    /** Architecture observed in the code; compared against the CALM model. */
    observed: ObservedArchitecture;
    /** Mesh context whose `bar.calm_model` is the declared side of the comparison. */
    meshContext: MeshContext;
    /** Maximum number of gaps to return. Defaults to 15 when omitted. */
    topN?: number;
}
/** Output of `identifyGaps`. */
export interface IdentifyGapsResult {
    /** Structured discrepancies, capped at `topN`. */
    gaps: ArchaeologyGap[];
    /** The three web queries the next node (web_research) will run via Tavily. */
    webQueries: string[];
}
/** Compare observed architecture with the CALM model and derive gaps plus web queries. */
export declare function identifyGaps(
    opts: IdentifyGapsOpts,
): IdentifyGapsResult;

package/dist/runner/nodes/identify-gaps.js ADDED Viewed

@@ -0,0 +1,185 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.identifyGaps = identifyGaps;
/**
 * Compare the observed architecture against the CALM nodes of the mesh
 * context and report heuristic gaps, most severe first.
 *
 * Four rules run in order; gap ids (`G1`, `G2`, …) follow discovery order:
 *   1. missing_module              — CALM node with no matching module (HIGH)
 *   2. orphan_module               — module with no matching CALM node (MEDIUM, LOW for layer "unknown")
 *   3. endpoint_not_in_calm        — endpoint file whose directory no CALM node mentions (LOW, once per file)
 *   4. framework_choice_undeclared — framework not mentioned in any ADR title/decision (MEDIUM)
 *
 * @param opts `observed`, `meshContext` and optional `topN` (default 15).
 * @returns `{ gaps, webQueries }` — gaps sorted HIGH > MEDIUM > LOW (discovery
 *   order kept within a tier) and truncated to `topN`, plus the web queries
 *   derived from those gaps.
 */
function identifyGaps(opts) {
    const topN = opts.topN ?? 15;
    const calmNodes = extractCalmNodes(opts.meshContext);
    const observed = opts.observed;
    const gaps = [];
    let nextId = 1;
    const nextGapId = () => `G${nextId++}`;
    // Rule 1: missing_module — CALM mentions a node that observed modules don't match
    for (const calm of calmNodes) {
        if (matchesAnyModule(calm, observed)) {
            continue;
        }
        gaps.push({
            id: nextGapId(),
            kind: 'missing_module',
            severity: 'HIGH',
            summary: `CALM node \`${calm.uniqueId}\` (${calm.name}, type=${calm.nodeType}) has no matching module in the code.`,
            observedEvidence: [`(no module named or containing "${calm.name}")`],
            meshReferences: [calm.uniqueId],
        });
    }
    // Rule 2: orphan_module — observed module with no matching CALM node.
    // Shared modules are skipped (infrastructure, not features) and so are
    // directories with fewer than 3 files (noise).
    for (const mod of observed.modules) {
        if (calmNodes.some((n) => moduleMatchesCalm(mod.name, n))) {
            continue;
        }
        if (mod.layer === 'shared' || mod.fileCount < 3) {
            continue;
        }
        gaps.push({
            id: nextGapId(),
            kind: 'orphan_module',
            severity: mod.layer === 'unknown' ? 'LOW' : 'MEDIUM',
            summary: `Module \`${mod.name}\` (layer=${mod.layer}, ${mod.fileCount} file(s)) has no matching CALM node.`,
            observedEvidence: [`OA[${mod.name}]`],
            meshReferences: [],
        });
    }
    // Rule 3: endpoint_not_in_calm — flagged at most once per file to avoid
    // 60-endpoint spam from a single file.
    const flaggedFiles = new Set();
    for (const ep of observed.endpoints) {
        if (flaggedFiles.has(ep.file)) {
            continue;
        }
        // Loose check: a CALM node id or name containing the file's top directory
        // counts as coverage. Leading "", "." and ".." segments are skipped:
        // an empty hint is a substring of every string and would mark every
        // endpoint as covered.
        const dirHint = (ep.file.split('/').find((seg) => seg !== '' && seg !== '.' && seg !== '..') ?? '').toLowerCase();
        const covered = dirHint !== ''
            && calmNodes.some((n) => n.uniqueId.toLowerCase().includes(dirHint) || n.name.toLowerCase().includes(dirHint));
        if (covered) {
            continue;
        }
        flaggedFiles.add(ep.file);
        gaps.push({
            id: nextGapId(),
            kind: 'endpoint_not_in_calm',
            severity: 'LOW',
            summary: `Endpoint \`${ep.method} ${ep.path}\` (framework=${ep.framework}) in \`${ep.file}\` is not represented in the CALM model.`,
            observedEvidence: [`OA[${ep.file}]`],
            meshReferences: [],
        });
    }
    // Rule 4: framework_choice_undeclared — observed frameworks not mentioned in mesh decisions
    const adrText = (opts.meshContext.bar?.adrs ?? [])
        .map((a) => `${a.title} ${a.decision}`)
        .join(' ')
        .toLowerCase();
    for (const fw of observed.profile.frameworks) {
        if (adrText.includes(fw.toLowerCase())) {
            continue;
        }
        gaps.push({
            id: nextGapId(),
            kind: 'framework_choice_undeclared',
            severity: 'MEDIUM',
            summary: `Framework \`${fw}\` is in use but no ADR mentions it.`,
            observedEvidence: [`OA[manifests:${observed.profile.manifests.join(',')}]`],
            meshReferences: [],
        });
    }
    // Cap (severity HIGH > MEDIUM > LOW). Array sort is stable, so discovery
    // order is preserved within a tier.
    const sevOrder = { HIGH: 0, MEDIUM: 1, LOW: 2 };
    const ranked = [...gaps]
        .sort((a, b) => sevOrder[a.severity] - sevOrder[b.severity])
        .slice(0, topN);
    return { gaps: ranked, webQueries: deriveQueriesFromGaps(ranked, observed) };
}
+// ============================================================================
+// Helpers
+// ============================================================================
/**
 * Pull the usable nodes out of `meshContext.bar.calm_model.nodes`.
 *
 * Entries that are not objects, or that have no `unique-id` / `uniqueId`,
 * are dropped. `name` falls back to the id and `nodeType` to "unknown".
 *
 * @returns Array of `{ uniqueId, name, nodeType }` (all strings); empty when
 *   there is no CALM model or it has no `nodes` array.
 */
function extractCalmNodes(meshContext) {
    const model = meshContext.bar?.calm_model;
    if (!model || typeof model !== 'object') {
        return [];
    }
    const rawNodes = model.nodes;
    if (!Array.isArray(rawNodes)) {
        return [];
    }
    const nodes = [];
    for (const raw of rawNodes) {
        if (!raw || typeof raw !== 'object') {
            continue;
        }
        const uniqueId = String(raw['unique-id'] ?? raw['uniqueId'] ?? '');
        if (uniqueId) {
            nodes.push({
                uniqueId,
                name: String(raw['name'] ?? uniqueId),
                nodeType: String(raw['node-type'] ?? raw['nodeType'] ?? 'unknown'),
            });
        }
    }
    return nodes;
}
/**
 * Decide whether a CALM node is represented anywhere in the observed code.
 *
 * The node matches when its `name` OR its `uniqueId` (case-insensitive,
 * at least 2 characters) is a substring of any module name or endpoint file
 * path. Checking both keys keeps a node such as
 * `{ uniqueId: 'payments', name: 'Payment Processing Service' }` from being
 * reported as missing when a `payments` module exists.
 *
 * @param calm `{ uniqueId, name }` CALM node.
 * @param observed Object with `modules[].name` and `endpoints[].file`.
 * @returns `true` when at least one module or endpoint file matches.
 */
function matchesAnyModule(calm, observed) {
    const needles = [...new Set([calm.name, calm.uniqueId]
        .filter((value) => typeof value === 'string')
        .map((value) => value.toLowerCase())
        // One-character needles match almost anything; ignore them.
        .filter((value) => value.length >= 2))];
    if (needles.length === 0) {
        return false;
    }
    const containsNeedle = (text) => {
        const haystack = text.toLowerCase();
        return needles.some((needle) => haystack.includes(needle));
    };
    return observed.modules.some((m) => containsNeedle(m.name))
        || observed.endpoints.some((e) => containsNeedle(e.file));
}
/**
 * Decide whether an observed module corresponds to a CALM node.
 *
 * The last non-empty path segment of `moduleName` is compared
 * case-insensitively with the node's `uniqueId` and `name`; a substring
 * match in either direction counts.
 *
 * Empty strings never participate: `''` is a substring of every string, so
 * an empty leaf (e.g. a trailing `/`) or an empty CALM name would otherwise
 * match everything and hide real orphan modules.
 *
 * @param moduleName Module path such as `src/billing`.
 * @param calm `{ uniqueId, name }` CALM node.
 * @returns `true` when the module leaf and the node overlap.
 */
function moduleMatchesCalm(moduleName, calm) {
    const leaf = (moduleName.split('/').filter((seg) => seg !== '').pop() ?? '').toLowerCase();
    if (leaf === '') {
        return false;
    }
    return [calm.uniqueId, calm.name]
        .filter((value) => typeof value === 'string' && value !== '')
        .map((value) => value.toLowerCase())
        .some((candidate) => candidate.includes(leaf) || leaf.includes(candidate));
}
/**
 * Turn the top gaps into 3 distinct web research queries.
 *
 * Gaps are visited in order and each contributes one query built from its
 * `kind`; a query identical to one already collected is skipped, so several
 * gaps of the same kind no longer spend all three slots on the same search.
 * Remaining slots are filled from generic fallbacks. Every query ends with
 * the current UTC year for recency.
 *
 * @param gaps Ranked gaps (`kind`, `meshReferences`, `observedEvidence`).
 * @param observed Observed architecture; `profile.frameworks[0]`, else
 *   `profile.languages[0]`, else "web service" names the stack in queries.
 * @returns Exactly 3 distinct query strings.
 */
function deriveQueriesFromGaps(gaps, observed) {
    const year = new Date().getUTCFullYear();
    const primaryFw = observed.profile.frameworks[0] ?? observed.profile.languages[0] ?? 'web service';
    const MAX_QUERIES = 3;
    // Build the query for one gap; unknown kinds yield null and are skipped.
    const queryForGap = (gap) => {
        switch (gap.kind) {
            case 'missing_module': {
                const ref = gap.meshReferences[0] ?? 'service';
                return `how to introduce ${ref} into a ${primaryFw} architecture ${year}`;
            }
            case 'orphan_module': {
                // Evidence looks like `OA[<module>]`; unwrap it to the bare module name.
                const evidence = gap.observedEvidence[0]?.replace(/^OA\[(.*)\]$/, '$1') ?? 'module';
                return `document architecture decision for ${evidence} in CALM ${year}`;
            }
            case 'endpoint_not_in_calm':
                return `best practices for representing REST endpoints in architecture-as-code ${year}`;
            case 'missing_security_control':
                return `implementing security controls in ${primaryFw} services ${year}`;
            case 'framework_choice_undeclared':
                return `when to write an ADR for a new framework adoption ${year}`;
            default:
                return null;
        }
    };
    const queries = [];
    const addQuery = (query) => {
        if (query !== null && queries.length < MAX_QUERIES && !queries.includes(query)) {
            queries.push(query);
        }
    };
    for (const gap of gaps) {
        if (queries.length >= MAX_QUERIES) {
            break;
        }
        addQuery(queryForGap(gap));
    }
    // Pad to 3 with generic queries — always include year for recency.
    const fallbacks = [
        `architecture-as-code best practices ${primaryFw} ${year}`,
        `${primaryFw} layered architecture review checklist ${year}`,
        `CALM architecture model adoption case studies ${year}`,
    ];
    for (const fallback of fallbacks) {
        addQuery(fallback);
    }
    return queries;
}

package/dist/runner/nodes/plan-queries.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import { type LlmProvider, type MeshContext, type QueryPlan, type ResearchBrief } from '../../schemas';
+import { type LoadedPrompt } from '../../mesh/prompt-loader';
/** Inputs accepted by `planQueries`. */
export interface PlanQueriesOpts {
    /** Mesh directory the query-plan prompt is loaded from. */
    meshDir: string;
    /** Research brief; supplies the topic, scope level and default provider. */
    brief: ResearchBrief;
    /** Mesh context summarised into the prompt. */
    meshContext: MeshContext;
    /** Provider routing — taken from `brief.llm_provider` unless set here. */
    provider?: LlmProvider;
    /** Required when provider === 'anthropic'. */
    anthropicApiKey?: string;
    /** Required when provider === 'github-models'. */
    githubToken?: string;
    /** Optional `fetch` replacement, passed through to the LLM call. */
    fetchImpl?: typeof fetch;
}
/** Output of `planQueries`. */
export interface PlanQueriesResult {
    /** The validated query plan. */
    queryPlan: QueryPlan;
    /** The prompt that was loaded and filled for this run. */
    prompt: LoadedPrompt;
    /** LLM usage for the run. */
    llm: {
        provider: LlmProvider;
        model: string;
        inputTokens: number;
        outputTokens: number;
        costUsd: number;
        /** Number of LLM calls made: 1 on the happy path, 2 after a retry. */
        attempts: number;
    };
}
/** Ask the LLM for a query plan and validate it against `QueryPlan`. */
export declare function planQueries(
    opts: PlanQueriesOpts,
): Promise<PlanQueriesResult>;

package/dist/runner/nodes/plan-queries.js ADDED Viewed

@@ -0,0 +1,120 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.planQueries = planQueries;
+const schemas_1 = require("../../schemas");
+const llm_router_1 = require("../../llm/llm-router");
+const prompt_loader_1 = require("../../mesh/prompt-loader");
/**
 * Ask the LLM for a query plan and validate the answer, retrying once.
 *
 * The first attempt sends the filled prompt as-is. If the response fails
 * validation, a second attempt appends the validation error and a reminder
 * of the expected shape. Token counts and cost are summed over all attempts.
 *
 * @param opts See `PlanQueriesOpts`; `provider` defaults to `brief.llm_provider`.
 * @returns `{ queryPlan, prompt, llm }` where `llm.attempts` is 1 or 2.
 * @throws Error when both attempts fail validation.
 */
async function planQueries(opts) {
    const provider = opts.provider ?? opts.brief.llm_provider;
    const prompt = (0, prompt_loader_1.loadPrompt)({
        meshDir: opts.meshDir,
        packId: 'research/query-plan',
        context: buildPromptContext(opts.brief, opts.meshContext),
    });
    const baseSystem = 'You output a SINGLE JSON object exactly matching the schema described. No prose before or after, no markdown fence. The first character of your response MUST be `{`.';
    const maxAttempts = 2;
    const usage = { inputTokens: 0, outputTokens: 0, costUsd: 0 };
    let lastError = null;
    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
        let userPrompt = prompt.filled;
        if (attempt !== 1) {
            // Retry: feed the validation failure back to the model.
            userPrompt = `${prompt.filled}\n\n---\n\nYour previous response failed validation:\n${lastError}\n\nReturn a SINGLE JSON object with exactly 4 keys (web, arxiv, patent, community) and the exact counts (5, 3, 3, 3) requested. Web queries MUST contain a 4-digit year; patent queries MUST contain the literal token "AND".`;
        }
        const reply = await (0, llm_router_1.callLlm)({
            provider,
            tier: 'plan',
            anthropicApiKey: opts.anthropicApiKey,
            githubToken: opts.githubToken,
            system: baseSystem,
            prompt: userPrompt,
            maxTokens: 2000,
            fetchImpl: opts.fetchImpl,
        });
        usage.inputTokens += reply.inputTokens;
        usage.outputTokens += reply.outputTokens;
        usage.costUsd += reply.costUsd;
        const parsed = parseQueryPlanResponse(reply.text);
        if (!parsed.success) {
            lastError = parsed.error;
            continue;
        }
        return {
            queryPlan: parsed.data,
            prompt,
            llm: { provider, model: reply.model, ...usage, attempts: attempt },
        };
    }
    throw new Error(`plan_queries: LLM output failed QueryPlan validation after 2 attempts. Last error: ${lastError}`);
}
/**
 * Extract a JSON object from an LLM response and validate it against
 * `QueryPlan`.
 *
 * Candidates are tried in order:
 *   1. the contents of a ```json ... ``` fence when one is present,
 *      otherwise the whole trimmed response;
 *   2. the text from the first `{` to the last `}`, which tolerates prose
 *      before or after the object.
 *
 * @param raw Raw response text; non-string input is treated as empty text.
 * @returns `{ success: true, data }` on success, else `{ success: false, error }`
 *   where `error` is either `not valid JSON: …` (message of the first parse
 *   failure) or the schema issues joined with "; ".
 */
function parseQueryPlanResponse(raw) {
    const trimmed = (typeof raw === 'string' ? raw : '').trim();
    // Tolerate the model wrapping the JSON in ```json ... ```
    const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
    const candidates = [fenceMatch ? fenceMatch[1].trim() : trimmed];
    const firstBrace = trimmed.indexOf('{');
    const lastBrace = trimmed.lastIndexOf('}');
    if (firstBrace !== -1 && lastBrace > firstBrace) {
        const braced = trimmed.slice(firstBrace, lastBrace + 1);
        if (!candidates.includes(braced)) {
            candidates.push(braced);
        }
    }
    let parsedJson;
    let parsedOk = false;
    let firstFailure = null;
    for (const candidate of candidates) {
        try {
            parsedJson = JSON.parse(candidate);
            parsedOk = true;
            break;
        }
        catch (e) {
            // Report the failure of the primary candidate, not of the fallback.
            if (firstFailure === null) {
                firstFailure = e;
            }
        }
    }
    if (!parsedOk) {
        const reason = firstFailure instanceof Error ? firstFailure.message : String(firstFailure);
        return { success: false, error: `not valid JSON: ${reason}` };
    }
    const result = schemas_1.QueryPlan.safeParse(parsedJson);
    if (result.success) {
        return { success: true, data: result.data };
    }
    return { success: false, error: result.error.issues.map(formatIssue).join('; ') };
}
/**
 * Render one validation issue as `<dotted.path>: <message>`.
 * An empty path is shown as `<root>`.
 */
function formatIssue(issue) {
    const location = issue.path.join('.');
    const label = location || '<root>';
    return `${label}: ${issue.message}`;
}
/**
 * Build the nested context object the query-plan prompt is filled from.
 *
 * Missing mesh data is replaced by fixed placeholder strings (or an empty
 * list for related research) so every field is always present.
 *
 * @param brief Research brief; `topic` and `scope.level` are read.
 * @param mesh Mesh context; only `mesh.bar` is read.
 * @returns `{ brief: { topic, scope_level }, mesh: { bar: { name, calm_summary, threats_summary }, related_research } }`.
 */
function buildPromptContext(brief, mesh) {
    const bar = mesh.bar;
    let calmSummary = '(no CALM model loaded)';
    if (bar?.calm_model) {
        calmSummary = summarizeCalmModel(bar.calm_model);
    }
    let threatsSummary = '(no threat model on file)';
    if (bar?.threats) {
        threatsSummary = summarizeThreats(bar.threats);
    }
    const related = bar?.related_research;
    const relatedTopics = related?.length ? related.map((entry) => entry.topic) : [];
    const briefContext = { topic: brief.topic, scope_level: brief.scope.level };
    const barContext = {
        name: bar?.name ?? '(no bar in scope)',
        calm_summary: calmSummary,
        threats_summary: threatsSummary,
    };
    return {
        brief: briefContext,
        mesh: { bar: barContext, related_research: relatedTopics },
    };
}
/**
 * Summarise a CALM model as
 * `<N> node(s) [<distinct node types>], <M> relationship(s)`.
 *
 * The node type is read from `node-type`, falling back to the camelCase
 * `nodeType`. Nodes without a type — including `null` or non-object entries,
 * which used to throw — count as "unknown".
 *
 * @param calm CALM model object (anything else yields the placeholder).
 * @returns The one-line summary, or `(no CALM model loaded)`.
 */
function summarizeCalmModel(calm) {
    if (!calm || typeof calm !== 'object') {
        return '(no CALM model loaded)';
    }
    const nodes = Array.isArray(calm.nodes) ? calm.nodes : [];
    const relationships = Array.isArray(calm.relationships) ? calm.relationships : [];
    const kindOf = (node) => (node && typeof node === 'object' && (node['node-type'] || node['nodeType'])) || 'unknown';
    // Array.from also visits holes in sparse arrays, so they count as "unknown".
    const nodeKinds = [...new Set(Array.from(nodes, kindOf))].join(', ');
    return `${nodes.length} node(s) [${nodeKinds || 'no node-types'}], ${relationships.length} relationship(s)`;
}
/**
 * Summarise a threat list as `<category> × <count>` pairs joined by ", ",
 * in order of first appearance.
 *
 * Counts live in a `Map` rather than a plain object so that category names
 * which collide with inherited object properties (e.g. "constructor") are
 * counted correctly. Entries without a category — including `null`
 * entries — are counted as "unknown".
 *
 * @param threats Array of threat records with an optional `category`.
 * @returns The summary, or `(no threats)` for a non-array or empty list.
 */
function summarizeThreats(threats) {
    if (!Array.isArray(threats) || threats.length === 0) {
        return '(no threats)';
    }
    const counts = new Map();
    for (const threat of threats) {
        const category = threat?.category || 'unknown';
        counts.set(category, (counts.get(category) ?? 0) + 1);
    }
    return [...counts].map(([category, count]) => `${category} × ${count}`).join(', ');
}

package/dist/runner/nodes/prd-validator.d.ts ADDED Viewed

@@ -0,0 +1,51 @@
+/**
+ * prd-validator — structural validator for the markdown body produced by
+ * synthesize_prd.
+ *
+ * Enforces the canonical 10-section PRD shape defined in
+ * `.caterpillar/prompts/prd/synthesis.md`:
+ *
+ *   1. Input Premises          (R[N] / E[N] numbered list)
+ *   2. Problem Statement and Scope
+ *   3. Goals and Non-Goals
+ *   4. Functional Requirements with Traceability   (FR-NN; cites ≥1 R/E)
+ *   5. Non-Functional Requirements                  (NFR-NN; cites ≥1 R/E)
+ *   6. Security Requirements with Threat Tracing   (SR-NN; cites ≥1 THR/A0X/NIST)
+ *   7. Coverage Analysis                            (table; every premise tagged YES/PARTIAL/NO)
+ *   8. Risk Matrix
+ *   9. Success Metrics
+ *  10. References
+ *
+ * Returns ValidationReport + extra per-FR / per-SR / per-coverage signals
+ * verify_grounding consumes.
+ */
+import type { ValidationReport } from './synthesis-validator';
/** The ten canonical PRD section titles, in their required order. */
export declare const CANONICAL_PRD_SECTIONS: readonly [
    "Input Premises",
    "Problem Statement and Scope",
    "Goals and Non-Goals",
    "Functional Requirements with Traceability",
    "Non-Functional Requirements",
    "Security Requirements with Threat Tracing",
    "Coverage Analysis",
    "Risk Matrix",
    "Success Metrics",
    "References"
];
/** Any one of the canonical section titles. */
export type CanonicalPrdSection = typeof CANONICAL_PRD_SECTIONS[number];
/** Status values accepted in a Coverage Analysis row. */
export type CoverageStatus = 'YES' | 'PARTIAL' | 'NO';
/** Citation data extracted from a PRD body. */
export interface PrdCitationSignals {
    /** Numbered premise IDs (R1, R2, E1, E2, …) found in the Input Premises section. */
    premise_ids: string[];
    /** One entry per FR, with the upstream R/E IDs it cites. */
    fr_entries: {
        id: string;
        cited: string[];
    }[];
    /** One entry per SR, with the upstream THR/A0X/NIST IDs it cites. */
    sr_entries: {
        id: string;
        cited: string[];
    }[];
    /** Coverage Analysis table rows: premise → self-reported status. */
    coverage_rows: {
        premise: string;
        status: CoverageStatus;
        whereAddressed: string;
    }[];
}
/** A `ValidationReport` extended with the PRD citation signals. */
export interface PrdValidationReport extends ValidationReport {
    /** Detailed citation signals consumed by verify_grounding. */
    signals: PrdCitationSignals;
}
/** Validate a PRD markdown body against the canonical structure. */
export declare function validatePrd(
    body: string,
): PrdValidationReport;
/** Extract premise, FR, SR and coverage-table signals from a PRD markdown body. */
export declare function extractCitationSignals(
    body: string,
): PrdCitationSignals;

package/dist/runner/nodes/prd-validator.js ADDED Viewed

@@ -0,0 +1,203 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CANONICAL_PRD_SECTIONS = void 0;
+exports.validatePrd = validatePrd;
+exports.extractCitationSignals = extractCitationSignals;
+exports.CANONICAL_PRD_SECTIONS = [
+    'Input Premises',
+    'Problem Statement and Scope',
+    'Goals and Non-Goals',
+    'Functional Requirements with Traceability',
+    'Non-Functional Requirements',
+    'Security Requirements with Threat Tracing',
+    'Coverage Analysis',
+    'Risk Matrix',
+    'Success Metrics',
+    'References',
+];
+const FR_REQUIREMENT_RE = /\bFR-\d+\b/g;
+const SR_REQUIREMENT_RE = /\bSR-\d+\b/g;
+const R_OR_E_CITATION_RE = /\b[RE]\d+\b/g;
+const THR_OR_OWASP_OR_NIST_RE = /\b(?:THR-\d+|A\d{2}|NIST-[A-Z]{2}-\d+)\b/g;
+const COVERAGE_STATUS_VALID = new Set(['YES', 'PARTIAL', 'NO']);
/**
 * Validate a PRD markdown body against the canonical structure.
 *
 * Checks, in order: the H2 sections match `CANONICAL_PRD_SECTIONS`
 * position by position; every FR cites at least one R/E premise; every SR
 * cites at least one THR / A0X / NIST id; every premise has a Coverage
 * Analysis row; every coverage status is YES / PARTIAL / NO.
 *
 * @param body PRD markdown text.
 * @returns Report with `valid`, `errors`, `sectionsFound`, the extracted
 *   `signals` (produced even when rules fail) and `citation_stats`.
 */
function validatePrd(body) {
    const sectionsFound = extractH2Sections(body);
    const errors = [];
    // Rule 1 + 2: sections present in canonical order
    exports.CANONICAL_PRD_SECTIONS.forEach((expected, idx) => {
        const actual = sectionsFound[idx];
        if (actual === expected) {
            return;
        }
        const found = actual ? `"## ${actual}"` : '(missing)';
        errors.push(`Section #${idx + 1} expected "## ${expected}" but found ${found}.`);
    });
    // Signals are extracted regardless of rule failures; verify_grounding uses them.
    const signals = extractCitationSignals(body);
    // Rule 3: every FR cites ≥1 R or E
    const uncitedFrs = signals.fr_entries.filter((entry) => entry.cited.length === 0);
    uncitedFrs.forEach((entry) => {
        errors.push(`Functional Requirement ${entry.id} has no R[N] / E[N] citation.`);
    });
    // Rule 4: every SR cites ≥1 THR / A0X / NIST
    const uncitedSrs = signals.sr_entries.filter((entry) => entry.cited.length === 0);
    uncitedSrs.forEach((entry) => {
        errors.push(`Security Requirement ${entry.id} has no THR-NNN / A0X / NIST-XX-NN citation.`);
    });
    // Rule 5: Coverage Analysis table covers every input premise
    const coveredPremises = new Set(signals.coverage_rows.map((row) => row.premise));
    signals.premise_ids
        .filter((premiseId) => !coveredPremises.has(premiseId))
        .forEach((premiseId) => {
        errors.push(`Coverage Analysis table is missing a row for premise ${premiseId}.`);
    });
    // Rule 6: every coverage status is YES / PARTIAL / NO (no free-text drift)
    signals.coverage_rows
        .filter((row) => !COVERAGE_STATUS_VALID.has(row.status))
        .forEach((row) => {
        errors.push(`Coverage row for ${row.premise} has invalid status "${row.status}"; must be YES / PARTIAL / NO.`);
    });
    // Informational only: narrative sentences without a premise citation.
    const untraced_claims = countUntracedClaims(body);
    const citation_stats = {
        source_count: signals.premise_ids.length,
        conclusion_count: 0, // PRDs don't have C[N]
        recommendation_count: signals.fr_entries.length + signals.sr_entries.length,
        underCitedConclusions: 0,
        untracedRecommendations: uncitedFrs.length + uncitedSrs.length,
        untraced_claims,
    };
    return {
        valid: errors.length === 0,
        errors,
        sectionsFound,
        signals,
        citation_stats,
    };
}
+// ============================================================================
+// Section parsing
+// ============================================================================
/**
 * List the titles of all `## ` (H2) headings in document order.
 * Titles are returned without surrounding whitespace.
 */
function extractH2Sections(body) {
    const headingRe = /^##\s+(.+?)\s*$/;
    return body
        .split('\n')
        .map((line) => line.match(headingRe))
        .filter((match) => match !== null)
        .map((match) => match[1].trim());
}
/**
 * Return the text under the H2 heading named `sectionName`.
 *
 * Collection starts after the matching heading and stops at the next H2
 * with a different title. A repeated heading with the same title is skipped
 * and collection continues.
 *
 * @returns The collected lines joined by "\n"; "" when the section is absent.
 */
function extractSection(body, sectionName) {
    const headingRe = /^##\s+(.+?)\s*$/;
    const kept = [];
    let active = false;
    for (const line of body.split('\n')) {
        const heading = line.match(headingRe);
        if (heading && heading[1].trim() === sectionName) {
            active = true;
            continue;
        }
        if (!active) {
            continue;
        }
        if (heading) {
            break;
        }
        kept.push(line);
    }
    return kept.join('\n');
}
+// ============================================================================
+// Citation signal extraction
+// ============================================================================
/**
 * Gather the citation signals of a PRD body, one canonical section at a time:
 * premise ids, FR entries, SR entries and Coverage Analysis rows.
 */
function extractCitationSignals(body) {
    const sectionText = (title) => extractSection(body, title);
    return {
        premise_ids: extractPremiseIds(sectionText('Input Premises')),
        fr_entries: extractRequirementEntries(sectionText('Functional Requirements with Traceability'), FR_REQUIREMENT_RE, R_OR_E_CITATION_RE),
        sr_entries: extractRequirementEntries(sectionText('Security Requirements with Threat Tracing'), SR_REQUIREMENT_RE, THR_OR_OWASP_OR_NIST_RE),
        coverage_rows: extractCoverageRows(sectionText('Coverage Analysis')),
    };
}
/**
 * Collect the distinct premise ids (`R<n>` / `E<n>`) mentioned in a block,
 * ordered with `naturalCompare`.
 */
function extractPremiseIds(block) {
    const mentions = block.match(/\b([RE]\d+)\b/g) ?? [];
    const distinct = Array.from(new Set(mentions));
    distinct.sort(naturalCompare);
    return distinct;
}
/**
 * Split a requirements block into entries and collect each entry's citations.
 *
 * A line starts a new entry when it begins with an id matching `idRe`,
 * optionally preceded by a bullet (`-`, `*`) or a list number (`1.`) and
 * optionally wrapped in `**`. Following lines belong to that entry until
 * the next id line, so multi-line requirement bodies are handled. When an
 * id appears more than once, only the first occurrence's body is used.
 *
 * @param block Section text to scan.
 * @param idRe Pattern of the requirement id (e.g. `/\bFR-\d+\b/g`). Only its
 *   `source` is used; `\b` anchors are removed and matching is case-insensitive.
 * @param citationRe Pattern of the ids that count as citations. A missing
 *   `g` flag is added on a copy, because `matchAll` requires it.
 * @returns `{ id, cited }[]` sorted by id; `id` is upper-cased and `cited`
 *   is de-duplicated and sorted.
 */
function extractRequirementEntries(block, idRe, citationRe) {
    // Remove only the `\b` anchors. The previous `/\\b|g/g` also deleted every
    // letter "g" from the pattern, corrupting any id pattern containing one.
    const idPattern = idRe.source.replace(/\\b/g, '');
    const idAtStartRe = new RegExp(`^\\s*(?:[-*]|\\d+\\.)?\\s*(?:\\*\\*)?(${idPattern})(?:\\*\\*)?`, 'i');
    const globalCitationRe = citationRe.global
        ? citationRe
        : new RegExp(citationRe.source, `${citationRe.flags}g`);
    const escapeForRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // id → body lines of its FIRST occurrence (insertion order = document order).
    const bodiesById = new Map();
    let currentBody = null;
    for (const line of block.split('\n')) {
        const m = line.match(idAtStartRe);
        if (m) {
            const id = m[1].toUpperCase();
            currentBody = [line];
            // A repeated id still ends the previous entry, but its own body is discarded.
            if (!bodiesById.has(id)) {
                bodiesById.set(id, currentBody);
            }
        }
        else if (currentBody !== null) {
            currentBody.push(line);
        }
    }
    const entries = [];
    for (const [id, bodyLines] of bodiesById) {
        // Strip the entry's own id so it can never count as its own citation.
        const stripped = bodyLines.join('\n').replace(new RegExp(`\\b${escapeForRegExp(id)}\\b`, 'gi'), '');
        const cited = new Set();
        for (const cm of stripped.matchAll(globalCitationRe)) {
            cited.add(cm[0]);
        }
        entries.push({ id, cited: [...cited].sort(naturalCompare) });
    }
    return entries.sort((a, b) => naturalCompare(a.id, b.id));
}
/**
 * Parse the rows of the Coverage Analysis markdown table.
 *
 * A row is a three-cell table line whose first cell is a premise id, e.g.
 * `| R1 | YES | FR-01, FR-04 |`. Header and separator lines do not match
 * and are skipped.
 *
 * The status cell is captured as free text, stripped of surrounding
 * markdown emphasis and upper-cased. Rows with a lowercase or free-text
 * status are therefore returned (and can be reported as invalid by the
 * caller) instead of being dropped and mistaken for a missing row.
 *
 * @param block Text of the Coverage Analysis section.
 * @returns `{ premise, status, whereAddressed }[]` in table order.
 */
function extractCoverageRows(block) {
    const rowRe = /^\s*\|\s*([RE]\d+)\s*\|\s*([^|]*?)\s*\|\s*([^|]*?)\s*\|\s*$/;
    const rows = [];
    for (const line of block.split('\n')) {
        const m = line.match(rowRe);
        if (!m) {
            continue;
        }
        const status = m[2]
            .replace(/^[*_`]+|[*_`]+$/g, '') // `**YES**` → `YES`
            .trim()
            .toUpperCase();
        rows.push({ premise: m[1], status, whereAddressed: m[3].trim() });
    }
    return rows;
}
/**
 * Count narrative sentences that carry no premise citation.
 *
 * Only the "Problem Statement and Scope" and "Goals and Non-Goals" sections
 * are scanned. A sentence counts when its trimmed length exceeds 40
 * characters and it contains no `R<n>` / `E<n>` reference.
 */
function countUntracedClaims(body) {
    const isUntraced = (sentence) => sentence.trim().length > 40 && !/\b[RE]\d+\b/.test(sentence);
    return ['Problem Statement and Scope', 'Goals and Non-Goals']
        .flatMap((title) => extractSection(body, title).match(/[^.!?\n]+[.!?]/g) ?? [])
        .filter(isUntraced)
        .length;
}
/**
 * Compare two ids with numeric-aware collation, so digit runs are ordered
 * by value: `R2` sorts before `R10`.
 */
function naturalCompare(a, b) {
    const collationOptions = { numeric: true };
    return a.localeCompare(b, undefined, collationOptions);
}