npm - @geotechcli/core - Versions diffs - 0.4.90 → 0.4.92 - Mend

@geotechcli/core 0.4.90 → 0.4.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87)

package/dist/agents/brain.d.ts +2 -0
package/dist/agents/brain.d.ts.map +1 -1
package/dist/agents/brain.js +92 -2
package/dist/agents/brain.js.map +1 -1
package/dist/agents/data-tools.js +1 -6
package/dist/agents/data-tools.js.map +1 -1
package/dist/agents/fem-tools.js +99 -3
package/dist/agents/fem-tools.js.map +1 -1
package/dist/agents/safety.d.ts.map +1 -1
package/dist/agents/safety.js +35 -2
package/dist/agents/safety.js.map +1 -1
package/dist/agents/swarm-planner.js +2 -2
package/dist/agents/swarm-planner.js.map +1 -1
package/dist/agents/swarm.d.ts.map +1 -1
package/dist/agents/swarm.js +3 -0
package/dist/agents/swarm.js.map +1 -1
package/dist/agents/tool-normalization.d.ts.map +1 -1
package/dist/agents/tool-normalization.js +372 -0
package/dist/agents/tool-normalization.js.map +1 -1
package/dist/config/index.d.ts.map +1 -1
package/dist/config/index.js +4 -4
package/dist/config/index.js.map +1 -1
package/dist/fem/ground-model-draft.d.ts +19 -0
package/dist/fem/ground-model-draft.d.ts.map +1 -1
package/dist/fem/ground-model-draft.js +273 -6
package/dist/fem/ground-model-draft.js.map +1 -1
package/dist/fem/index.d.ts +3 -1
package/dist/fem/index.d.ts.map +1 -1
package/dist/fem/index.js +3 -1
package/dist/fem/index.js.map +1 -1
package/dist/fem/production-readiness.d.ts +34 -0
package/dist/fem/production-readiness.d.ts.map +1 -0
package/dist/fem/production-readiness.js +174 -0
package/dist/fem/production-readiness.js.map +1 -0
package/dist/fem/routing.js +3 -3
package/dist/fem/routing.js.map +1 -1
package/dist/fem/scenario-validation.d.ts +53 -0
package/dist/fem/scenario-validation.d.ts.map +1 -0
package/dist/fem/scenario-validation.js +125 -0
package/dist/fem/scenario-validation.js.map +1 -0
package/dist/index.d.ts +1 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -1
package/dist/index.js.map +1 -1
package/dist/ingest/document-evidence-packet.d.ts +50 -50
package/dist/ingest/geotech-benchmark-corpus.d.ts +124 -2
package/dist/ingest/geotech-benchmark-corpus.d.ts.map +1 -1
package/dist/ingest/geotech-benchmark-corpus.js +420 -55
package/dist/ingest/geotech-benchmark-corpus.js.map +1 -1
package/dist/ingest/geotech-document-benchmark.d.ts +4 -0
package/dist/ingest/geotech-document-benchmark.d.ts.map +1 -1
package/dist/ingest/geotech-document-benchmark.js +196 -41
package/dist/ingest/geotech-document-benchmark.js.map +1 -1
package/dist/ingest/index.d.ts +2 -1
package/dist/ingest/index.d.ts.map +1 -1
package/dist/ingest/index.js +2 -1
package/dist/ingest/index.js.map +1 -1
package/dist/ingest/preprocessing-fixture-benchmark.d.ts +175 -0
package/dist/ingest/preprocessing-fixture-benchmark.d.ts.map +1 -0
package/dist/ingest/preprocessing-fixture-benchmark.js +598 -0
package/dist/ingest/preprocessing-fixture-benchmark.js.map +1 -0
package/dist/llm/byok-benchmark.d.ts +61 -0
package/dist/llm/byok-benchmark.d.ts.map +1 -1
package/dist/llm/byok-benchmark.js +382 -6
package/dist/llm/byok-benchmark.js.map +1 -1
package/dist/llm/index.d.ts +1 -1
package/dist/llm/index.d.ts.map +1 -1
package/dist/llm/index.js +1 -1
package/dist/llm/index.js.map +1 -1
package/dist/meta/metadata.json +1 -1
package/dist/signal/index.d.ts +112 -0
package/dist/signal/index.d.ts.map +1 -1
package/dist/signal/index.js +648 -1
package/dist/signal/index.js.map +1 -1
package/dist/standards/index.d.ts +6 -0
package/dist/standards/index.d.ts.map +1 -1
package/dist/standards/index.js +243 -0
package/dist/standards/index.js.map +1 -1
package/dist/verifier/findings.d.ts +6 -0
package/dist/verifier/findings.d.ts.map +1 -1
package/dist/verifier/findings.js +192 -1
package/dist/verifier/findings.js.map +1 -1
package/dist/verifier/index.d.ts +1 -1
package/dist/verifier/index.d.ts.map +1 -1
package/dist/verifier/index.js +1 -1
package/dist/verifier/index.js.map +1 -1
package/package.json +1 -1

package/dist/ingest/geotech-benchmark-corpus.js CHANGED

@@ -1,3 +1,4 @@
+import { collectFemDraftReadinessGuardrailFailures, } from './geotech-document-benchmark.js';
 export function buildGeotechBenchmarkCorpusReport(inputs, options = {}) {
     const fixtures = redactGeotechBenchmarkCorpusArtifact(normalizeFixtures(inputs.map((input) => input.fixture)));
     const runs = inputs.map((input) => buildCorpusRun(input));
@@ -30,6 +31,95 @@ export function buildGeotechBenchmarkCorpusReport(inputs, options = {}) {
 export function redactGeotechBenchmarkCorpusArtifact(value) {
     return redactCorpusArtifactValue(value, new WeakMap());
 }
+export function inspectGeotechBenchmarkCorpusArtifactSafety(value) {
+    const scan = scanObjectForPathSafetyLeaks(value, 'artifact');
+    const leaks = deduplicateArtifactSafetyLeaks(scan.leaks);
+    return {
+        ok: leaks.length === 0,
+        leakCount: leaks.length,
+        leaks,
+    };
+}
+export function buildGeotechBenchmarkCorpusTrend(report, options = {}) {
+    const previousHistory = options.previousHistory ?? [];
+    const current = buildGeotechBenchmarkCorpusHistoryEntry(report, {
+        mode: options.mode ?? 'local-corpus-benchmark',
+        providerProfiles: options.providerProfiles ?? report.summary.providerProfiles,
+        preprocessingModes: options.preprocessingModes ?? report.summary.preprocessingModes,
+        skippedFixtureCount: options.skippedFixtureCount ?? 0,
+    });
+    const previous = previousHistory.at(-1)
+        ?? (options.previousReport
+            ? buildGeotechBenchmarkCorpusHistoryEntry(options.previousReport, {
+                mode: 'previous-local-report',
+                providerProfiles: options.previousReport.summary.providerProfiles,
+                preprocessingModes: options.previousReport.summary.preprocessingModes,
+                skippedFixtureCount: 0,
+            })
+            : null);
+    const history = [...previousHistory, current].slice(-50);
+    return {
+        history,
+        report: {
+            kind: 'geotech-benchmark-corpus-trend',
+            schemaVersion: 1,
+            generatedAt: current.generatedAt,
+            current,
+            previous,
+            delta: previous ? buildGeotechBenchmarkCorpusTrendDelta(current, previous) : null,
+            runDeltas: previous ? buildGeotechBenchmarkCorpusRunDeltas(current.runs, previous.runs) : [],
+            historyCount: history.length,
+            note: 'Local corpus trend output stores benchmark summaries only. Raw benchmark JSON, fixture bytes, report text, model IDs, private paths, and provider tokens are intentionally excluded.',
+        },
+    };
+}
+export function validateGeotechBenchmarkCorpusTrendContract(report) {
+    const failures = [];
+    const warnings = [];
+    if (report.kind !== 'geotech-benchmark-corpus-trend') {
+        failures.push('wrong_trend_kind');
+    }
+    if (report.schemaVersion !== 1) {
+        failures.push('wrong_trend_schema_version');
+    }
+    if (!report.generatedAt) {
+        failures.push('trend_missing_generated_at');
+    }
+    if (!Number.isInteger(report.historyCount) || report.historyCount < 1) {
+        failures.push('trend_history_count_invalid');
+    }
+    if (!/raw benchmark JSON|fixture bytes|model IDs|provider tokens/i.test(report.note ?? '')) {
+        warnings.push('trend_note_should_state_excluded_raw_and_sensitive_inputs');
+    }
+    validateGeotechBenchmarkCorpusHistoryEntry(report.current, failures, 'current');
+    if (report.previous !== null) {
+        validateGeotechBenchmarkCorpusHistoryEntry(report.previous, failures, 'previous');
+    }
+    if (report.previous && report.delta == null) {
+        failures.push('trend_delta_required_when_previous_exists');
+    }
+    if (!report.previous && report.delta != null) {
+        failures.push('trend_delta_must_be_null_without_previous');
+    }
+    if (report.previous && report.runDeltas.length !== report.current.runs.length) {
+        failures.push('trend_run_delta_count_mismatch');
+    }
+    if (!report.previous && report.runDeltas.length !== 0) {
+        failures.push('trend_run_deltas_must_be_empty_without_previous');
+    }
+    const serialized = JSON.stringify(report);
+    if (/"(?:fixtures|benchmark|benchmarks|source|sourceEvidence|snippet|response|prompt|modelId|visionModelId|filePath|sourcePath|pages|rawText|pageText|ocrText|layoutText|modelCalls)"\s*:/.test(serialized)) {
+        failures.push('trend_contains_raw_benchmark_source_prompt_response_or_model_payload');
+    }
+    for (const leak of inspectGeotechBenchmarkCorpusArtifactSafety(report).leaks) {
+        failures.push(`trend_sensitive_value_leak_${sanitizeFailureToken(leak.location)}_${leak.kind}`);
+    }
+    return {
+        ok: failures.length === 0,
+        failures: [...new Set(failures)],
+        warnings: [...new Set(warnings)],
+    };
+}
 export function renderGeotechBenchmarkCorpusSvg(report) {
     const width = 980;
     const rowHeight = 34;
@@ -135,6 +225,310 @@ ${report.warnings.length ? `<h2>Warnings</h2><ul>${report.warnings.map((warning)
 </html>
 `;
 }
+export function renderGeotechBenchmarkCorpusTrendHtml(trend) {
+    const delta = trend.delta;
+    const runRows = trend.runDeltas.map((run) => `
+    <tr>
+      <td>${escapeHtml(run.fixtureId)}</td>
+      <td>${escapeHtml(run.providerProfile)}</td>
+      <td>${escapeHtml(run.preprocessingMode)}</td>
+      <td class="${run.passed ? 'pass' : 'fail'}">${run.passed ? 'pass' : 'fail'}</td>
+      <td>${formatDelta(run.traceabilityDelta, true)}</td>
+      <td>${formatDelta(run.qualityDelta, true)}</td>
+      <td>${formatDelta(run.reviewGateDelta, false)}</td>
+      <td>${formatDelta(run.hostedCallDelta, false)}</td>
+      <td>${formatDelta(run.latencyDeltaMs, false)}</td>
+    </tr>`).join('');
+    return `<!doctype html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>GeotechCLI Corpus Trend</title>
+<style>
+body{margin:0;font-family:Inter,Arial,sans-serif;background:#f8fafc;color:#0f172a}
+main{max-width:1040px;margin:0 auto;padding:32px 20px 56px}
+h1{margin:0 0 8px;font-size:28px}
+.note{color:#475569;font-size:13px}
+.summary{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:12px;margin:22px 0}
+.metric{background:white;border:1px solid #dbe5ea;border-radius:8px;padding:14px}.metric strong{display:block;font-size:24px}
+table{width:100%;border-collapse:collapse;background:white;border:1px solid #dbe5ea;border-radius:8px;overflow:hidden;margin-top:16px}
+th,td{padding:10px 12px;border-bottom:1px solid #e2e8f0;text-align:left;font-size:13px}
+th{background:#0f172a;color:#f8fafc}.pass{color:#0f766e;font-weight:700}.fail{color:#b91c1c;font-weight:700}
+</style>
+</head>
+<body>
+<main>
+<h1>GeotechCLI Corpus Trend</h1>
+<p class="note">${escapeHtml(trend.note)} Generated ${escapeHtml(trend.generatedAt)}.</p>
+<section class="summary">
+  <div class="metric"><span>History Entries</span><strong>${trend.historyCount}</strong></div>
+  <div class="metric"><span>Run Delta</span><strong>${delta ? signed(delta.runCount) : 'new'}</strong></div>
+  <div class="metric"><span>Extraction Trust Delta</span><strong>${delta ? signed(delta.averageExtractionConfidence) : 'new'}</strong></div>
+  <div class="metric"><span>Corroboration Delta</span><strong>${delta ? signed(delta.averageCorroborationScore) : 'new'}</strong></div>
+  <div class="metric"><span>Traceability Delta</span><strong>${delta ? signedPercent(delta.averageTraceabilityRate) : 'new'}</strong></div>
+  <div class="metric"><span>Quality Delta</span><strong>${delta ? signedPercent(delta.averagePreprocessingQualityScore) : 'new'}</strong></div>
+</section>
+<h2>Run Deltas</h2>
+<table><thead><tr><th>Fixture</th><th>Provider</th><th>Preprocessing</th><th>Status</th><th>Trace</th><th>Quality</th><th>Review gates</th><th>Hosted calls</th><th>Latency ms</th></tr></thead><tbody>${runRows || '<tr><td colspan="9">No previous local run is available yet.</td></tr>'}</tbody></table>
+</main>
+</body>
+</html>
+`;
+}
+function buildGeotechBenchmarkCorpusHistoryEntry(report, context) {
+    return {
+        kind: 'geotech-benchmark-corpus-history-entry',
+        schemaVersion: 1,
+        generatedAt: report.generatedAt,
+        mode: context.mode,
+        skippedFixtureCount: context.skippedFixtureCount,
+        providerProfiles: [...context.providerProfiles],
+        preprocessingModes: [...context.preprocessingModes],
+        summary: {
+            fixtureCount: finiteNumber(report.summary.fixtureCount),
+            runCount: finiteNumber(report.summary.runCount),
+            passedRuns: finiteNumber(report.summary.passedRuns),
+            failedRuns: finiteNumber(report.summary.failedRuns),
+            passed: Boolean(report.summary.passed),
+            averageConfidence: finiteNumber(report.summary.averageConfidence),
+            averageConfidenceBreakdown: summarizeHistoryConfidenceBreakdown(report.summary.averageConfidenceBreakdown),
+            averageTraceabilityRate: finiteNumber(report.summary.averageTraceabilityRate),
+            averageGroundModelReadinessScore: finiteNumber(report.summary.averageGroundModelReadinessScore),
+            averagePreprocessingQualityScore: finiteNumber(report.summary.averagePreprocessingQualityScore),
+            totalEstimatedHostedCalls: finiteNumber(report.summary.totalEstimatedHostedCalls),
+            pathLeakCount: report.pathSafety?.leakCount ?? inspectGeotechBenchmarkCorpusArtifactSafety(report).leakCount,
+        },
+        runs: report.runs.map((run) => ({
+            key: corpusRunKey(run),
+            fixtureId: run.fixtureId,
+            category: run.category,
+            providerProfile: run.providerProfile,
+            preprocessingMode: run.preprocessingMode,
+            passed: run.passed,
+            successfulPageRate: finiteNumber(run.successfulPageRate),
+            cacheHitRate: finiteNumber(run.cacheHitRate),
+            estimatedHostedCalls: finiteNumber(run.estimatedHostedCalls),
+            directTraceabilityRate: finiteNumber(run.directTraceabilityRate),
+            confidenceBreakdown: summarizeHistoryConfidenceBreakdown(run.confidenceBreakdown),
+            groundModelReadinessScore: finiteNumber(run.groundModelReadinessScore),
+            preprocessingQualityScore: finiteNumber(run.preprocessingQualityScore),
+            preprocessingRegionQualityScore: finiteNumber(run.preprocessingRegionQualityScore),
+            reviewGates: Array.isArray(run.reviewGates) ? [...run.reviewGates] : [],
+            latencyMs: typeof run.latencyMs === 'number' && Number.isFinite(run.latencyMs) ? run.latencyMs : null,
+        })),
+    };
+}
+function validateGeotechBenchmarkCorpusHistoryEntry(entry, failures, prefix) {
+    if (!entry || typeof entry !== 'object') {
+        failures.push(`${prefix}_history_entry_missing`);
+        return;
+    }
+    if (entry.kind !== 'geotech-benchmark-corpus-history-entry') {
+        failures.push(`${prefix}_history_wrong_kind`);
+    }
+    if (entry.schemaVersion !== 1) {
+        failures.push(`${prefix}_history_wrong_schema_version`);
+    }
+    if (!entry.generatedAt) {
+        failures.push(`${prefix}_history_missing_generated_at`);
+    }
+    if (!entry.mode) {
+        failures.push(`${prefix}_history_mode_missing`);
+    }
+    if (!Number.isInteger(entry.skippedFixtureCount) || entry.skippedFixtureCount < 0) {
+        failures.push(`${prefix}_history_skipped_fixture_count_invalid`);
+    }
+    if (!Array.isArray(entry.providerProfiles)) {
+        failures.push(`${prefix}_history_provider_profiles_invalid`);
+    }
+    if (!Array.isArray(entry.preprocessingModes)) {
+        failures.push(`${prefix}_history_preprocessing_modes_invalid`);
+    }
+    const summary = entry.summary;
+    const runs = Array.isArray(entry.runs) ? entry.runs : [];
+    if (!summary || typeof summary !== 'object') {
+        failures.push(`${prefix}_history_summary_missing`);
+        return;
+    }
+    for (const key of [
+        'fixtureCount',
+        'runCount',
+        'passedRuns',
+        'failedRuns',
+        'totalEstimatedHostedCalls',
+        'pathLeakCount',
+    ]) {
+        if (!Number.isInteger(summary[key]) || summary[key] < 0) {
+            failures.push(`${prefix}_history_${key}_invalid`);
+        }
+    }
+    for (const key of [
+        'averageConfidence',
+        'averageTraceabilityRate',
+        'averageGroundModelReadinessScore',
+        'averagePreprocessingQualityScore',
+    ]) {
+        if (!Number.isFinite(summary[key])) {
+            failures.push(`${prefix}_history_${key}_invalid`);
+        }
+    }
+    validateHistoryConfidenceBreakdown(summary.averageConfidenceBreakdown, failures, `${prefix}_summary`);
+    if (summary.runCount !== runs.length) {
+        failures.push(`${prefix}_history_run_count_mismatch`);
+    }
+    if (summary.passedRuns !== runs.filter((run) => run.passed).length) {
+        failures.push(`${prefix}_history_passed_runs_mismatch`);
+    }
+    if (summary.failedRuns !== runs.filter((run) => !run.passed).length) {
+        failures.push(`${prefix}_history_failed_runs_mismatch`);
+    }
+    if (summary.runCount !== summary.passedRuns + summary.failedRuns) {
+        failures.push(`${prefix}_history_summary_run_count_mismatch`);
+    }
+    if (summary.passed !== (summary.runCount > 0 && summary.failedRuns === 0 && summary.pathLeakCount === 0)) {
+        failures.push(`${prefix}_history_passed_flag_mismatch`);
+    }
+    if (summary.pathLeakCount !== 0) {
+        failures.push(`${prefix}_history_path_leaks_present`);
+    }
+    const observedProviders = new Set(runs.map((run) => run.providerProfile));
+    const observedModes = new Set(runs.map((run) => run.preprocessingMode));
+    for (const provider of observedProviders) {
+        if (!entry.providerProfiles.includes(provider)) {
+            failures.push(`${prefix}_history_provider_profile_missing_${sanitizeFailureToken(provider)}`);
+        }
+    }
+    for (const mode of observedModes) {
+        if (!entry.preprocessingModes.includes(mode)) {
+            failures.push(`${prefix}_history_preprocessing_mode_missing_${sanitizeFailureToken(mode)}`);
+        }
+    }
+    for (const [index, run] of runs.entries()) {
+        const label = `${prefix}_run_${sanitizeFailureToken(run.key || String(index))}`;
+        if (run.key !== corpusRunKey(run)) {
+            failures.push(`${label}_key_mismatch`);
+        }
+        for (const key of ['fixtureId', 'category', 'providerProfile', 'preprocessingMode']) {
+            if (typeof run[key] !== 'string' || !run[key].trim()) {
+                failures.push(`${label}_${key}_missing`);
+            }
+        }
+        for (const key of [
+            'successfulPageRate',
+            'cacheHitRate',
+            'directTraceabilityRate',
+            'preprocessingQualityScore',
+            'preprocessingRegionQualityScore',
+        ]) {
+            if (!Number.isFinite(run[key]) || run[key] < 0 || run[key] > 1) {
+                failures.push(`${label}_${key}_invalid`);
+            }
+        }
+        if (!Number.isInteger(run.estimatedHostedCalls) || run.estimatedHostedCalls < 0) {
+            failures.push(`${label}_estimatedHostedCalls_invalid`);
+        }
+        if (!Number.isFinite(run.groundModelReadinessScore) || run.groundModelReadinessScore < 0) {
+            failures.push(`${label}_groundModelReadinessScore_invalid`);
+        }
+        validateHistoryConfidenceBreakdown(run.confidenceBreakdown, failures, `${label}_confidence`);
+        if (!Array.isArray(run.reviewGates)) {
+            failures.push(`${label}_reviewGates_invalid`);
+        }
+        if (run.latencyMs != null && (!Number.isFinite(run.latencyMs) || run.latencyMs < 0)) {
+            failures.push(`${label}_latencyMs_invalid`);
+        }
+    }
+}
+function buildGeotechBenchmarkCorpusTrendDelta(current, previous) {
+    return {
+        fixtureCount: current.summary.fixtureCount - previous.summary.fixtureCount,
+        runCount: current.summary.runCount - previous.summary.runCount,
+        passedRuns: current.summary.passedRuns - previous.summary.passedRuns,
+        failedRuns: current.summary.failedRuns - previous.summary.failedRuns,
+        averageConfidence: roundRatio(current.summary.averageConfidence - previous.summary.averageConfidence),
+        averageExtractionConfidence: roundRatio(current.summary.averageConfidenceBreakdown.extractionConfidence
+            - previous.summary.averageConfidenceBreakdown.extractionConfidence),
+        averageCorroborationScore: roundRatio(current.summary.averageConfidenceBreakdown.corroborationScore
+            - previous.summary.averageConfidenceBreakdown.corroborationScore),
+        averageTraceabilityRate: roundRatio(current.summary.averageTraceabilityRate - previous.summary.averageTraceabilityRate),
+        averageGroundModelReadinessScore: current.summary.averageGroundModelReadinessScore
+            - previous.summary.averageGroundModelReadinessScore,
+        averagePreprocessingQualityScore: roundRatio(current.summary.averagePreprocessingQualityScore - previous.summary.averagePreprocessingQualityScore),
+        totalEstimatedHostedCalls: current.summary.totalEstimatedHostedCalls - previous.summary.totalEstimatedHostedCalls,
+        pathLeakCount: current.summary.pathLeakCount - previous.summary.pathLeakCount,
+    };
+}
+function buildGeotechBenchmarkCorpusRunDeltas(currentRuns, previousRuns) {
+    const previousByKey = new Map(previousRuns.map((run) => [run.key, run]));
+    return currentRuns.map((current) => {
+        const previous = previousByKey.get(current.key);
+        const status = previous
+            ? (current.passed === previous.passed ? 'unchanged' : 'changed')
+            : 'new';
+        return {
+            key: current.key,
+            fixtureId: current.fixtureId,
+            providerProfile: current.providerProfile,
+            preprocessingMode: current.preprocessingMode,
+            status,
+            passed: current.passed,
+            previousPassed: previous?.passed ?? null,
+            cacheHitRateDelta: previous ? roundRatio(current.cacheHitRate - previous.cacheHitRate) : null,
+            hostedCallDelta: previous ? current.estimatedHostedCalls - previous.estimatedHostedCalls : null,
+            traceabilityDelta: previous ? roundRatio(current.directTraceabilityRate - previous.directTraceabilityRate) : null,
+            extractionConfidenceDelta: previous
+                ? roundRatio(current.confidenceBreakdown.extractionConfidence
+                    - previous.confidenceBreakdown.extractionConfidence)
+                : null,
+            corroborationScoreDelta: previous
+                ? roundRatio(current.confidenceBreakdown.corroborationScore
+                    - previous.confidenceBreakdown.corroborationScore)
+                : null,
+            groundModelReadinessDelta: previous
+                ? current.groundModelReadinessScore - previous.groundModelReadinessScore
+                : null,
+            qualityDelta: previous ? roundRatio(current.preprocessingQualityScore - previous.preprocessingQualityScore) : null,
+            reviewGateDelta: previous ? current.reviewGates.length - previous.reviewGates.length : null,
+            latencyDeltaMs: previous && current.latencyMs != null && previous.latencyMs != null
+                ? current.latencyMs - previous.latencyMs
+                : null,
+        };
+    }).sort((left, right) => left.key.localeCompare(right.key));
+}
+function summarizeHistoryConfidenceBreakdown(value) {
+    return {
+        overall: finiteNumber(value?.overall),
+        extractionConfidence: finiteNumber(value?.extractionConfidence),
+        engineeringCompleteness: finiteNumber(value?.engineeringCompleteness),
+        traceabilityScore: finiteNumber(value?.traceabilityScore),
+        corroborationScore: finiteNumber(value?.corroborationScore),
+        readinessScore: finiteNumber(value?.readinessScore),
+        pageEvidenceConfidence: finiteNumber(value?.pageEvidenceConfidence),
+    };
+}
+function validateHistoryConfidenceBreakdown(value, failures, prefix) {
+    if (!value || typeof value !== 'object') {
+        failures.push(`${prefix}_confidence_breakdown_missing`);
+        return;
+    }
+    for (const key of [
+        'overall',
+        'extractionConfidence',
+        'engineeringCompleteness',
+        'traceabilityScore',
+        'corroborationScore',
+        'readinessScore',
+        'pageEvidenceConfidence',
+    ]) {
+        if (!Number.isFinite(value[key]) || value[key] < 0) {
+            failures.push(`${prefix}_${key}_invalid`);
+        }
+    }
+}
+function corpusRunKey(run) {
+    return `${run.fixtureId}::${run.providerProfile}::${run.preprocessingMode}`;
+}
 function buildCorpusRun(input) {
     const benchmark = input.benchmark;
     const providerProfile = input.providerProfile
@@ -410,58 +804,7 @@ function validateFemExecutionBoundary(benchmark) {
     if (!fem) {
         return ['FEM draft readiness block missing from benchmark output'];
     }
-    const agentRunAllowedRoutes = fem.agentRunAllowedRoutes ?? [];
-    const agentWebglAllowedRoutes = fem.agentWebglAllowedRoutes ?? [];
-    const agentResultManifestAllowedRoutes = fem.agentResultManifestAllowedRoutes ?? [];
-    const caseOutputAvailableRoutes = fem.caseOutputAvailableRoutes ?? [];
-    const humanRunCommandAvailableRoutes = fem.humanRunCommandAvailableRoutes ?? [];
-    const staleRunCommandRoutes = fem.staleRunCommandRoutes ?? [];
-    const failures = [
-        fem.canAutoProceed
-            ? 'FEM draft readiness became auto-proceedable'
-            : null,
-        agentRunAllowedRoutes.length > 0
-            ? `FEM benchmark exposed agent-run routes: ${routeList(agentRunAllowedRoutes)}`
-            : null,
-        agentWebglAllowedRoutes.length > 0
-            ? `FEM benchmark exposed agent WebGL routes: ${routeList(agentWebglAllowedRoutes)}`
-            : null,
-        agentResultManifestAllowedRoutes.length > 0
-            ? `FEM benchmark exposed agent result-manifest routes: ${routeList(agentResultManifestAllowedRoutes)}`
-            : null,
-        caseOutputAvailableRoutes.length > 0
-            ? `FEM benchmark exposed unreviewed case-output routes: ${routeList(caseOutputAvailableRoutes)}`
-            : null,
-        humanRunCommandAvailableRoutes.length > 0
-            ? `FEM benchmark exposed unreviewed human-run routes: ${routeList(humanRunCommandAvailableRoutes)}`
-            : null,
-        staleRunCommandRoutes.length > 0
-            ? `FEM benchmark recommended stale run commands: ${routeList(staleRunCommandRoutes)}`
-            : null,
-    ];
-    for (const route of fem.routes ?? []) {
-        const boundary = route.executionBoundary;
-        if (!boundary) {
-            failures.push(`FEM route ${route.objective} has no execution boundary`);
-            continue;
-        }
-        failures.push(route.agentRunAllowed || boundary.agentRunAllowed
-            ? `FEM route ${route.objective} exposed agent solver execution`
-            : null, boundary.agentWebglRenderAllowed
-            ? `FEM route ${route.objective} exposed agent WebGL rendering`
-            : null, boundary.agentResultManifestAllowed
-            ? `FEM route ${route.objective} exposed agent result-manifest creation`
-            : null, boundary.caseOutputAvailable
-            ? `FEM route ${route.objective} exposed unreviewed case output`
-            : null, boundary.humanRunCommandAvailable
-            ? `FEM route ${route.objective} exposed an unreviewed human run command`
-            : null, !boundary.humanReviewRequired
-            ? `FEM route ${route.objective} no longer requires human review`
-            : null, /\bfem run\b/i.test(route.recommendedCommand ?? '')
-            ? `FEM route ${route.objective} recommended a run command instead of a draft command`
-            : null);
-    }
-    return [...new Set(failures.filter((value) => value != null))];
+    return collectFemDraftReadinessGuardrailFailures(fem).map((failure) => failure.endsWith('.') ? failure.slice(0, -1) : failure);
 }
 function buildRunWarnings(fixture, run) {
     return [
@@ -575,9 +918,25 @@ function deduplicatePathSafetyLeaks(leaks) {
     }
     return unique;
 }
+function deduplicateArtifactSafetyLeaks(leaks) {
+    const seen = new Set();
+    const unique = [];
+    for (const leak of leaks) {
+        const key = `${leak.kind}:${leak.location}`;
+        if (seen.has(key)) {
+            continue;
+        }
+        seen.add(key);
+        unique.push(leak);
+    }
+    return unique;
+}
 function sanitizeLocationKey(value) {
     return value.replace(/[^a-zA-Z0-9_$-]/g, '_');
 }
+function sanitizeFailureToken(value) {
+    return value.replace(/[^a-zA-Z0-9_-]+/g, '_').slice(0, 72);
+}
 function looksLikeAbsoluteLocalPath(value) {
     if (!value || /^(?:https?|s3|gs|file):\/\//i.test(value)) {
         return false;
@@ -697,9 +1056,6 @@ function successfulPageRate(benchmark) {
     const successfulPages = finiteNumber(benchmark.source?.successfulPages);
     return totalPages > 0 ? roundRatio(successfulPages / totalPages) : 0;
 }
-function routeList(values) {
-    return values && values.length > 0 ? values.join(', ') : 'none';
-}
 function formatReviewGates(values) {
     return values.length > 0 ? values.join(', ') : 'none';
 }
@@ -745,6 +1101,15 @@ function signedPercent(value) {
     const sign = value > 0 ? '+' : '';
     return `${sign}${Math.round(value * 100)}%`;
 }
+function formatDelta(value, asPercent) {
+    if (value == null) {
+        return 'new';
+    }
+    return asPercent ? signedPercent(value) : signed(value);
+}
+function signed(value) {
+    return value > 0 ? `+${value}` : String(value);
+}
 function escapeHtml(value) {
     return value
         .replaceAll('&', '&amp;')