npm - @tangle-network/agent-eval - Versions diffs - 0.19.1 → 0.20.0 - Mend

@tangle-network/agent-eval 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/index.d.ts +244 -4
package/dist/index.js +317 -14
package/dist/index.js.map +1 -1
package/docs/knowledge-readiness.md +84 -0
package/docs/multi-shot-optimization.md +7 -0
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -2251,6 +2251,151 @@ async function finish(emitter, result) {
   return result;
 }
+// src/knowledge/readiness.ts
+function scoreKnowledgeReadiness(options) { // Scores a task's declared knowledge requirements and returns a readiness report plus a context bundle.
+  const requirements = options.requirements.map(normalizeRequirement); // confidences clamped to [0, 1], evidence ids de-duplicated
+  const missing = requirements.filter((requirement) => requirement.currentConfidence < requirement.confidenceNeeded); // a gap is any requirement strictly below its needed confidence
+  const blockingMissingRequirements = missing.filter(isBlockingGap);
+  const nonBlockingGaps = missing.filter((requirement) => !isBlockingGap(requirement));
+  const readinessScore = weightedReadiness(requirements); // computed over all requirements, not only the gaps
+  const bundle = {
+    taskId: options.taskId,
+    requirements,
+    evidenceIds: unique([...options.evidenceIds ?? [], ...requirements.flatMap((r) => r.evidenceIds)]), // caller-supplied ids merged with per-requirement ids
+    claimIds: unique(options.claimIds ?? []),
+    wikiPageIds: unique(options.wikiPageIds ?? []),
+    userAnswers: options.userAnswers ?? {},
+    missing, // blocking and non-blocking gaps together
+    readinessScore,
+    metadata: options.metadata
+  };
+  const recommendedAction = chooseRecommendedAction(blockingMissingRequirements, nonBlockingGaps);
+  const severity = blockingMissingRequirements.length > 0 ? "critical" : nonBlockingGaps.some((gap) => gap.importance === "high") ? "warning" : "info"; // "critical" with any blocking gap; "warning" if a non-blocking gap is "high"; otherwise "info"
+  const reason = blockingMissingRequirements.length > 0 ? `${blockingMissingRequirements.length} blocking knowledge requirement(s) are missing.` : nonBlockingGaps.length > 0 ? `${nonBlockingGaps.length} non-blocking knowledge gap(s) remain.` : "All declared knowledge requirements are ready.";
+  return { // the report repeats taskId and readinessScore next to the full bundle
+    taskId: options.taskId,
+    readinessScore,
+    blockingMissingRequirements,
+    nonBlockingGaps,
+    recommendedAction,
+    bundle,
+    severity,
+    reason
+  };
+}
+function blockingKnowledgeEval(report, options = {}) { // Wraps a readiness report in an objectiveEval(...) result (objectiveEval is defined outside this hunk).
+  const minimumScore = options.minimumScore ?? 0.7; // threshold falls back to 0.7 when not supplied
+  const passed = report.blockingMissingRequirements.length === 0 && report.readinessScore >= minimumScore; // needs zero blocking gaps AND a score at or above the threshold
+  return objectiveEval({
+    id: options.id ?? "knowledge-ready",
+    passed,
+    score: report.readinessScore,
+    severity: passed ? "info" : report.severity, // a passing eval is always reported as "info"
+    detail: report.reason,
+    evidence: report.blockingMissingRequirements.map((r) => r.id).join(", ") || void 0, // comma-separated blocking requirement ids; undefined when there are none
+    metadata: { knowledgeReadiness: report }
+  });
+}
+function userQuestionsForKnowledgeGaps(gaps) { // Builds one user-facing question object per gap that should be resolved by asking.
+  return gaps.filter((gap) => gap.acquisitionMode === "ask_user" || gap.fallbackPolicy === "ask").map((gap) => ({ // only gaps with mode "ask_user" or fallback policy "ask" produce a question
+    id: `question_${gap.id}`,
+    question: `Please provide: ${gap.description}`,
+    reason: `Required for ${gap.requiredFor.join(", ") || "the task"}.`, // assumes gap.requiredFor is an array (TODO confirm); an empty list reads as "the task"
+    requirementId: gap.id,
+    importance: gap.importance,
+    answerType: gap.sensitivity === "secret" ? "credential" : "free_text", // secrets are requested as credentials, everything else as free text
+    impactIfUnknown: impactFor(gap)
+  }));
+}
+function acquisitionPlansForKnowledgeGaps(gaps) { // Groups gaps by acquisition mode and returns one plan object per mode.
+  const byMode = /* @__PURE__ */ new Map(); // mode -> gaps sharing that mode, in first-seen order
+  for (const gap of gaps) {
+    const mode = planMode(gap.acquisitionMode);
+    if (!mode) continue; // planMode returns null for modes that get no plan
+    const bucket = byMode.get(mode) ?? [];
+    bucket.push(gap);
+    byMode.set(mode, bucket);
+  }
+  return [...byMode.entries()].map(([mode, requirements]) => ({
+    id: `acquire_${mode}`,
+    requirementIds: requirements.map((r) => r.id),
+    mode,
+    description: descriptionForPlan(mode, requirements),
+    priority: maxImportance(requirements.map((r) => r.importance)), // the plan takes the highest importance among its gaps
+    questions: mode === "ask_user" ? userQuestionsForKnowledgeGaps(requirements) : void 0 // questions are attached to the "ask_user" plan only
+  }));
+}
+function normalizeRequirement(requirement) { // Returns a shallow copy with both confidences clamped and evidence ids de-duplicated.
+  return {
+    ...requirement, // the input object itself is not mutated
+    confidenceNeeded: clamp01(requirement.confidenceNeeded),
+    currentConfidence: clamp01(requirement.currentConfidence), // clamp01 maps non-finite values to 0
+    evidenceIds: unique(requirement.evidenceIds)
+  };
+}
+function weightedReadiness(requirements) { // Importance-weighted mean of per-requirement readiness, in [0, 1].
+  if (requirements.length === 0) return 1; // nothing declared counts as fully ready
+  let weightSum = 0;
+  let scoreSum = 0;
+  for (const requirement of requirements) {
+    const weight = importanceWeight(requirement.importance);
+    const score = requirement.confidenceNeeded <= 0 ? 1 : Math.min(1, requirement.currentConfidence / requirement.confidenceNeeded); // ratio of current to needed confidence, capped at 1; a need of 0 is always satisfied
+    weightSum += weight;
+    scoreSum += weight * score;
+  }
+  return clamp01(scoreSum / weightSum); // importanceWeight returns at least 1, so weightSum is non-zero here
+}
+function isBlockingGap(requirement) { // True when any one of importance "blocking", fallback policy "block", or sensitivity "secret" holds.
+  return requirement.importance === "blocking" || requirement.fallbackPolicy === "block" || requirement.sensitivity === "secret";
+}
+function chooseRecommendedAction(blocking, nonBlocking) { // Picks the next action string from the blocking and non-blocking gap lists; the first matching check wins.
+  const gaps = blocking.length > 0 ? blocking : nonBlocking; // NOTE(review): `gaps` is only used for the emptiness check on the next line
+  if (gaps.length === 0) return "run_agent"; // no gaps of either kind
+  if (blocking.some((gap) => gap.acquisitionMode === "ask_user" || gap.fallbackPolicy === "ask")) return "ask_user"; // NOTE(review): this and the next four checks read `blocking`, never `gaps`; confirm that non-blocking gaps are meant to be ignored here
+  if (blocking.some((gap) => gap.acquisitionMode === "query_connector")) return "query_connectors";
+  if (blocking.some((gap) => gap.acquisitionMode === "inspect_repo" || gap.acquisitionMode === "run_command")) return "inspect_repo";
+  if (blocking.some((gap) => gap.acquisitionMode === "search_web")) return "collect_web_data";
+  if (blocking.some((gap) => gap.acquisitionMode === "not_available")) return "abort_or_rescope";
+  if (nonBlocking.some((gap) => gap.importance === "high")) return "build_domain_wiki";
+  return "continue_with_caveat"; // NOTE(review): also reached when blocking gaps exist but none matched a mode above; confirm intended
+}
+function planMode(mode) { // Maps an acquisition mode to the mode used for planning, or null when no plan applies.
+  if (mode === "infer_low_confidence" || mode === "not_available") return null; // these two modes produce no plan
+  return mode; // every other value passes through unchanged
+}
+function descriptionForPlan(mode, requirements) { // Builds the human-readable description line for one acquisition plan.
+  const labels = requirements.map((r) => r.description).join("; "); // requirement descriptions joined with "; "
+  if (mode === "ask_user") return `Ask the user for: ${labels}`;
+  if (mode === "search_web") return `Search web or documentation sources for: ${labels}`;
+  if (mode === "query_connector") return `Query configured connectors for: ${labels}`;
+  if (mode === "inspect_repo") return `Inspect repository context for: ${labels}`;
+  if (mode === "run_command") return `Run local commands to collect: ${labels}`;
+  return `Build domain wiki evidence for: ${labels}`; // fallback wording for any mode not listed above
+}
+function impactFor(requirement) { // Returns the impact sentence that matches the requirement's fallback policy.
+  if (requirement.fallbackPolicy === "block") return "The agent should not run until this is known.";
+  if (requirement.fallbackPolicy === "continue_with_caveat") return "The agent may continue, but must disclose uncertainty.";
+  if (requirement.fallbackPolicy === "use_default") return "The agent will use the configured default if skipped.";
+  return "The agent should ask before continuing."; // used for any other policy value, including a missing one
+}
+function maxImportance(values) { // Returns the highest-ranked importance present in `values`.
+  const order = ["blocking", "high", "medium", "low"]; // ranked from highest to lowest
+  return order.find((value) => values.includes(value)) ?? "low"; // "low" when none of the four known values is present
+}
+function importanceWeight(importance) { // Numeric weight for an importance level, used by weightedReadiness.
+  if (importance === "blocking") return 8;
+  if (importance === "high") return 4;
+  if (importance === "medium") return 2;
+  return 1; // any other value, including "low", weighs 1
+}
+function clamp01(value) { // Clamps a number into [0, 1].
+  if (!Number.isFinite(value)) return 0; // NaN, +/-Infinity and non-number inputs all map to 0
+  return Math.max(0, Math.min(1, value));
+}
+function unique(items) { // De-duplicates an iterable into a new array, keeping first-seen order.
+  return [...new Set(items)]; // an undefined `items` yields an empty array
+}
 // src/feedback-trajectory.ts
 var DEFAULT_SPLIT_POLICY = {
   trainPct: 70,
@@ -3521,9 +3666,9 @@ var DEFAULT_RUN_SCORE_WEIGHTS = {
 };
 function aggregateRunScore(score, weights = {}) {
   const w = { ...DEFAULT_RUN_SCORE_WEIGHTS, ...weights };
-  return w.success * clamp01(score.success) + w.goalProgress * clamp01(score.goalProgress) + w.repoGroundedness * clamp01(score.repoGroundedness) + w.driftPenalty * clamp01(score.driftPenalty) + w.toolUseQuality * clamp01(score.toolUseQuality) + w.patchQuality * clamp01(score.patchQuality) + w.testReality * clamp01(score.testReality) + w.finalGate * clamp01(score.finalGate) + w.reviewerBlockers * clamp01(score.reviewerBlockers) + w.costUsd * Math.max(0, score.costUsd) + w.wallSeconds * Math.max(0, score.wallSeconds / 60);
+  return w.success * clamp012(score.success) + w.goalProgress * clamp012(score.goalProgress) + w.repoGroundedness * clamp012(score.repoGroundedness) + w.driftPenalty * clamp012(score.driftPenalty) + w.toolUseQuality * clamp012(score.toolUseQuality) + w.patchQuality * clamp012(score.patchQuality) + w.testReality * clamp012(score.testReality) + w.finalGate * clamp012(score.finalGate) + w.reviewerBlockers * clamp012(score.reviewerBlockers) + w.costUsd * Math.max(0, score.costUsd) + w.wallSeconds * Math.max(0, score.wallSeconds / 60); // same weighted sum as the removed line, with clamp01 renamed to clamp012; nine components are clamped to [0, 1], while costUsd and wallSeconds / 60 are only floored at 0
 }
-function clamp01(value) {
+function clamp012(value) { // Same body as before under a new local name; presumably renamed to avoid clashing with the clamp01 added in the knowledge-readiness section (TODO confirm). Non-finite input maps to 0, otherwise the value is clamped into [0, 1].
   if (!Number.isFinite(value)) return 0;
   return Math.max(0, Math.min(1, value));
 }
@@ -3567,13 +3712,13 @@ var RunCritic = class {
     const success = trace.run.outcome?.pass === true ? 1 : trace.run.status === "completed" ? 0.5 : 0;
     if (!success) notes.push("run did not complete with pass=true");
     const judgeAverage = judgeSpans2.length ? judgeSpans2.reduce((sum2, span) => sum2 + normalizeJudgeScore(span.score), 0) / judgeSpans2.length : void 0;
-    const outcomeScore = typeof trace.run.outcome?.score === "number" ? clamp01(trace.run.outcome.score > 1 ? trace.run.outcome.score / 100 : trace.run.outcome.score) : void 0;
+    const outcomeScore = typeof trace.run.outcome?.score === "number" ? clamp012(trace.run.outcome.score > 1 ? trace.run.outcome.score / 100 : trace.run.outcome.score) : void 0;
     const goalProgress = outcomeScore ?? judgeAverage ?? success;
     const successfulTools = toolSpans2.filter((span) => span.status !== "error").length;
     const toolUseQuality = toolSpans2.length === 0 ? 0 : successfulTools / toolSpans2.length;
     if (toolSpans2.length === 0) notes.push("no tool spans recorded");
     const patchEvidence = trace.artifacts.length + toolSpans2.filter((span) => /write|edit|patch|apply/i.test(span.toolName)).length;
-    const patchQuality = patchEvidence > 0 ? clamp01(patchEvidence / 4) : 0;
+    const patchQuality = patchEvidence > 0 ? clamp012(patchEvidence / 4) : 0;
     if (!patchQuality) notes.push("no artifact or edit evidence recorded");
     const sandboxTests = sandboxSpans.filter((span) => typeof span.testsTotal === "number" && span.testsTotal > 0);
     const testReality = sandboxTests.length ? sandboxTests.reduce((sum2, span) => sum2 + (span.testsPassed ?? 0) / Math.max(1, span.testsTotal ?? 1), 0) / sandboxTests.length : toolSpans2.some((span) => /\btest|vitest|pytest|jest|build|tsc\b/i.test(JSON.stringify(span.args))) ? 0.4 : 0;
@@ -3617,7 +3762,7 @@ var RunCritic = class {
   }
 };
 function normalizeJudgeScore(score) {
-  return score > 1 ? clamp01(score / 10) : clamp01(score);
+  return score > 1 ? clamp012(score / 10) : clamp012(score); // only the helper name changed; scores above 1 are divided by 10 before clamping to [0, 1] (presumably a 0-10 judge scale; TODO confirm)
 }
 function looksRepoGrounded(text) {
   return /(?:src\/|tests?\/|package\.json|tsconfig|\.ts\b|\.tsx\b|git status|pnpm |npm |vitest|pytest|jest)/i.test(text);
@@ -4973,6 +5118,17 @@ var FAILURE_CLASSES = [
   "cost_overrun",
   "timeout",
   "sandbox_failure",
+  "missing_user_data",
+  "missing_domain_data",
+  "missing_codebase_context",
+  "missing_runtime_context",
+  "missing_credentials",
+  "stale_external_data",
+  "bad_retrieval",
+  "insufficient_evidence",
+  "contradictory_evidence",
+  "ambiguous_user_intent",
+  "knowledge_readiness_blocked",
   "unknown"
 ];
 function isLlmSpan(s) {
@@ -5329,6 +5485,62 @@ var DEFAULT_RULES = [
       return null;
     }
   },
+  { // Rule: classifies a run as "knowledge_readiness_blocked".
+    id: "knowledge-readiness-blocked",
+    match: ({ events }) => {
+      const event = events.find((e) => e.kind === "custom" && e.payload.kind === "readiness_scored" && e.payload.passed === false); // first custom "readiness_scored" event whose payload.passed is strictly false; assumes custom events always carry a payload (TODO confirm)
+      return event ? {
+        failureClass: "knowledge_readiness_blocked",
+        reason: "knowledge readiness report blocked execution",
+        triggerEventId: event.eventId
+      } : null; // null means this rule does not apply
+    }
+  },
+  { // Rule: classifies a run as "missing_credentials".
+    id: "missing-credentials",
+    match: ({ events }) => {
+      const event = events.find((e) => e.kind === "custom" && e.payload.kind === "knowledge_gap" && e.payload.category === "credential_or_secret"); // first custom "knowledge_gap" event with category "credential_or_secret"
+      return event ? {
+        failureClass: "missing_credentials",
+        reason: "required credential or secret was missing",
+        triggerEventId: event.eventId
+      } : null; // null means this rule does not apply
+    }
+  },
+  { // Rule: classifies a failed run as "bad_retrieval" when a retrieval span produced nothing useful.
+    id: "bad-retrieval",
+    match: ({ run, spans }) => {
+      if (run.outcome?.pass !== false) return null; // applies only when outcome.pass is strictly false; a missing outcome does not qualify
+      const retrieval = spans.find((s) => s.kind === "retrieval" && (s.hits.length === 0 || s.hits.every((hit) => hit.score <= 0))); // first retrieval span with no hits, or whose hits all score <= 0; assumes retrieval spans always carry a hits array (TODO confirm)
+      return retrieval ? {
+        failureClass: "bad_retrieval",
+        reason: "retrieval returned no useful hits for a failed run",
+        triggerSpanId: retrieval.spanId // this rule reports a span id, unlike the event-based rules next to it
+      } : null;
+    }
+  },
+  { // Rule: classifies a run as "insufficient_evidence".
+    id: "insufficient-evidence",
+    match: ({ events }) => {
+      const event = events.find((e) => e.kind === "custom" && e.payload.kind === "knowledge_gap" && e.payload.reason === "insufficient_evidence"); // first custom "knowledge_gap" event whose payload.reason is "insufficient_evidence"
+      return event ? {
+        failureClass: "insufficient_evidence",
+        reason: "task proceeded with insufficient supporting evidence",
+        triggerEventId: event.eventId
+      } : null; // null means this rule does not apply
+    }
+  },
+  { // Rule: classifies a run as "contradictory_evidence".
+    id: "contradictory-evidence",
+    match: ({ events }) => {
+      const event = events.find((e) => e.kind === "custom" && e.payload.kind === "knowledge_gap" && e.payload.reason === "contradictory_evidence"); // first custom "knowledge_gap" event whose payload.reason is "contradictory_evidence"
+      return event ? {
+        failureClass: "contradictory_evidence",
+        reason: "supporting evidence contradicted itself",
+        triggerEventId: event.eventId
+      } : null; // null means this rule does not apply
+    }
+  },
   // Budget breach events
   {
     id: "budget-breach",
@@ -5667,11 +5879,14 @@ async function failureClusterView(store, options = {}) {
     const cls = classifyFailure({ run, spans, events }, rules);
     let toolName;
     let argPrefix;
+    let dimension;
     if (cls.triggerSpanId) {
       const trig = spans.find((s) => s.spanId === cls.triggerSpanId);
       if (trig?.kind === "tool") {
         toolName = trig.toolName;
         argPrefix = argHash(trig.args).slice(0, 16);
+      } else if (trig?.kind === "judge") {
+        dimension = trig.dimension;
       }
     }
     if (!toolName) {
@@ -5682,13 +5897,18 @@ async function failureClusterView(store, options = {}) {
         argPrefix = argHash(errored.args).slice(0, 16);
       }
     }
-    const key = `${cls.failureClass}|${toolName ?? ""}|${argPrefix ?? ""}`;
+    if (!dimension) {
+      const judge = spans.find((s) => s.kind === "judge" && typeof s.dimension === "string");
+      if (judge?.kind === "judge") dimension = judge.dimension;
+    }
+    const key = `${cls.failureClass}|${toolName ?? ""}|${argPrefix ?? ""}|${dimension ?? ""}`;
     let cluster = clusters.get(key);
     if (!cluster) {
       cluster = {
         failureClass: cls.failureClass,
         toolName,
         argPrefix,
+        dimension,
         runCount: 0,
         scenarioIds: [],
         exampleRunId: run.runId,
@@ -6673,6 +6893,46 @@ function checkCanaries(output, scenarios) {
   }
   return leaks;
 }
+function checkBehavioralCanary(output, scenario) { // Checks one output against a scenario's forbidden pattern; returns a leak record or null.
+  const pattern = scenario.forbiddenPattern ?? scenario.canary; // forbiddenPattern takes precedence; canary is the fallback
+  if (!pattern) return null; // nothing to check, including an empty-string pattern
+  const hit = matchForbidden(output, pattern);
+  if (!hit) return null;
+  return {
+    scenarioId: scenario.id,
+    canary: pattern, // the pattern that was checked, not the matched text
+    evidence: excerpt2(output, hit) // excerpt2 is defined outside this hunk
+  };
+}
+function runBehavioralCanaries(cases) { // Runs checkBehavioralCanary over { output, scenario, runId? } cases and collects the leaks.
+  const leaks = [];
+  for (const c of cases) {
+    const leak = checkBehavioralCanary(c.output, c.scenario);
+    if (leak) leaks.push({ ...leak, runId: c.runId ?? leak.runId }); // checkBehavioralCanary sets no runId, so the fallback is undefined when the case has none
+  }
+  return leaks; // in input order; cases without a leak contribute nothing
+}
+function matchForbidden(output, pattern) { // Returns the matched text when `pattern` occurs in `output`, else null.
+  const re = tryParseRegex(pattern); // non-null only for "/body/flags" style patterns
+  if (re) {
+    const m = output.match(re);
+    return m && m[0].length > 0 ? m[0] : null; // a zero-length regex match is treated as no match
+  }
+  return output.includes(pattern) ? pattern : null; // otherwise a plain substring test
+}
+function tryParseRegex(pattern) { // Parses a "/body/flags" string into a RegExp; returns null when it is not in that form.
+  if (pattern.length < 2 || pattern[0] !== "/") return null; // must start with "/" and be at least two characters long
+  const last = pattern.lastIndexOf("/"); // the LAST slash is the closing delimiter, so the body may contain slashes
+  if (last <= 0) return null; // the only slash is the leading one
+  const body = pattern.slice(1, last);
+  const flags = pattern.slice(last + 1);
+  if (!/^[gimsuy]*$/.test(flags)) return null; // only the flags g, i, m, s, u, y are accepted
+  try {
+    return new RegExp(body, flags);
+  } catch {
+    return null; // an invalid body or flag combination yields null instead of throwing
+  }
+}
 async function canaryLeakView(store, scenarios) {
   const targets = scenarios.filter((s) => !!s.canary);
   if (targets.length === 0) return [];
@@ -7519,6 +7779,41 @@ var DEFAULT_MUTATORS = [
   { id: "politeness-prefix", fn: politenessPrefixMutator },
   { id: "whitespace-collapse", fn: whitespaceCollapseMutator }
 ];
+async function paraphraseRobustnessScenarios(args) { // Scores how well scenario scores hold up when every user turn is rewritten by each mutator.
+  const reps = Math.max(1, args.reps ?? 1); // at least one repetition per mutator
+  const mutatorNames = args.mutators.map((m) => m.name);
+  const perScenario = [];
+  for (const scenario of args.scenarios) { // scenarios, mutators and repetitions are awaited one at a time
+    const baseline = await args.runScenario({
+      id: scenario.id,
+      userTurns: scenario.userTurns
+    });
+    const originalScore = baseline.score; // the baseline runs once per scenario, regardless of reps
+    const deltas = {};
+    const paraphrasedAll = [];
+    for (const m of args.mutators) {
+      const scores2 = [];
+      for (let r = 0; r < reps; r++) {
+        const mutatedTurns = scenario.userTurns.map((t) => m.mutator(t)); // the mutator is re-applied on every repetition
+        const out = await args.runScenario({
+          id: scenario.id,
+          userTurns: mutatedTurns
+        });
+        scores2.push(out.score);
+      }
+      const mean10 = scores2.reduce((a, b) => a + b, 0) / scores2.length;
+      deltas[m.name] = mean10 - originalScore; // negative when the mutated turns scored lower than the baseline
+      paraphrasedAll.push(...scores2);
+    }
+    const paraphrasedMean = paraphrasedAll.length === 0 ? originalScore : paraphrasedAll.reduce((a, b) => a + b, 0) / paraphrasedAll.length; // with no mutator runs the baseline score is used
+    perScenario.push({ id: scenario.id, originalScore, paraphrasedMean, deltas });
+  }
+  const meanOriginal = perScenario.length === 0 ? 0 : perScenario.reduce((a, p) => a + p.originalScore, 0) / perScenario.length;
+  const meanParaphrased = perScenario.length === 0 ? 0 : perScenario.reduce((a, p) => a + p.paraphrasedMean, 0) / perScenario.length;
+  const ratio2 = meanOriginal <= 0 ? 0 : meanParaphrased / meanOriginal; // ratio is 0 when the baseline mean is zero or negative
+  const score = Math.max(0, Math.min(1, ratio2)); // capped at 1, so scoring above the baseline earns no extra credit
+  return { score, perScenario, mutators: mutatorNames };
+}
 // src/visual-diff.ts
 function visualDiff(a, b, options = {}) {
@@ -8747,10 +9042,11 @@ async function signManifest(m) {
   const bytes = new TextEncoder().encode(JSON.stringify(canonical));
   const digest = await globalThis.crypto.subtle.digest("SHA-256", bytes);
   const hash = Array.from(new Uint8Array(digest)).map((b) => b.toString(16).padStart(2, "0")).join("");
-  return { ...m, contentHash: hash };
+  return { ...m, contentHash: hash, algo: "sha256-content" };
 }
 async function verifyManifest(m) {
-  const { contentHash, ...rest } = m;
+  const { contentHash, algo: _algo, ...rest } = m; // algo is now removed together with contentHash before re-signing
+  void _algo; // references the otherwise-unused binding; presumably there to satisfy an unused-variable lint (TODO confirm)
   const resigned = await signManifest(rest);
   return resigned.contentHash === contentHash;
 }
@@ -10989,7 +11285,7 @@ function defaultReferenceReplayMatcher(reference, candidate) {
   const textScore = tokenJaccard(referenceText, candidateText);
   const severityScore = reference.severity && candidate.severity ? normalize(reference.severity) === normalize(candidate.severity) ? 0.1 : -0.05 : 0;
   const tagScore = tagOverlap(reference.tags, candidate.tags) * 0.15;
-  const score = clamp012(textScore * 0.85 + tagScore + severityScore);
+  const score = clamp013(textScore * 0.85 + tagScore + severityScore);
   return { score, reason: `token=${textScore.toFixed(2)} tags=${tagScore.toFixed(2)} severity=${severityScore.toFixed(2)}` };
 }
 function scoreScenario(scenario, matcher, threshold, matchStrategy) {
@@ -11089,7 +11385,7 @@ function scorePair(scenario, matcher, reference, candidate) {
   if (!Number.isFinite(result.score)) {
     throw new Error(`reference replay matcher returned non-finite score for ${scenario.id}:${reference.id}:${candidate.id}`);
   }
-  return { score: clamp012(result.score), reason: result.reason ?? "" };
+  return { score: clamp013(result.score), reason: result.reason ?? "" };
 }
 function buildScenarioScore(scenario, matches2, falsePositives) {
   const matched = matches2.filter((match) => match.matched).length;
@@ -11188,7 +11484,7 @@ function tokens(text) {
 function normalize(text) {
   return text.toLowerCase().replace(/[^a-z0-9]+/g, " ").trim(); // lower-cases, turns each run of characters outside a-z and 0-9 into a single space, then trims
 }
-function clamp012(value) {
+function clamp013(value) { // Rename only, body unchanged; presumably the numeric suffix moved up because another clamp helper was added earlier in the bundle (TODO confirm). Non-finite input maps to 0, otherwise the value is clamped into [0, 1].
   if (!Number.isFinite(value)) return 0;
   return Math.max(0, Math.min(1, value));
 }
@@ -12653,7 +12949,7 @@ async function scoreOne(config, variant, scenarioId, rep, split) {
       scenarioId,
       rep,
       ok: scored.ok ?? true,
-      score: clamp013(scored.score),
+      score: clamp014(scored.score),
       cost: scored.costUsd ?? run.costUsd ?? 0,
       durationMs: scored.durationMs ?? run.durationMs ?? 0,
       metrics: {
@@ -12765,7 +13061,7 @@ function stableHash2(input) {
   }
   return h >>> 0;
 }
-function clamp013(n) {
+function clamp014(n) { // Rename only, body unchanged: non-finite input maps to 0, otherwise n is clamped into [0, 1].
   if (!Number.isFinite(n)) return 0;
   return Math.max(0, Math.min(1, n));
 }
@@ -14148,6 +14444,7 @@ export {
   TraceEmitter,
   TrialTelemetry,
   UNIVERSAL_FINDERS,
+  acquisitionPlansForKnowledgeGaps,
   adversarialJudge,
   aggregateLlm,
   aggregateRunScore,
@@ -14163,6 +14460,7 @@ export {
   benjaminiHochberg,
   bhAdjust,
   bisect,
+  blockingKnowledgeEval,
   bonferroni,
   bootstrapCi,
   budgetBreachView,
@@ -14176,9 +14474,10 @@ export {
   callLlmJson,
   canaryLeakView,
   causalAttribution,
+  checkBehavioralCanary,
   checkCanaries,
   checkSlos,
-  clamp01,
+  clamp012 as clamp01,
   classifyEuAiRisk,
   classifyFailure,
   codeExecutionJudge,
@@ -14299,6 +14598,7 @@ export {
   pairedTTest,
   pairedWilcoxon,
   paraphraseRobustness,
+  paraphraseRobustnessScenarios,
   paretoChart,
   paretoFrontier,
   paretoFrontierWithCrowding,
@@ -14344,6 +14644,7 @@ export {
   rowWhere,
   runAgentControlLoop,
   runAssertions,
+  runBehavioralCanaries,
   runCanaries,
   runCounterfactual,
   runE2EWorkflow,
@@ -14367,6 +14668,7 @@ export {
   scanForMuffledGates,
   scoreAllProjects,
   scoreContinuity,
+  scoreKnowledgeReadiness,
   scoreProject,
   scoreRedTeamOutput,
   scoreReferenceReplay,
@@ -14401,6 +14703,7 @@ export {
   trialTraceFromMultiShotTrial,
   typoMutator,
   urlContains,
+  userQuestionsForKnowledgeGaps,
   validateRunRecord,
   verbosityBias,
   verifyManifest,