@tangle-network/agent-eval 0.31.1 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2900,6 +2900,11 @@ var MetricsCollector = class {
2900
2900
  };
2901
2901
 
2902
2902
  // src/driver.ts
2903
/**
 * Stance paragraph inserted into the simulated-user system prompt, keyed by
 * the persona's rigor level ("cooperative", "demanding" or "relentless").
 * Frozen because it is a shared lookup table: no code path should be able to
 * alter the wording for later prompt builds.
 */
var RIGOR_STANCE = Object.freeze({
  cooperative: "Your stance: a pragmatic early adopter. You accept reasonable answers and only push back on clear gaps or outright errors.",
  demanding: "Your stance: an experienced professional with no time to waste. You do not accept vague, hedged, or generic answers \u2014 you expect specifics, and you say so plainly when you do not get them.",
  relentless: "Your stance: a senior partner reviewing this work for a client who will litigate if it is wrong. You interrogate every claim. You accept nothing undefended. You find the single weakest point in every answer and attack it. Courteous, never satisfied."
});
2903
2908
  var AgentDriver = class {
2904
2909
  tc;
2905
2910
  client;
@@ -2929,12 +2934,14 @@ var AgentDriver = class {
2929
2934
  const conversationHistory = [];
2930
2935
  let completed = false;
2931
2936
  let turnsToCompletion = null;
2937
+ let criteriaMetAtTurn = null;
2932
2938
  for (let turn = 1; turn <= persona.maxTurns; turn++) {
2933
2939
  const state = await metrics.getState();
2934
2940
  const userMessage = await this.decideNextMessage(persona, state, conversationHistory);
2935
2941
  if (userMessage === "DONE") {
2936
2942
  completed = true;
2937
2943
  turnsToCompletion = turn - 1;
2944
+ console.log(` SIGNED OFF by simulated ${persona.role} after turn ${turn - 1}`);
2938
2945
  break;
2939
2946
  }
2940
2947
  const turnStart = Date.now();
@@ -2963,11 +2970,9 @@ var AgentDriver = class {
2963
2970
  console.log(
2964
2971
  ` [turn ${turn}] ${conv.completionPercent.toFixed(0)}% \u2014 ${criteriaStr} (${(latency / 1e3).toFixed(1)}s)`
2965
2972
  );
2966
- if (conv.complete) {
2967
- completed = true;
2968
- turnsToCompletion = turn;
2969
- console.log(` COMPLETE at turn ${turn}`);
2970
- break;
2973
+ if (conv.complete && criteriaMetAtTurn === null) {
2974
+ criteriaMetAtTurn = turn;
2975
+ console.log(` criteria met at turn ${turn} \u2014 driver continues pressure-testing`);
2971
2976
  }
2972
2977
  }
2973
2978
  const finalState = await metrics.getState();
@@ -2975,6 +2980,7 @@ var AgentDriver = class {
2975
2980
  personaId: persona.id,
2976
2981
  completed,
2977
2982
  turnsToCompletion,
2983
+ criteriaMetAtTurn,
2978
2984
  totalTurns: turnMetrics.length,
2979
2985
  metrics: turnMetrics,
2980
2986
  finalState,
@@ -2985,51 +2991,13 @@ var AgentDriver = class {
2985
2991
  }
2986
2992
  /** Use the driver LLM to decide what the "user" says next */
2987
2993
  async decideNextMessage(persona, state, history) {
2988
- const lastResponse = history.length > 0 ? history[history.length - 1].content.slice(0, 2e3) : "(no conversation yet \u2014 this is the first message)";
2989
- const recentHistory = history.slice(-6).map((h) => `${h.role}: ${h.content.slice(0, 500)}`).join("\n\n");
2990
- const resp = await this.tc.chat({
2991
- model: this.driverModel,
2992
- messages: [
2993
- {
2994
- role: "system",
2995
- content: `You are playing the role of a ${persona.role} testing an AI agent.
2996
- Your goal: ${persona.goal}
2997
-
2998
- ${this.productContext ? `Product context:
2999
- ${this.productContext}
3000
- ` : ""}
3001
- Current state:
3002
- - Tasks: ${state.tasks}
3003
- - Events: ${state.events}
3004
- - Proposals: pending=${state.proposals.pending}, approved=${state.proposals.approved}, rejected=${state.proposals.rejected}
3005
- - Vault files: ${state.vaultFiles.length} (${state.vaultFiles.slice(0, 10).join(", ")}${state.vaultFiles.length > 10 ? "..." : ""})
3006
-
3007
- Completion criteria met: ${this.describeCompletion(persona, state)}
3008
-
3009
- Decide what to do next:
3010
- 1. If completion is 100% \u2014 respond with exactly "DONE"
3011
- 2. If a proposal is pending \u2014 approve or reject it (with reason)
3012
- 3. If the agent is on track \u2014 push for the next deliverable
3013
- 4. If the agent is off track \u2014 give specific corrective feedback
3014
- 5. If this is the first message \u2014 start with a clear, actionable request
3015
-
3016
- Output ONLY your next message to the agent. Be specific. Be realistic.
3017
- Don't be patient \u2014 a real ${persona.role} wouldn't accept vague answers.`
3018
- },
3019
- {
3020
- role: "user",
3021
- content: recentHistory ? `Recent conversation:
3022
- ${recentHistory}
3023
-
3024
- The agent just said:
3025
- ${lastResponse}` : "No conversation yet. Send your opening message."
3026
- }
3027
- ],
3028
- temperature: 0.5,
3029
- maxTokens: 500
2994
+ return decideNextUserTurn(this.tc, {
2995
+ persona,
2996
+ state,
2997
+ history,
2998
+ productContext: this.productContext,
2999
+ model: this.driverModel
3030
3000
  });
3031
- const content = resp.choices?.[0]?.message?.content ?? "";
3032
- return content.trim();
3033
3001
  }
3034
3002
  /** Handle pending approvals based on persona feedback patterns */
3035
3003
  async handleApprovals(persona, workspaceId, _state) {
@@ -3049,16 +3017,77 @@ ${lastResponse}` : "No conversation yet. Send your opening message."
3049
3017
  }
3050
3018
  }
3051
3019
  }
3052
- /** Describe which completion criteria are met */
3053
- describeCompletion(persona, state) {
3054
- const results = persona.completionCriteria.map((c) => {
3055
- const met = c.check(state);
3056
- return `${c.name}: ${met ? "MET" : "NOT MET"}`;
3057
- });
3058
- const metCount = results.filter((r) => r.includes("MET") && !r.includes("NOT")).length;
3059
- return `${metCount}/${persona.completionCriteria.length} \u2014 ${results.join(", ")}`;
3060
- }
3061
3020
  };
3021
/**
 * Summarise which of a persona's completion criteria the given state meets.
 *
 * The met count is taken from the boolean results themselves, not from the
 * rendered text, so a criterion whose name happens to contain "MET" or "NOT"
 * (for example "NOTES written") is still counted correctly.
 *
 * @param {{ completionCriteria: Array<{ name: string, check: Function }> }} persona
 *   Persona whose criteria are evaluated; each `check` is called with `state`.
 * @param {*} state Passed unchanged to every criterion's `check`.
 * @returns {string} `"<met>/<total> — <name>: MET|NOT MET, ..."`.
 */
function describeCompletion(persona, state) {
  const outcomes = persona.completionCriteria.map((criterion) => ({
    name: criterion.name,
    met: Boolean(criterion.check(state))
  }));
  const metCount = outcomes.filter((outcome) => outcome.met).length;
  const results = outcomes.map((outcome) => `${outcome.name}: ${outcome.met ? "MET" : "NOT MET"}`);
  return `${metCount}/${persona.completionCriteria.length} \u2014 ${results.join(", ")}`;
}
3029
/**
 * Build the system prompt for the LLM that plays the simulated user.
 *
 * The prompt combines the persona (role, goal, optional expertise, pressure
 * points and curveballs), a rigor stance, optional product context and a
 * snapshot of the workspace state.
 *
 * @param {object} persona Reads `role`, `goal`, and the optional `rigor`,
 *   `expertise`, `pressurePoints` and `curveballs`; `describeCompletion` also
 *   reads `completionCriteria`.
 * @param {object} state Reads `tasks`, `events`, `proposals.{pending,approved,rejected}`
 *   and `vaultFiles` (an array; only the first 10 entries are listed).
 * @param {string} [productContext=""] Optional product description; omitted when empty.
 * @returns {string} The complete system prompt.
 */
function buildDriverSystemPrompt(persona, state, productContext = "") {
  const rigor = persona.rigor ?? "demanding";
  // An unrecognised rigor value would otherwise put the literal text
  // "undefined" into the prompt; fall back to the default stance instead.
  const stance = typeof RIGOR_STANCE[rigor] === "string" ? RIGOR_STANCE[rigor] : RIGOR_STANCE.demanding;
  const expertise = persona.expertise ? ` You are ${persona.expertise}.` : "";
  const pressure = persona.pressurePoints && persona.pressurePoints.length > 0 ? `
A competent ${persona.role} here MUST get the agent to address each of:
${persona.pressurePoints.map((p) => ` - ${p}`).join("\n")}
Do NOT hand these to the agent. Probe whether it surfaces them itself. If it misses one, press on exactly that gap until it delivers or demonstrably fails.
` : "";
  const curveballs = persona.curveballs && persona.curveballs.length > 0 ? `
Once the agent is coasting on easy answers, introduce ONE of these as a genuine new development \u2014 never as a quiz:
${persona.curveballs.map((c) => ` - ${c}`).join("\n")}
` : "";
  return `You are role-playing a real ${persona.role} putting an AI agent through its paces.${expertise}
Your objective: ${persona.goal}
You are deciding whether this agent's work is good enough to stake your professional reputation on. Assume it is not \u2014 until it proves otherwise.

${stance}
${productContext ? `Product context:
${productContext}
` : ""}Current workspace state:
- Tasks: ${state.tasks} | Events: ${state.events}
- Proposals: pending=${state.proposals.pending}, approved=${state.proposals.approved}, rejected=${state.proposals.rejected}
- Vault files (${state.vaultFiles.length}): ${state.vaultFiles.slice(0, 10).join(", ")}${state.vaultFiles.length > 10 ? " \u2026" : ""}
- Nominal task criteria: ${describeCompletion(persona, state)}
${pressure}${curveballs}
How to choose your next message:
1. Silently judge the agent's last response the way a ${persona.role} would. Is every claim defended with a specific authority, figure, or mechanism? Or is it vague, hedged, or generic?
2. If it is vague or hand-waved \u2014 do NOT move on. Name the gap and demand the specific authority / figure / mechanism. "It depends" is not an answer; force the decision.
3. If it makes a claim you can challenge \u2014 challenge it. Make the agent defend or correct it.
4. If it missed something a ${persona.role} would catch \u2014 press on exactly that, without naming it for the agent.
5. If it is genuinely solid \u2014 escalate: go a layer deeper, or introduce a curveball.
6. First message \u2014 state your situation as you really would: realistic, specific, with the messy detail, but do not coach the agent.

Sign-off: respond with exactly "DONE" only when a ${persona.role} would act on this work without redoing it. Nominal task completion is NOT sign-off \u2014 sloppy-but-complete still fails. If the agent never gets there, keep pushing; never sign off on weak work.

Output ONLY your next message to the agent \u2014 in character, first person, no meta-commentary, no stage directions.`;
}
3068
/**
 * Ask the driver model for the simulated user's next message.
 *
 * @param {{ chat: Function }} tc Chat client; `chat` receives
 *   `{ model, messages, temperature, maxTokens }`.
 * @param {object} opts
 * @param {object} opts.persona Forwarded to `buildDriverSystemPrompt`.
 * @param {object} opts.state Forwarded to `buildDriverSystemPrompt`.
 * @param {Array<{ role: string, content: string }>} opts.history Conversation so far.
 * @param {string} [opts.productContext=""] Forwarded to `buildDriverSystemPrompt`.
 * @param {string} [opts.model="claude-sonnet-4-6"] Model used for the driver call.
 * @returns {Promise<string>} Trimmed content of the first choice, or "" when
 *   the response carries none.
 */
async function decideNextUserTurn(tc, opts) {
  const { persona, state, history, productContext = "", model = "claude-sonnet-4-6" } = opts;

  // The most recent entry is quoted at greater length (2000 chars) than the
  // rolling window below (500 chars per entry).
  let lastResponse = "(no conversation yet \u2014 this is the first message)";
  if (history.length > 0) {
    lastResponse = history[history.length - 1].content.slice(0, 2e3);
  }

  // Only the six most recent entries are shown to the driver.
  const window = [];
  for (const entry of history.slice(-6)) {
    window.push(`${entry.role}: ${entry.content.slice(0, 500)}`);
  }
  const recentHistory = window.join("\n\n");

  let userContent;
  if (recentHistory) {
    userContent = `Recent conversation:
${recentHistory}

The agent's latest response:
${lastResponse}`;
  } else {
    userContent = "No conversation yet. Send your opening message \u2014 in character, phrased as this person actually would.";
  }

  const messages = [
    { role: "system", content: buildDriverSystemPrompt(persona, state, productContext) },
    { role: "user", content: userContent }
  ];
  const resp = await tc.chat({ model, messages, temperature: 0.5, maxTokens: 700 });
  const content = resp.choices?.[0]?.message?.content ?? "";
  return content.trim();
}
3062
3091
 
3063
3092
  // src/integration-gates.ts
3064
3093
  function integrationManifestValidatedPayload(input) {
@@ -4520,6 +4549,194 @@ function pathExists(obj, path) {
4520
4549
  return true;
4521
4550
  }
4522
4551
 
4552
+ // src/completion-verifier.ts
4553
// Function words dropped by `tokens` before requirement text and candidate
// text are compared.
var STOPWORDS = /* @__PURE__ */ new Set("the a an of for and or to in on with by".split(" "));
// Candidates whose token-recall score falls below this value are discarded.
var MATCH_THRESHOLD = 0.5;
// Trimmed content shorter than this many characters is not treated as real content.
var MIN_CONTENT_CHARS = 50;
4569
/**
 * Split text into a set of lower-cased comparison tokens.
 *
 * Words are runs of Unicode letters, combining marks and digits, so non-ASCII
 * words such as "résumé" stay whole rather than being cut at each accented
 * character. For pure-ASCII input the result matches an `[a-z0-9]` split.
 * Single-character tokens and STOPWORDS entries are dropped.
 *
 * @param {string} s Text to tokenise.
 * @returns {Set<string>} Distinct tokens in first-seen order.
 */
function tokens(s) {
  const words = s.toLowerCase().split(/[^\p{L}\p{M}\p{N}]+/u);
  return new Set(words.filter((t) => t.length > 1 && !STOPWORDS.has(t)));
}
4574
/**
 * Fraction of the requirement's tokens that also occur in the candidate text.
 *
 * @param {string} requirementText Text whose tokens must be found.
 * @param {string} candidateText Text searched for those tokens.
 * @returns {number} A value in [0, 1]; 0 when the requirement yields no tokens.
 */
function tokenRecall(requirementText, candidateText) {
  const required = tokens(requirementText);
  if (required.size === 0) return 0;
  const available = tokens(candidateText);
  const matched = [...required].filter((t) => available.has(t)).length;
  return matched / required.size;
}
4582
/**
 * Collect the artifacts that plausibly satisfy one requirement.
 *
 * An artifact qualifies when its trimmed content has at least
 * MIN_CONTENT_CHARS characters and either its path/kind text reaches
 * MATCH_THRESHOLD token recall against the requirement's title and category,
 * or its kind equals the requirement's category (case-insensitive), which
 * raises the score to 1.
 *
 * @param {{ title: string, category?: string }} req Requirement being matched.
 * @param {number} reqIndex Index of `req`, copied onto each candidate.
 * @param {Array<{ path?: string, kind?: string, content?: string }>} artifacts
 * @returns {Array<{ reqIndex: number, itemKey: string, score: number, evidence: string, content: string }>}
 */
function artifactCandidates(req, reqIndex, artifacts) {
  const reqText = `${req.title} ${req.category ?? ""}`;
  const out = [];
  artifacts.forEach((a, i) => {
    const content = a.content ?? "";
    if (content.trim().length < MIN_CONTENT_CHARS) return;
    // A missing kind must contribute no tokens; interpolating it directly
    // would add the word "undefined" to the candidate text.
    let score = tokenRecall(reqText, `${a.path ?? ""} ${a.kind ?? ""}`);
    if (req.category && a.kind && req.category.toLowerCase() === a.kind.toLowerCase()) {
      score = Math.max(score, 1);
    }
    if (score < MATCH_THRESHOLD) return;
    out.push({
      reqIndex,
      // The key uses the artifact's position in the input array.
      itemKey: `artifact:${i}`,
      score,
      evidence: `artifact '${a.path ?? a.kind}' matched (token recall ${score.toFixed(2)})`,
      content
    });
  });
  return out;
}
4602
/**
 * Collect the approved proposals that plausibly satisfy one requirement.
 *
 * Only proposals whose `status` is "approved" are considered, and the title
 * must reach MATCH_THRESHOLD token recall against the requirement's title and
 * category. A proposal without a title is treated as having empty text, so it
 * scores 0 and is skipped rather than raising.
 *
 * @param {{ title: string, category?: string }} req Requirement being matched.
 * @param {number} reqIndex Index of `req`, copied onto each candidate.
 * @param {Array<{ id: *, title?: string, status: string, content?: string }>} proposals
 * @returns {Array<{ reqIndex: number, itemKey: string, score: number, evidence: string, content: (string|null) }>}
 *   `content` is null when the trimmed body is shorter than MIN_CONTENT_CHARS.
 */
function proposalCandidates(req, reqIndex, proposals) {
  const reqText = `${req.title} ${req.category ?? ""}`;
  const out = [];
  for (const p of proposals) {
    if (p.status !== "approved") continue;
    const score = tokenRecall(reqText, p.title ?? "");
    if (score < MATCH_THRESHOLD) continue;
    const body = p.content ?? "";
    out.push({
      reqIndex,
      itemKey: `proposal:${p.id}`,
      score,
      evidence: `approved proposal '${p.title}' matched (token recall ${score.toFixed(2)})`,
      content: body.trim().length >= MIN_CONTENT_CHARS ? body : null
    });
  }
  return out;
}
4620
/**
 * Collect the tool calls whose name plausibly satisfies one requirement.
 *
 * Only the requirement's title is compared against the tool name (the
 * category is not used here). Tool-call candidates never carry content.
 *
 * @param {{ title: string }} req Requirement being matched.
 * @param {number} reqIndex Index of `req`, copied onto each candidate.
 * @param {string[]} toolCalls Tool names.
 * @returns {Array<{ reqIndex: number, itemKey: string, score: number, evidence: string, content: null }>}
 */
function toolCallCandidates(req, reqIndex, toolCalls) {
  const matches = [];
  for (const [position, toolName] of toolCalls.entries()) {
    const score = tokenRecall(req.title, toolName);
    if (score >= MATCH_THRESHOLD) {
      matches.push({
        reqIndex,
        itemKey: `tool:${position}`,
        score,
        evidence: `tool call '${toolName}' matched (token recall ${score.toFixed(2)})`,
        content: null
      });
    }
  }
  return matches;
}
4635
/**
 * Check a produced state against a gold task specification.
 *
 * Candidates are gathered per requirement from the sources allowed by
 * `satisfiedBy` ("artifact", "proposal", "tool-call"; anything unset means
 * "any"). They are then assigned greedily by descending score, so that each
 * requirement gets at most one item and each item serves at most one
 * requirement. Matches that carry content are passed to `checkCorrectness`,
 * one at a time, in requirement order.
 *
 * @param {{ taskId: string, requirements: Array<object> }} gold Gold specification.
 * @param {{ artifacts: Array, proposals: Array, toolCalls: Array }} state Produced state.
 * @param {(req: object, content: string) => Promise<{ correct: boolean, reason: string }>} checkCorrectness
 * @returns {Promise<{ taskId: string, requirements: Array<object>, completionRate: number, fullyComplete: boolean }>}
 * @throws {Error} When `gold.requirements` is empty.
 */
async function verifyCompletion(gold, state, checkCorrectness) {
  const goldReqs = gold.requirements;
  if (goldReqs.length === 0) {
    throw new Error(
      `verifyCompletion: task '${gold.taskId}' has no requirements \u2014 malformed gold spec`
    );
  }

  // Gather every candidate, keeping requirement order and, within one
  // requirement, the artifact -> proposal -> tool-call order.
  const pool = goldReqs.flatMap((req, idx) => {
    const source = req.satisfiedBy ?? "any";
    const allows = (kind) => source === kind || source === "any";
    return [
      ...(allows("artifact") ? artifactCandidates(req, idx, state.artifacts) : []),
      ...(allows("proposal") ? proposalCandidates(req, idx, state.proposals) : []),
      ...(allows("tool-call") ? toolCallCandidates(req, idx, state.toolCalls) : [])
    ];
  });
  pool.sort((left, right) => right.score - left.score);

  // Greedy one-to-one assignment: highest-scoring candidates claim first.
  const matchByReq = /* @__PURE__ */ new Map();
  const claimedItems = /* @__PURE__ */ new Set();
  for (const candidate of pool) {
    const free = !matchByReq.has(candidate.reqIndex) && !claimedItems.has(candidate.itemKey);
    if (free) {
      matchByReq.set(candidate.reqIndex, candidate);
      claimedItems.add(candidate.itemKey);
    }
  }

  const requirements = [];
  for (const [idx, req] of goldReqs.entries()) {
    const match = matchByReq.get(idx);
    const evidence = [];
    let correct = null;
    if (match === void 0) {
      const source = req.satisfiedBy ?? "any";
      const kind = source === "any" ? "artifact/proposal/tool-call" : source;
      evidence.push(`no produced ${kind} matched this requirement`);
    } else {
      evidence.push(match.evidence);
      if (match.content === null) {
        evidence.push("correctness: not assessed \u2014 matched item carries no content");
      } else {
        const verdict = await checkCorrectness(req, match.content);
        correct = verdict.correct;
        evidence.push(`correctness: ${verdict.correct ? "pass" : "fail"} \u2014 ${verdict.reason}`);
      }
    }
    const structurallyPresent = match !== void 0;
    requirements.push({
      reqId: req.reqId,
      title: req.title,
      structurallyPresent,
      correct,
      // An unassessed match (correct === null) still counts as satisfied.
      satisfied: structurallyPresent && correct !== false,
      evidence
    });
  }

  const satisfiedCount = requirements.filter((r) => r.satisfied).length;
  return {
    taskId: gold.taskId,
    requirements,
    completionRate: satisfiedCount / requirements.length,
    fullyComplete: satisfiedCount === requirements.length
  };
}
4701
/**
 * Extract the `{ correct, reason }` verdict from a model response.
 *
 * The response may wrap the JSON object in prose or code fences. The widest
 * span from the first "{" to the last "}" is tried first. If that does not
 * parse, for instance because the model added braces in trailing commentary,
 * the span is shortened to each earlier "}" in turn until one parses.
 *
 * @param {string} raw Raw model output.
 * @returns {{ correct: boolean, reason: string }} `reason` is "" when absent
 *   or not a string.
 * @throws {Error} When the response contains no "{...}" span, or when
 *   `correct` is not a boolean.
 * @throws {SyntaxError} When no span starting at the first "{" is valid JSON.
 */
function parseCorrectnessResponse(raw) {
  const start = raw.indexOf("{");
  const lastClose = raw.lastIndexOf("}");
  if (start === -1 || lastClose < start) {
    throw new Error(`correctness checker: no JSON object in model response: ${raw.slice(0, 200)}`);
  }
  let parsed;
  let jsonText = "";
  let firstError;
  // Walk the closing brace backwards; lastIndexOf returns -1 once no earlier
  // "}" remains, which ends the loop.
  for (let end = lastClose; end > start; end = raw.lastIndexOf("}", end - 1)) {
    jsonText = raw.slice(start, end + 1);
    try {
      parsed = JSON.parse(jsonText);
      break;
    } catch (err) {
      if (firstError === void 0) firstError = err;
    }
  }
  if (parsed === void 0) {
    throw new SyntaxError(
      `correctness checker: malformed JSON in model response: ${raw.slice(start, start + 200)}`,
      { cause: firstError }
    );
  }
  if (typeof parsed.correct !== "boolean") {
    throw new Error(`correctness checker: 'correct' is not a boolean in: ${jsonText.slice(0, 200)}`);
  }
  return { correct: parsed.correct, reason: typeof parsed.reason === "string" ? parsed.reason : "" };
}
4712
/**
 * Create a correctness checker that asks an LLM whether produced content
 * fulfils a requirement.
 *
 * @param {{ chat: Function }} tc Chat client; `chat` receives
 *   `{ model, messages, temperature, maxTokens }`.
 * @param {{ model?: string, maxContentChars?: number }} [opts={}]
 *   `model` defaults to "claude-sonnet-4-6"; `maxContentChars` (default 8000)
 *   caps how much of the content is sent.
 * @returns {(requirement: { title: string, category?: string }, content: string) => Promise<{ correct: boolean, reason: string }>}
 *   The verdict parsed by `parseCorrectnessResponse`, which throws on an
 *   unusable reply.
 */
function createLlmCorrectnessChecker(tc, opts = {}) {
  const model = opts.model ?? "claude-sonnet-4-6";
  const maxContentChars = opts.maxContentChars ?? 8e3;
  const systemMessage = {
    role: "system",
    content: 'You verify whether a produced work artifact actually fulfils a stated requirement. Judge fulfilment only \u2014 is the deliverable substantively present and on-point \u2014 not polish. A plan to do it later, a vague gesture, or a description of what should be done does NOT fulfil a requirement; the artifact must BE the deliverable. Respond with a single JSON object: {"correct": boolean, "reason": string (<= 30 words)}.'
  };

  async function checkCorrectness(requirement, content) {
    // The category line is present only when the requirement has a category.
    const categoryLine = requirement.category ? `Category: ${requirement.category}
` : "";
    const userMessage = {
      role: "user",
      content: `Requirement: ${requirement.title}
${categoryLine}
Produced artifact:
${content.slice(0, maxContentChars)}`
    };
    const resp = await tc.chat({
      model,
      messages: [systemMessage, userMessage],
      temperature: 0,
      maxTokens: 200
    });
    const raw = resp.choices?.[0]?.message?.content ?? "";
    return parseCorrectnessResponse(raw);
  }

  return checkCorrectness;
}
4739
+
4523
4740
  // src/dual-agent-bench.ts
4524
4741
  var DualAgentBench = class {
4525
4742
  async run(config) {
@@ -5174,6 +5391,40 @@ function canonicalInstruction(value) {
5174
5391
  return normalized.length === 0 ? normalized : normalized[0].toUpperCase() + normalized.slice(1);
5175
5392
  }
5176
5393
 
5394
+ // src/produced-state.ts
5395
/**
 * Map a MIME type to a coarse artifact kind.
 *
 * MIME type and subtype names are case-insensitive, so the value is
 * normalised (trimmed and lower-cased) before matching. "Application/JSON"
 * and "TEXT/plain" therefore classify the same as their lower-case forms.
 *
 * @param {string | null | undefined} mimeType MIME type, possibly absent.
 * @returns {"json" | "text" | "file"} "json" when the type mentions json,
 *   "text" for `text/*`, otherwise "file" (also for a missing type).
 */
function artifactKind(mimeType) {
  if (!mimeType) return "file";
  const normalized = mimeType.trim().toLowerCase();
  if (normalized.includes("json")) return "json";
  if (normalized.startsWith("text/")) return "text";
  return "file";
}
5401
/**
 * Reduce an event stream to the artifacts, proposals and tool names it records.
 *
 * - "tool_call" events contribute `toolName`, once per distinct name, in
 *   first-seen order; events without a name are ignored.
 * - "artifact" events contribute `{ kind, path, content }`, where `path` is
 *   the first of `name`, `uri`, `artifactId` that is not null/undefined and
 *   `content` defaults to "".
 * - "proposal_created" events contribute `{ id, title, status }`, with
 *   `status` defaulting to "pending".
 * - Every other event type is ignored.
 *
 * NOTE(review): only "proposal_created" is read, so a status change carried
 * by a different event type would not be reflected here. Confirm against the
 * event schema.
 *
 * @param {Iterable<object>} events Events to scan.
 * @returns {{ artifacts: Array<object>, proposals: Array<object>, toolCalls: string[] }}
 */
function extractProducedState(events) {
  const produced = { artifacts: [], proposals: [], toolCalls: [] };
  const knownTools = /* @__PURE__ */ new Set();
  for (const event of events) {
    switch (event.type) {
      case "tool_call": {
        const toolName = event.toolName;
        if (!toolName || knownTools.has(toolName)) break;
        knownTools.add(toolName);
        produced.toolCalls.push(toolName);
        break;
      }
      case "artifact":
        produced.artifacts.push({
          kind: artifactKind(event.mimeType),
          path: event.name ?? event.uri ?? event.artifactId,
          content: event.content ?? ""
        });
        break;
      case "proposal_created":
        produced.proposals.push({
          id: event.proposalId,
          title: event.title,
          status: event.status ?? "pending"
        });
        break;
      default:
        break;
    }
  }
  return produced;
}
5427
+
5177
5428
  // src/prompt-registry.ts
5178
5429
  var PromptRegistry = class {
5179
5430
  entries = /* @__PURE__ */ new Map();
@@ -9092,8 +9343,8 @@ function ratio(numerator, denominator) {
9092
9343
  return denominator > 0 ? numerator / denominator : 0;
9093
9344
  }
9094
9345
  function tokenJaccard(a, b) {
9095
- const left = new Set(tokens(a));
9096
- const right = new Set(tokens(b));
9346
+ const left = new Set(tokens2(a));
9347
+ const right = new Set(tokens2(b));
9097
9348
  if (left.size === 0 || right.size === 0) return 0;
9098
9349
  let intersection = 0;
9099
9350
  for (const token of left) {
@@ -9111,7 +9362,7 @@ function tagOverlap(a, b) {
9111
9362
  }
9112
9363
  return intersection / Math.max(left.size, right.size);
9113
9364
  }
9114
- function tokens(text) {
9365
/**
 * Split normalised text on whitespace, keeping the words that are at least
 * three characters long and are not in STOP_WORDS.
 *
 * @param {string} text Text to tokenise; passed through `normalize` first.
 * @returns {string[]} Kept words in order; duplicates are preserved.
 */
function tokens2(text) {
  const words = normalize(text).split(/\s+/);
  return words.filter((word) => word.length >= 3 && !STOP_WORDS.has(word));
}
9117
9368
  function normalize(text) {
@@ -10545,6 +10796,7 @@ export {
10545
10796
  blockingKnowledgeEval,
10546
10797
  bonferroni,
10547
10798
  bootstrapCi,
10799
+ buildDriverSystemPrompt,
10548
10800
  buildReflectionPrompt,
10549
10801
  buildReviewerPrompt,
10550
10802
  buildTraceAnalystTools,
@@ -10595,6 +10847,7 @@ export {
10595
10847
  createFeedbackTrajectory,
10596
10848
  createIntentMatchJudge,
10597
10849
  createJudgeAdapter,
10850
+ createLlmCorrectnessChecker,
10598
10851
  createLlmReviewer,
10599
10852
  createReplayFetch,
10600
10853
  createRunCriticAdapter,
@@ -10607,6 +10860,7 @@ export {
10607
10860
  createVerifierAdapter,
10608
10861
  crossTraceDiff,
10609
10862
  crowdingDistance,
10863
+ decideNextUserTurn,
10610
10864
  decideReferenceReplayPromotion,
10611
10865
  decideReferenceReplayRunPromotion,
10612
10866
  defaultIsMaterial,
@@ -10637,6 +10891,7 @@ export {
10637
10891
  exportRunAsOtlp,
10638
10892
  extractAssetUrls,
10639
10893
  extractErrorCount,
10894
+ extractProducedState,
10640
10895
  feedbackTrajectoriesToDatasetScenarios,
10641
10896
  feedbackTrajectoriesToOptimizerRows,
10642
10897
  feedbackTrajectoryToDatasetScenario,
@@ -10714,6 +10969,7 @@ export {
10714
10969
  paretoChart,
10715
10970
  paretoFrontier,
10716
10971
  paretoFrontierWithCrowding,
10972
+ parseCorrectnessResponse,
10717
10973
  parseFeedbackTrajectoriesJsonl,
10718
10974
  parseFindingSubject,
10719
10975
  parseRawFinding,
@@ -10825,6 +11081,7 @@ export {
10825
11081
  userQuestionsForKnowledgeGaps,
10826
11082
  validateRunRecord,
10827
11083
  verbosityBias,
11084
+ verifyCompletion,
10828
11085
  verifyManifest,
10829
11086
  visualDiff,
10830
11087
  viteDeployRunner,