@agentgrader/core 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -653,6 +653,7 @@ interface RunSingleInput {
653
653
  extraScorers?: Scorer[];
654
654
  /** links this run to an optimizer matrix run, if any */
655
655
  matrixId?: string;
656
+ onStep?: (step: StepEvent) => void;
656
657
  }
657
658
  interface RunSingleResult {
658
659
  runId: string;
package/dist/index.js CHANGED
@@ -641,6 +641,7 @@ async function runSingle(input) {
641
641
  tokensIn += stepEvent.tokensIn || 0;
642
642
  tokensOut += stepEvent.tokensOut || 0;
643
643
  costUsd += stepEvent.costUsd || 0;
644
+ input.onStep?.(stepEvent);
644
645
  if (db) {
645
646
  addTrace(db, {
646
647
  runId,
@@ -1008,7 +1009,7 @@ async function validateTestCase(input) {
1008
1009
  checks.push(...checkStaticFields(testCase));
1009
1010
  if (!testCase.test_command) {
1010
1011
  checks.push({
1011
- name: "execution-checks",
1012
+ name: "execution-checks (skipped - no test_command)",
1012
1013
  passed: true,
1013
1014
  detail: "No test_command configured; skipping pre/post-patch execution checks."
1014
1015
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentgrader/core",
3
- "version": "1.1.2",
3
+ "version": "1.1.3",
4
4
  "description": "Core schemas, contracts, and runner for the Agentgrader benchmarking framework",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -22,7 +22,7 @@
22
22
  "dev": "bun run src/index.ts"
23
23
  },
24
24
  "dependencies": {
25
- "@agentgrader/store": "^1.0.2",
25
+ "@agentgrader/store": "^1.0.3",
26
26
  "@mastra/core": "^1.41.0",
27
27
  "yaml": "^2.5.1",
28
28
  "zod": "^3.23.8"