@agentv/core 4.15.5-next.1 → 4.15.6-next.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -661,6 +661,10 @@ type WorkspaceConfig = {
661
661
  readonly path?: string;
662
662
  /** Docker-based workspace: run grader commands inside a container */
663
663
  readonly docker?: DockerWorkspaceConfig;
664
+ /** Directory containing the workspace file when workspace is a file reference.
665
+ * Used as default cwd for hook commands so that file-referenced templates resolve
666
+ * relative paths from their own directory, not the eval file's directory. */
667
+ readonly workspaceFileDir?: string;
664
668
  };
665
669
  type CodeEvaluatorConfig = {
666
670
  readonly name: string;
@@ -3574,8 +3578,12 @@ interface ScriptExecutionContext {
3574
3578
  readonly evalRunId: string;
3575
3579
  readonly caseInput?: string;
3576
3580
  readonly caseMetadata?: Record<string, unknown>;
3577
- /** Directory containing the eval YAML file. Used as default cwd. */
3581
+ /** Directory containing the eval YAML file. Used as fallback cwd. */
3578
3582
  readonly evalDir?: string;
3583
+ /** Directory containing the workspace file (when workspace is a file reference).
3584
+ * Takes priority over evalDir as default cwd so that file-referenced templates
3585
+ * resolve relative paths from their own directory. */
3586
+ readonly workspaceFileDir?: string;
3579
3587
  }
3580
3588
  type ScriptFailureMode = 'fatal' | 'warn';
3581
3589
  /**
package/dist/index.d.ts CHANGED
@@ -661,6 +661,10 @@ type WorkspaceConfig = {
661
661
  readonly path?: string;
662
662
  /** Docker-based workspace: run grader commands inside a container */
663
663
  readonly docker?: DockerWorkspaceConfig;
664
+ /** Directory containing the workspace file when workspace is a file reference.
665
+ * Used as default cwd for hook commands so that file-referenced templates resolve
666
+ * relative paths from their own directory, not the eval file's directory. */
667
+ readonly workspaceFileDir?: string;
664
668
  };
665
669
  type CodeEvaluatorConfig = {
666
670
  readonly name: string;
@@ -3574,8 +3578,12 @@ interface ScriptExecutionContext {
3574
3578
  readonly evalRunId: string;
3575
3579
  readonly caseInput?: string;
3576
3580
  readonly caseMetadata?: Record<string, unknown>;
3577
- /** Directory containing the eval YAML file. Used as default cwd. */
3581
+ /** Directory containing the eval YAML file. Used as fallback cwd. */
3578
3582
  readonly evalDir?: string;
3583
+ /** Directory containing the workspace file (when workspace is a file reference).
3584
+ * Takes priority over evalDir as default cwd so that file-referenced templates
3585
+ * resolve relative paths from their own directory. */
3586
+ readonly workspaceFileDir?: string;
3579
3587
  }
3580
3588
  type ScriptFailureMode = 'fatal' | 'warn';
3581
3589
  /**
package/dist/index.js CHANGED
@@ -25,7 +25,7 @@ import {
25
25
  resolveDelegatedTargetDefinition,
26
26
  resolveFileReference,
27
27
  resolveTargetDefinition
28
- } from "./chunk-AOOU6PLC.js";
28
+ } from "./chunk-HVEQNYTC.js";
29
29
  import {
30
30
  execFileWithStdin,
31
31
  execShellWithStdin
@@ -3897,7 +3897,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
3897
3897
  const workspaceFileDir = path8.dirname(workspaceFilePath);
3898
3898
  const resolvedWorkspace = parseWorkspaceConfig(parsed, workspaceFileDir);
3899
3899
  if (resolvedWorkspace) {
3900
- return resolvedWorkspace;
3900
+ return { ...resolvedWorkspace, workspaceFileDir };
3901
3901
  }
3902
3902
  const parsedObject = parsed;
3903
3903
  if ("workspace" in parsedObject && isJsonObject(parsedObject.workspace)) {
@@ -3988,7 +3988,8 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
3988
3988
  ...hasHooks && { hooks: mergedHooks },
3989
3989
  mode: caseLevel.mode ?? suiteLevel.mode,
3990
3990
  path: caseLevel.path ?? suiteLevel.path,
3991
- docker: caseLevel.docker ?? suiteLevel.docker
3991
+ docker: caseLevel.docker ?? suiteLevel.docker,
3992
+ workspaceFileDir: caseLevel.workspaceFileDir ?? suiteLevel.workspaceFileDir
3992
3993
  };
3993
3994
  }
3994
3995
  function asString5(value) {
@@ -16634,7 +16635,7 @@ async function executeWorkspaceScript(config, context, failureMode = "fatal") {
16634
16635
  case_metadata: context.caseMetadata ?? null
16635
16636
  });
16636
16637
  const timeoutMs = config.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
16637
- const cwd = config.cwd ?? context.evalDir;
16638
+ const cwd = config.cwd ?? context.workspaceFileDir ?? context.evalDir;
16638
16639
  if (config.script !== void 0 && config.command === void 0) {
16639
16640
  console.warn(
16640
16641
  "\x1B[33mWarning: 'script' is deprecated in workspace config. Use 'command' instead.\x1B[0m"
@@ -17247,7 +17248,8 @@ async function runEvaluation(options) {
17247
17248
  workspacePath: sharedWorkspacePath,
17248
17249
  testId: "__before_all__",
17249
17250
  evalRunId,
17250
- evalDir
17251
+ evalDir,
17252
+ workspaceFileDir: suiteWorkspace?.workspaceFileDir
17251
17253
  };
17252
17254
  try {
17253
17255
  beforeAllOutput = await executeWorkspaceScript(
@@ -17272,7 +17274,8 @@ async function runEvaluation(options) {
17272
17274
  workspacePath: slot.path,
17273
17275
  testId: "__before_all__",
17274
17276
  evalRunId,
17275
- evalDir
17277
+ evalDir,
17278
+ workspaceFileDir: suiteWorkspace?.workspaceFileDir
17276
17279
  };
17277
17280
  try {
17278
17281
  const output = await executeWorkspaceScript(
@@ -17581,7 +17584,8 @@ async function runEvaluation(options) {
17581
17584
  workspacePath: wsPath,
17582
17585
  testId: "__after_all__",
17583
17586
  evalRunId,
17584
- evalDir
17587
+ evalDir,
17588
+ workspaceFileDir: suiteWorkspace?.workspaceFileDir
17585
17589
  };
17586
17590
  try {
17587
17591
  const afterAllOutput = await executeWorkspaceScript(
@@ -17957,7 +17961,8 @@ async function runEvalCase(options) {
17957
17961
  evalRunId: evalRunId ?? "",
17958
17962
  caseInput: evalCase.question,
17959
17963
  caseMetadata: evalCase.metadata,
17960
- evalDir
17964
+ evalDir,
17965
+ workspaceFileDir: evalCase.workspace?.workspaceFileDir
17961
17966
  };
17962
17967
  try {
17963
17968
  beforeAllOutput = await executeWorkspaceScript(
@@ -18027,7 +18032,8 @@ async function runEvalCase(options) {
18027
18032
  evalRunId: evalRunId ?? "",
18028
18033
  caseInput: evalCase.question,
18029
18034
  caseMetadata: evalCase.metadata,
18030
- evalDir
18035
+ evalDir,
18036
+ workspaceFileDir: evalCase.workspace?.workspaceFileDir
18031
18037
  };
18032
18038
  try {
18033
18039
  beforeEachOutput = await executeWorkspaceScript(
@@ -18231,7 +18237,8 @@ ${providerFileChanges}` : providerFileChanges;
18231
18237
  evalRunId: evalRunId ?? "",
18232
18238
  caseInput: evalCase.question,
18233
18239
  caseMetadata: evalCase.metadata,
18234
- evalDir
18240
+ evalDir,
18241
+ workspaceFileDir: evalCase.workspace?.workspaceFileDir
18235
18242
  };
18236
18243
  try {
18237
18244
  afterEachOutput = await executeWorkspaceScript(