@agentv/core 2.11.0 → 2.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1155,9 +1155,16 @@ declare const MetadataSchema: z.ZodObject<{
1155
1155
  type EvalMetadata = z.infer<typeof MetadataSchema>;
1156
1156
 
1157
1157
  declare const DEFAULT_EVAL_PATTERNS: readonly string[];
1158
+ type ExecutionDefaults = {
1159
+ readonly verbose?: boolean;
1160
+ readonly trace_file?: string;
1161
+ readonly keep_workspaces?: boolean;
1162
+ readonly otel_file?: string;
1163
+ };
1158
1164
  type AgentVConfig$1 = {
1159
1165
  readonly guideline_patterns?: readonly string[];
1160
1166
  readonly eval_patterns?: readonly string[];
1167
+ readonly execution?: ExecutionDefaults;
1161
1168
  };
1162
1169
  /**
1163
1170
  * Load optional .agentv/config.yaml configuration file.
@@ -1365,8 +1372,8 @@ declare const CliTargetConfigSchema: z.ZodObject<{
1365
1372
  keepTempFiles: z.ZodOptional<z.ZodBoolean>;
1366
1373
  }, "strict", z.ZodTypeAny, {
1367
1374
  command: string;
1368
- cwd?: string | undefined;
1369
1375
  verbose?: boolean | undefined;
1376
+ cwd?: string | undefined;
1370
1377
  filesFormat?: string | undefined;
1371
1378
  workspaceTemplate?: string | undefined;
1372
1379
  healthcheck?: {
@@ -1381,8 +1388,8 @@ declare const CliTargetConfigSchema: z.ZodObject<{
1381
1388
  timeoutMs?: number | undefined;
1382
1389
  }, {
1383
1390
  command: string;
1384
- cwd?: string | undefined;
1385
1391
  verbose?: boolean | undefined;
1392
+ cwd?: string | undefined;
1386
1393
  filesFormat?: string | undefined;
1387
1394
  workspaceTemplate?: string | undefined;
1388
1395
  healthcheck?: {
@@ -2298,7 +2305,7 @@ declare class RepoManager {
2298
2305
  * Creates on first access, fetches updates on subsequent calls.
2299
2306
  * Returns the absolute path to the cache directory.
2300
2307
  */
2301
- ensureCache(source: RepoSource): Promise<string>;
2308
+ ensureCache(source: RepoSource, depth?: number): Promise<string>;
2302
2309
  /**
2303
2310
  * Clone a repo from cache into the workspace at the configured path.
2304
2311
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
@@ -2308,6 +2315,13 @@ declare class RepoManager {
2308
2315
  materializeAll(repos: readonly RepoConfig[], workspacePath: string): Promise<void>;
2309
2316
  /** Reset repos in workspace to their checkout state. */
2310
2317
  reset(repos: readonly RepoConfig[], workspacePath: string, strategy: 'hard' | 'recreate'): Promise<void>;
2318
+ /**
2319
+ * Seed the cache from a local repository, setting the remote to a given URL.
2320
+ * Useful for avoiding slow network clones when a local clone already exists.
2321
+ */
2322
+ seedCache(localPath: string, remoteUrl: string, opts?: {
2323
+ force?: boolean;
2324
+ }): Promise<string>;
2311
2325
  /** Remove the entire cache directory. */
2312
2326
  cleanCache(): Promise<void>;
2313
2327
  }
@@ -3082,4 +3096,4 @@ type AgentKernel = {
3082
3096
  };
3083
3097
  declare function createAgentKernel(): AgentKernel;
3084
3098
 
3085
- export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };
3099
+ export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };
package/dist/index.d.ts CHANGED
@@ -1155,9 +1155,16 @@ declare const MetadataSchema: z.ZodObject<{
1155
1155
  type EvalMetadata = z.infer<typeof MetadataSchema>;
1156
1156
 
1157
1157
  declare const DEFAULT_EVAL_PATTERNS: readonly string[];
1158
+ type ExecutionDefaults = {
1159
+ readonly verbose?: boolean;
1160
+ readonly trace_file?: string;
1161
+ readonly keep_workspaces?: boolean;
1162
+ readonly otel_file?: string;
1163
+ };
1158
1164
  type AgentVConfig$1 = {
1159
1165
  readonly guideline_patterns?: readonly string[];
1160
1166
  readonly eval_patterns?: readonly string[];
1167
+ readonly execution?: ExecutionDefaults;
1161
1168
  };
1162
1169
  /**
1163
1170
  * Load optional .agentv/config.yaml configuration file.
@@ -1365,8 +1372,8 @@ declare const CliTargetConfigSchema: z.ZodObject<{
1365
1372
  keepTempFiles: z.ZodOptional<z.ZodBoolean>;
1366
1373
  }, "strict", z.ZodTypeAny, {
1367
1374
  command: string;
1368
- cwd?: string | undefined;
1369
1375
  verbose?: boolean | undefined;
1376
+ cwd?: string | undefined;
1370
1377
  filesFormat?: string | undefined;
1371
1378
  workspaceTemplate?: string | undefined;
1372
1379
  healthcheck?: {
@@ -1381,8 +1388,8 @@ declare const CliTargetConfigSchema: z.ZodObject<{
1381
1388
  timeoutMs?: number | undefined;
1382
1389
  }, {
1383
1390
  command: string;
1384
- cwd?: string | undefined;
1385
1391
  verbose?: boolean | undefined;
1392
+ cwd?: string | undefined;
1386
1393
  filesFormat?: string | undefined;
1387
1394
  workspaceTemplate?: string | undefined;
1388
1395
  healthcheck?: {
@@ -2298,7 +2305,7 @@ declare class RepoManager {
2298
2305
  * Creates on first access, fetches updates on subsequent calls.
2299
2306
  * Returns the absolute path to the cache directory.
2300
2307
  */
2301
- ensureCache(source: RepoSource): Promise<string>;
2308
+ ensureCache(source: RepoSource, depth?: number): Promise<string>;
2302
2309
  /**
2303
2310
  * Clone a repo from cache into the workspace at the configured path.
2304
2311
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
@@ -2308,6 +2315,13 @@ declare class RepoManager {
2308
2315
  materializeAll(repos: readonly RepoConfig[], workspacePath: string): Promise<void>;
2309
2316
  /** Reset repos in workspace to their checkout state. */
2310
2317
  reset(repos: readonly RepoConfig[], workspacePath: string, strategy: 'hard' | 'recreate'): Promise<void>;
2318
+ /**
2319
+ * Seed the cache from a local repository, setting the remote to a given URL.
2320
+ * Useful for avoiding slow network clones when a local clone already exists.
2321
+ */
2322
+ seedCache(localPath: string, remoteUrl: string, opts?: {
2323
+ force?: boolean;
2324
+ }): Promise<string>;
2311
2325
  /** Remove the entire cache directory. */
2312
2326
  cleanCache(): Promise<void>;
2313
2327
  }
@@ -3082,4 +3096,4 @@ type AgentKernel = {
3082
3096
  };
3083
3097
  declare function createAgentKernel(): AgentKernel;
3084
3098
 
3085
- export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };
3099
+ export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };
package/dist/index.js CHANGED
@@ -414,9 +414,14 @@ async function loadConfig(evalFilePath, repoRoot) {
414
414
  logWarning(`Invalid eval_patterns in ${configPath}, all entries must be strings`);
415
415
  continue;
416
416
  }
417
+ const executionDefaults = parseExecutionDefaults(
418
+ parsed.execution,
419
+ configPath
420
+ );
417
421
  return {
418
422
  guideline_patterns: guidelinePatterns,
419
- eval_patterns: evalPatterns
423
+ eval_patterns: evalPatterns,
424
+ execution: executionDefaults
420
425
  };
421
426
  } catch (error) {
422
427
  logWarning(
@@ -557,6 +562,36 @@ function extractTotalBudgetUsd(suite) {
557
562
  );
558
563
  return void 0;
559
564
  }
565
+ function parseExecutionDefaults(raw, configPath) {
566
+ if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
567
+ return void 0;
568
+ }
569
+ const obj = raw;
570
+ const result = {};
571
+ if (typeof obj.verbose === "boolean") {
572
+ result.verbose = obj.verbose;
573
+ } else if (obj.verbose !== void 0) {
574
+ logWarning(`Invalid execution.verbose in ${configPath}, expected boolean`);
575
+ }
576
+ const traceFile = obj.trace_file;
577
+ if (typeof traceFile === "string" && traceFile.trim().length > 0) {
578
+ result.trace_file = traceFile.trim();
579
+ } else if (traceFile !== void 0) {
580
+ logWarning(`Invalid execution.trace_file in ${configPath}, expected non-empty string`);
581
+ }
582
+ if (typeof obj.keep_workspaces === "boolean") {
583
+ result.keep_workspaces = obj.keep_workspaces;
584
+ } else if (obj.keep_workspaces !== void 0) {
585
+ logWarning(`Invalid execution.keep_workspaces in ${configPath}, expected boolean`);
586
+ }
587
+ const otelFile = obj.otel_file;
588
+ if (typeof otelFile === "string" && otelFile.trim().length > 0) {
589
+ result.otel_file = otelFile.trim();
590
+ } else if (otelFile !== void 0) {
591
+ logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
592
+ }
593
+ return Object.keys(result).length > 0 ? result : void 0;
594
+ }
560
595
  function logWarning(message) {
561
596
  console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
562
597
  }
@@ -5293,6 +5328,16 @@ var CopilotCliProvider = class {
5293
5328
  }
5294
5329
  const endTime = (/* @__PURE__ */ new Date()).toISOString();
5295
5330
  const durationMs = Date.now() - startMs;
5331
+ const rejectedCalls = completedToolCalls.filter((tc) => {
5332
+ const out = tc.output;
5333
+ return out && (out.code === "rejected" || out.code === "denied");
5334
+ });
5335
+ if (rejectedCalls.length > 0) {
5336
+ const tools = rejectedCalls.map((tc) => tc.tool).join(", ");
5337
+ throw new Error(
5338
+ `Copilot rejected ${rejectedCalls.length} tool call(s): ${tools}. Add args: ["--yolo"] to your target config or re-run with --yolo to bypass permission checks.`
5339
+ );
5340
+ }
5296
5341
  const outputMessages = [];
5297
5342
  if (completedToolCalls.length > 0) {
5298
5343
  outputMessages.push({
@@ -5325,7 +5370,7 @@ var CopilotCliProvider = class {
5325
5370
  }
5326
5371
  }
5327
5372
  buildCliArgs() {
5328
- const args = ["--acp", "--stdio", "--allow-all-tools"];
5373
+ const args = ["--acp", "--stdio", "--allow-all-tools", "--yolo"];
5329
5374
  if (this.config.model) {
5330
5375
  args.push("--model", this.config.model);
5331
5376
  }
@@ -12608,7 +12653,7 @@ var RepoManager = class {
12608
12653
  * Creates on first access, fetches updates on subsequent calls.
12609
12654
  * Returns the absolute path to the cache directory.
12610
12655
  */
12611
- async ensureCache(source) {
12656
+ async ensureCache(source, depth) {
12612
12657
  const key = cacheKey(source);
12613
12658
  const cachePath = path35.join(this.cacheDir, key);
12614
12659
  const lockPath = `${cachePath}.lock`;
@@ -12616,9 +12661,20 @@ var RepoManager = class {
12616
12661
  await acquireLock(lockPath);
12617
12662
  try {
12618
12663
  if (existsSync2(path35.join(cachePath, "HEAD"))) {
12619
- await git(["fetch", "--prune"], { cwd: cachePath });
12664
+ const fetchArgs = ["fetch", "--prune"];
12665
+ if (depth) {
12666
+ fetchArgs.push("--depth", String(depth));
12667
+ }
12668
+ await git(fetchArgs, { cwd: cachePath });
12620
12669
  } else {
12621
- await git(["clone", "--mirror", "--bare", getSourceUrl(source), cachePath]);
12670
+ const cloneArgs = ["clone", "--mirror", "--bare"];
12671
+ if (depth) {
12672
+ cloneArgs.push("--depth", String(depth));
12673
+ }
12674
+ const sourceUrl = getSourceUrl(source);
12675
+ const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
12676
+ cloneArgs.push(cloneUrl, cachePath);
12677
+ await git(cloneArgs);
12622
12678
  }
12623
12679
  } finally {
12624
12680
  await releaseLock(lockPath);
@@ -12631,7 +12687,7 @@ var RepoManager = class {
12631
12687
  */
12632
12688
  async materialize(repo, workspacePath) {
12633
12689
  const targetDir = path35.join(workspacePath, repo.path);
12634
- const cachePath = await this.ensureCache(repo.source);
12690
+ const cachePath = await this.ensureCache(repo.source, repo.clone?.depth);
12635
12691
  const cloneArgs = ["clone"];
12636
12692
  if (repo.clone?.depth) {
12637
12693
  cloneArgs.push("--depth", String(repo.clone.depth));
@@ -12707,6 +12763,33 @@ var RepoManager = class {
12707
12763
  await git(["clean", "-fd"], { cwd: targetDir });
12708
12764
  }
12709
12765
  }
12766
+ /**
12767
+ * Seed the cache from a local repository, setting the remote to a given URL.
12768
+ * Useful for avoiding slow network clones when a local clone already exists.
12769
+ */
12770
+ async seedCache(localPath, remoteUrl, opts) {
12771
+ const source = { type: "git", url: remoteUrl };
12772
+ const key = cacheKey(source);
12773
+ const cachePath = path35.join(this.cacheDir, key);
12774
+ const lockPath = `${cachePath}.lock`;
12775
+ await mkdir11(this.cacheDir, { recursive: true });
12776
+ await acquireLock(lockPath);
12777
+ try {
12778
+ if (existsSync2(path35.join(cachePath, "HEAD"))) {
12779
+ if (!opts?.force) {
12780
+ throw new Error(
12781
+ `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
12782
+ );
12783
+ }
12784
+ await rm5(cachePath, { recursive: true, force: true });
12785
+ }
12786
+ await git(["clone", "--mirror", "--bare", localPath, cachePath]);
12787
+ await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
12788
+ } finally {
12789
+ await releaseLock(lockPath);
12790
+ }
12791
+ return cachePath;
12792
+ }
12710
12793
  /** Remove the entire cache directory. */
12711
12794
  async cleanCache() {
12712
12795
  await rm5(this.cacheDir, { recursive: true, force: true });