@agentv/core 2.11.0 → 2.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +89 -6
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +18 -4
- package/dist/index.d.ts +18 -4
- package/dist/index.js +89 -6
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -1155,9 +1155,16 @@ declare const MetadataSchema: z.ZodObject<{
|
|
|
1155
1155
|
type EvalMetadata = z.infer<typeof MetadataSchema>;
|
|
1156
1156
|
|
|
1157
1157
|
declare const DEFAULT_EVAL_PATTERNS: readonly string[];
|
|
1158
|
+
type ExecutionDefaults = {
|
|
1159
|
+
readonly verbose?: boolean;
|
|
1160
|
+
readonly trace_file?: string;
|
|
1161
|
+
readonly keep_workspaces?: boolean;
|
|
1162
|
+
readonly otel_file?: string;
|
|
1163
|
+
};
|
|
1158
1164
|
type AgentVConfig$1 = {
|
|
1159
1165
|
readonly guideline_patterns?: readonly string[];
|
|
1160
1166
|
readonly eval_patterns?: readonly string[];
|
|
1167
|
+
readonly execution?: ExecutionDefaults;
|
|
1161
1168
|
};
|
|
1162
1169
|
/**
|
|
1163
1170
|
* Load optional .agentv/config.yaml configuration file.
|
|
@@ -1365,8 +1372,8 @@ declare const CliTargetConfigSchema: z.ZodObject<{
|
|
|
1365
1372
|
keepTempFiles: z.ZodOptional<z.ZodBoolean>;
|
|
1366
1373
|
}, "strict", z.ZodTypeAny, {
|
|
1367
1374
|
command: string;
|
|
1368
|
-
cwd?: string | undefined;
|
|
1369
1375
|
verbose?: boolean | undefined;
|
|
1376
|
+
cwd?: string | undefined;
|
|
1370
1377
|
filesFormat?: string | undefined;
|
|
1371
1378
|
workspaceTemplate?: string | undefined;
|
|
1372
1379
|
healthcheck?: {
|
|
@@ -1381,8 +1388,8 @@ declare const CliTargetConfigSchema: z.ZodObject<{
|
|
|
1381
1388
|
timeoutMs?: number | undefined;
|
|
1382
1389
|
}, {
|
|
1383
1390
|
command: string;
|
|
1384
|
-
cwd?: string | undefined;
|
|
1385
1391
|
verbose?: boolean | undefined;
|
|
1392
|
+
cwd?: string | undefined;
|
|
1386
1393
|
filesFormat?: string | undefined;
|
|
1387
1394
|
workspaceTemplate?: string | undefined;
|
|
1388
1395
|
healthcheck?: {
|
|
@@ -2298,7 +2305,7 @@ declare class RepoManager {
|
|
|
2298
2305
|
* Creates on first access, fetches updates on subsequent calls.
|
|
2299
2306
|
* Returns the absolute path to the cache directory.
|
|
2300
2307
|
*/
|
|
2301
|
-
ensureCache(source: RepoSource): Promise<string>;
|
|
2308
|
+
ensureCache(source: RepoSource, depth?: number): Promise<string>;
|
|
2302
2309
|
/**
|
|
2303
2310
|
* Clone a repo from cache into the workspace at the configured path.
|
|
2304
2311
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
@@ -2308,6 +2315,13 @@ declare class RepoManager {
|
|
|
2308
2315
|
materializeAll(repos: readonly RepoConfig[], workspacePath: string): Promise<void>;
|
|
2309
2316
|
/** Reset repos in workspace to their checkout state. */
|
|
2310
2317
|
reset(repos: readonly RepoConfig[], workspacePath: string, strategy: 'hard' | 'recreate'): Promise<void>;
|
|
2318
|
+
/**
|
|
2319
|
+
* Seed the cache from a local repository, setting the remote to a given URL.
|
|
2320
|
+
* Useful for avoiding slow network clones when a local clone already exists.
|
|
2321
|
+
*/
|
|
2322
|
+
seedCache(localPath: string, remoteUrl: string, opts?: {
|
|
2323
|
+
force?: boolean;
|
|
2324
|
+
}): Promise<string>;
|
|
2311
2325
|
/** Remove the entire cache directory. */
|
|
2312
2326
|
cleanCache(): Promise<void>;
|
|
2313
2327
|
}
|
|
@@ -3082,4 +3096,4 @@ type AgentKernel = {
|
|
|
3082
3096
|
};
|
|
3083
3097
|
declare function createAgentKernel(): AgentKernel;
|
|
3084
3098
|
|
|
3085
|
-
export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };
|
|
3099
|
+
export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };
|
package/dist/index.d.ts
CHANGED
|
@@ -1155,9 +1155,16 @@ declare const MetadataSchema: z.ZodObject<{
|
|
|
1155
1155
|
type EvalMetadata = z.infer<typeof MetadataSchema>;
|
|
1156
1156
|
|
|
1157
1157
|
declare const DEFAULT_EVAL_PATTERNS: readonly string[];
|
|
1158
|
+
type ExecutionDefaults = {
|
|
1159
|
+
readonly verbose?: boolean;
|
|
1160
|
+
readonly trace_file?: string;
|
|
1161
|
+
readonly keep_workspaces?: boolean;
|
|
1162
|
+
readonly otel_file?: string;
|
|
1163
|
+
};
|
|
1158
1164
|
type AgentVConfig$1 = {
|
|
1159
1165
|
readonly guideline_patterns?: readonly string[];
|
|
1160
1166
|
readonly eval_patterns?: readonly string[];
|
|
1167
|
+
readonly execution?: ExecutionDefaults;
|
|
1161
1168
|
};
|
|
1162
1169
|
/**
|
|
1163
1170
|
* Load optional .agentv/config.yaml configuration file.
|
|
@@ -1365,8 +1372,8 @@ declare const CliTargetConfigSchema: z.ZodObject<{
|
|
|
1365
1372
|
keepTempFiles: z.ZodOptional<z.ZodBoolean>;
|
|
1366
1373
|
}, "strict", z.ZodTypeAny, {
|
|
1367
1374
|
command: string;
|
|
1368
|
-
cwd?: string | undefined;
|
|
1369
1375
|
verbose?: boolean | undefined;
|
|
1376
|
+
cwd?: string | undefined;
|
|
1370
1377
|
filesFormat?: string | undefined;
|
|
1371
1378
|
workspaceTemplate?: string | undefined;
|
|
1372
1379
|
healthcheck?: {
|
|
@@ -1381,8 +1388,8 @@ declare const CliTargetConfigSchema: z.ZodObject<{
|
|
|
1381
1388
|
timeoutMs?: number | undefined;
|
|
1382
1389
|
}, {
|
|
1383
1390
|
command: string;
|
|
1384
|
-
cwd?: string | undefined;
|
|
1385
1391
|
verbose?: boolean | undefined;
|
|
1392
|
+
cwd?: string | undefined;
|
|
1386
1393
|
filesFormat?: string | undefined;
|
|
1387
1394
|
workspaceTemplate?: string | undefined;
|
|
1388
1395
|
healthcheck?: {
|
|
@@ -2298,7 +2305,7 @@ declare class RepoManager {
|
|
|
2298
2305
|
* Creates on first access, fetches updates on subsequent calls.
|
|
2299
2306
|
* Returns the absolute path to the cache directory.
|
|
2300
2307
|
*/
|
|
2301
|
-
ensureCache(source: RepoSource): Promise<string>;
|
|
2308
|
+
ensureCache(source: RepoSource, depth?: number): Promise<string>;
|
|
2302
2309
|
/**
|
|
2303
2310
|
* Clone a repo from cache into the workspace at the configured path.
|
|
2304
2311
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
@@ -2308,6 +2315,13 @@ declare class RepoManager {
|
|
|
2308
2315
|
materializeAll(repos: readonly RepoConfig[], workspacePath: string): Promise<void>;
|
|
2309
2316
|
/** Reset repos in workspace to their checkout state. */
|
|
2310
2317
|
reset(repos: readonly RepoConfig[], workspacePath: string, strategy: 'hard' | 'recreate'): Promise<void>;
|
|
2318
|
+
/**
|
|
2319
|
+
* Seed the cache from a local repository, setting the remote to a given URL.
|
|
2320
|
+
* Useful for avoiding slow network clones when a local clone already exists.
|
|
2321
|
+
*/
|
|
2322
|
+
seedCache(localPath: string, remoteUrl: string, opts?: {
|
|
2323
|
+
force?: boolean;
|
|
2324
|
+
}): Promise<string>;
|
|
2311
2325
|
/** Remove the entire cache directory. */
|
|
2312
2326
|
cleanCache(): Promise<void>;
|
|
2313
2327
|
}
|
|
@@ -3082,4 +3096,4 @@ type AgentKernel = {
|
|
|
3082
3096
|
};
|
|
3083
3097
|
declare function createAgentKernel(): AgentKernel;
|
|
3084
3098
|
|
|
3085
|
-
export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };
|
|
3099
|
+
export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };
|
package/dist/index.js
CHANGED
|
@@ -414,9 +414,14 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
414
414
|
logWarning(`Invalid eval_patterns in ${configPath}, all entries must be strings`);
|
|
415
415
|
continue;
|
|
416
416
|
}
|
|
417
|
+
const executionDefaults = parseExecutionDefaults(
|
|
418
|
+
parsed.execution,
|
|
419
|
+
configPath
|
|
420
|
+
);
|
|
417
421
|
return {
|
|
418
422
|
guideline_patterns: guidelinePatterns,
|
|
419
|
-
eval_patterns: evalPatterns
|
|
423
|
+
eval_patterns: evalPatterns,
|
|
424
|
+
execution: executionDefaults
|
|
420
425
|
};
|
|
421
426
|
} catch (error) {
|
|
422
427
|
logWarning(
|
|
@@ -557,6 +562,36 @@ function extractTotalBudgetUsd(suite) {
|
|
|
557
562
|
);
|
|
558
563
|
return void 0;
|
|
559
564
|
}
|
|
565
|
+
function parseExecutionDefaults(raw, configPath) {
|
|
566
|
+
if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
|
|
567
|
+
return void 0;
|
|
568
|
+
}
|
|
569
|
+
const obj = raw;
|
|
570
|
+
const result = {};
|
|
571
|
+
if (typeof obj.verbose === "boolean") {
|
|
572
|
+
result.verbose = obj.verbose;
|
|
573
|
+
} else if (obj.verbose !== void 0) {
|
|
574
|
+
logWarning(`Invalid execution.verbose in ${configPath}, expected boolean`);
|
|
575
|
+
}
|
|
576
|
+
const traceFile = obj.trace_file;
|
|
577
|
+
if (typeof traceFile === "string" && traceFile.trim().length > 0) {
|
|
578
|
+
result.trace_file = traceFile.trim();
|
|
579
|
+
} else if (traceFile !== void 0) {
|
|
580
|
+
logWarning(`Invalid execution.trace_file in ${configPath}, expected non-empty string`);
|
|
581
|
+
}
|
|
582
|
+
if (typeof obj.keep_workspaces === "boolean") {
|
|
583
|
+
result.keep_workspaces = obj.keep_workspaces;
|
|
584
|
+
} else if (obj.keep_workspaces !== void 0) {
|
|
585
|
+
logWarning(`Invalid execution.keep_workspaces in ${configPath}, expected boolean`);
|
|
586
|
+
}
|
|
587
|
+
const otelFile = obj.otel_file;
|
|
588
|
+
if (typeof otelFile === "string" && otelFile.trim().length > 0) {
|
|
589
|
+
result.otel_file = otelFile.trim();
|
|
590
|
+
} else if (otelFile !== void 0) {
|
|
591
|
+
logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
|
|
592
|
+
}
|
|
593
|
+
return Object.keys(result).length > 0 ? result : void 0;
|
|
594
|
+
}
|
|
560
595
|
function logWarning(message) {
|
|
561
596
|
console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
|
|
562
597
|
}
|
|
@@ -5293,6 +5328,16 @@ var CopilotCliProvider = class {
|
|
|
5293
5328
|
}
|
|
5294
5329
|
const endTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
5295
5330
|
const durationMs = Date.now() - startMs;
|
|
5331
|
+
const rejectedCalls = completedToolCalls.filter((tc) => {
|
|
5332
|
+
const out = tc.output;
|
|
5333
|
+
return out && (out.code === "rejected" || out.code === "denied");
|
|
5334
|
+
});
|
|
5335
|
+
if (rejectedCalls.length > 0) {
|
|
5336
|
+
const tools = rejectedCalls.map((tc) => tc.tool).join(", ");
|
|
5337
|
+
throw new Error(
|
|
5338
|
+
`Copilot rejected ${rejectedCalls.length} tool call(s): ${tools}. Add args: ["--yolo"] to your target config or re-run with --yolo to bypass permission checks.`
|
|
5339
|
+
);
|
|
5340
|
+
}
|
|
5296
5341
|
const outputMessages = [];
|
|
5297
5342
|
if (completedToolCalls.length > 0) {
|
|
5298
5343
|
outputMessages.push({
|
|
@@ -5325,7 +5370,7 @@ var CopilotCliProvider = class {
|
|
|
5325
5370
|
}
|
|
5326
5371
|
}
|
|
5327
5372
|
buildCliArgs() {
|
|
5328
|
-
const args = ["--acp", "--stdio", "--allow-all-tools"];
|
|
5373
|
+
const args = ["--acp", "--stdio", "--allow-all-tools", "--yolo"];
|
|
5329
5374
|
if (this.config.model) {
|
|
5330
5375
|
args.push("--model", this.config.model);
|
|
5331
5376
|
}
|
|
@@ -12608,7 +12653,7 @@ var RepoManager = class {
|
|
|
12608
12653
|
* Creates on first access, fetches updates on subsequent calls.
|
|
12609
12654
|
* Returns the absolute path to the cache directory.
|
|
12610
12655
|
*/
|
|
12611
|
-
async ensureCache(source) {
|
|
12656
|
+
async ensureCache(source, depth) {
|
|
12612
12657
|
const key = cacheKey(source);
|
|
12613
12658
|
const cachePath = path35.join(this.cacheDir, key);
|
|
12614
12659
|
const lockPath = `${cachePath}.lock`;
|
|
@@ -12616,9 +12661,20 @@ var RepoManager = class {
|
|
|
12616
12661
|
await acquireLock(lockPath);
|
|
12617
12662
|
try {
|
|
12618
12663
|
if (existsSync2(path35.join(cachePath, "HEAD"))) {
|
|
12619
|
-
|
|
12664
|
+
const fetchArgs = ["fetch", "--prune"];
|
|
12665
|
+
if (depth) {
|
|
12666
|
+
fetchArgs.push("--depth", String(depth));
|
|
12667
|
+
}
|
|
12668
|
+
await git(fetchArgs, { cwd: cachePath });
|
|
12620
12669
|
} else {
|
|
12621
|
-
|
|
12670
|
+
const cloneArgs = ["clone", "--mirror", "--bare"];
|
|
12671
|
+
if (depth) {
|
|
12672
|
+
cloneArgs.push("--depth", String(depth));
|
|
12673
|
+
}
|
|
12674
|
+
const sourceUrl = getSourceUrl(source);
|
|
12675
|
+
const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
12676
|
+
cloneArgs.push(cloneUrl, cachePath);
|
|
12677
|
+
await git(cloneArgs);
|
|
12622
12678
|
}
|
|
12623
12679
|
} finally {
|
|
12624
12680
|
await releaseLock(lockPath);
|
|
@@ -12631,7 +12687,7 @@ var RepoManager = class {
|
|
|
12631
12687
|
*/
|
|
12632
12688
|
async materialize(repo, workspacePath) {
|
|
12633
12689
|
const targetDir = path35.join(workspacePath, repo.path);
|
|
12634
|
-
const cachePath = await this.ensureCache(repo.source);
|
|
12690
|
+
const cachePath = await this.ensureCache(repo.source, repo.clone?.depth);
|
|
12635
12691
|
const cloneArgs = ["clone"];
|
|
12636
12692
|
if (repo.clone?.depth) {
|
|
12637
12693
|
cloneArgs.push("--depth", String(repo.clone.depth));
|
|
@@ -12707,6 +12763,33 @@ var RepoManager = class {
|
|
|
12707
12763
|
await git(["clean", "-fd"], { cwd: targetDir });
|
|
12708
12764
|
}
|
|
12709
12765
|
}
|
|
12766
|
+
/**
|
|
12767
|
+
* Seed the cache from a local repository, setting the remote to a given URL.
|
|
12768
|
+
* Useful for avoiding slow network clones when a local clone already exists.
|
|
12769
|
+
*/
|
|
12770
|
+
async seedCache(localPath, remoteUrl, opts) {
|
|
12771
|
+
const source = { type: "git", url: remoteUrl };
|
|
12772
|
+
const key = cacheKey(source);
|
|
12773
|
+
const cachePath = path35.join(this.cacheDir, key);
|
|
12774
|
+
const lockPath = `${cachePath}.lock`;
|
|
12775
|
+
await mkdir11(this.cacheDir, { recursive: true });
|
|
12776
|
+
await acquireLock(lockPath);
|
|
12777
|
+
try {
|
|
12778
|
+
if (existsSync2(path35.join(cachePath, "HEAD"))) {
|
|
12779
|
+
if (!opts?.force) {
|
|
12780
|
+
throw new Error(
|
|
12781
|
+
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
12782
|
+
);
|
|
12783
|
+
}
|
|
12784
|
+
await rm5(cachePath, { recursive: true, force: true });
|
|
12785
|
+
}
|
|
12786
|
+
await git(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
12787
|
+
await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
12788
|
+
} finally {
|
|
12789
|
+
await releaseLock(lockPath);
|
|
12790
|
+
}
|
|
12791
|
+
return cachePath;
|
|
12792
|
+
}
|
|
12710
12793
|
/** Remove the entire cache directory. */
|
|
12711
12794
|
async cleanCache() {
|
|
12712
12795
|
await rm5(this.cacheDir, { recursive: true, force: true });
|