@agentv/core 4.30.0 → 4.31.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-Z2BBOGE4.js → chunk-A27NE3R7.js} +28 -27
- package/dist/chunk-A27NE3R7.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +42 -33
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +42 -33
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +297 -76
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +34 -19
- package/dist/index.d.ts +34 -19
- package/dist/index.js +277 -51
- package/dist/index.js.map +1 -1
- package/dist/{ts-eval-loader-JL5DGTJL.js → ts-eval-loader-XR6DNOZ3.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-Z2BBOGE4.js.map +0 -1
- package/dist/{ts-eval-loader-JL5DGTJL.js.map → ts-eval-loader-XR6DNOZ3.js.map} +0 -0
package/dist/index.d.cts
CHANGED
|
@@ -1988,9 +1988,11 @@ type ExecutionDefaults = {
|
|
|
1988
1988
|
readonly pool_workspaces?: boolean;
|
|
1989
1989
|
readonly pool_slots?: number;
|
|
1990
1990
|
};
|
|
1991
|
-
type
|
|
1991
|
+
type ResultsConfig = {
|
|
1992
|
+
readonly mode: 'github';
|
|
1992
1993
|
readonly repo: string;
|
|
1993
|
-
|
|
1994
|
+
/** Local filesystem path for the results clone. Optional; defaults to ~/.agentv/results/<slug>/. */
|
|
1995
|
+
readonly path?: string;
|
|
1994
1996
|
readonly auto_push?: boolean;
|
|
1995
1997
|
readonly branch_prefix?: string;
|
|
1996
1998
|
};
|
|
@@ -2002,9 +2004,7 @@ type AgentVConfig$1 = {
|
|
|
2002
2004
|
readonly required_version?: string;
|
|
2003
2005
|
readonly eval_patterns?: readonly string[];
|
|
2004
2006
|
readonly execution?: ExecutionDefaults;
|
|
2005
|
-
readonly results?:
|
|
2006
|
-
readonly export?: ResultsExportConfig;
|
|
2007
|
-
};
|
|
2007
|
+
readonly results?: ResultsConfig;
|
|
2008
2008
|
readonly hooks?: HooksConfig;
|
|
2009
2009
|
};
|
|
2010
2010
|
/**
|
|
@@ -4174,7 +4174,7 @@ declare function toSnakeCaseDeep(obj: unknown): unknown;
|
|
|
4174
4174
|
*/
|
|
4175
4175
|
declare function toCamelCaseDeep(obj: unknown): unknown;
|
|
4176
4176
|
|
|
4177
|
-
interface
|
|
4177
|
+
interface ResultsRepoLocalPaths {
|
|
4178
4178
|
readonly rootDir: string;
|
|
4179
4179
|
readonly repoDir: string;
|
|
4180
4180
|
readonly statusFile: string;
|
|
@@ -4186,7 +4186,7 @@ interface ResultsRepoStatus {
|
|
|
4186
4186
|
readonly path?: string;
|
|
4187
4187
|
readonly auto_push?: boolean;
|
|
4188
4188
|
readonly branch_prefix?: string;
|
|
4189
|
-
readonly
|
|
4189
|
+
readonly local_dir?: string;
|
|
4190
4190
|
readonly last_synced_at?: string;
|
|
4191
4191
|
readonly last_error?: string;
|
|
4192
4192
|
}
|
|
@@ -4198,27 +4198,27 @@ interface CheckedOutResultsRepoBranch {
|
|
|
4198
4198
|
interface PreparedResultsRepoBranch extends CheckedOutResultsRepoBranch {
|
|
4199
4199
|
readonly cleanup: () => Promise<void>;
|
|
4200
4200
|
}
|
|
4201
|
-
declare function
|
|
4201
|
+
declare function normalizeResultsConfig(config: ResultsConfig): Required<ResultsConfig>;
|
|
4202
4202
|
declare function resolveResultsRepoUrl(repo: string): string;
|
|
4203
|
-
declare function
|
|
4204
|
-
declare function ensureResultsRepoClone(config:
|
|
4205
|
-
declare function getResultsRepoStatus(config?:
|
|
4206
|
-
declare function syncResultsRepo(config:
|
|
4207
|
-
declare function checkoutResultsRepoBranch(config:
|
|
4208
|
-
declare function prepareResultsRepoBranch(config:
|
|
4203
|
+
declare function getResultsRepoLocalPaths(repo: string): ResultsRepoLocalPaths;
|
|
4204
|
+
declare function ensureResultsRepoClone(config: ResultsConfig): Promise<string>;
|
|
4205
|
+
declare function getResultsRepoStatus(config?: ResultsConfig): ResultsRepoStatus;
|
|
4206
|
+
declare function syncResultsRepo(config: ResultsConfig): Promise<ResultsRepoStatus>;
|
|
4207
|
+
declare function checkoutResultsRepoBranch(config: ResultsConfig, branchName: string): Promise<CheckedOutResultsRepoBranch>;
|
|
4208
|
+
declare function prepareResultsRepoBranch(config: ResultsConfig, branchName: string): Promise<PreparedResultsRepoBranch>;
|
|
4209
4209
|
declare function stageResultsArtifacts(params: {
|
|
4210
4210
|
readonly repoDir: string;
|
|
4211
4211
|
readonly sourceDir: string;
|
|
4212
4212
|
readonly destinationDir: string;
|
|
4213
4213
|
}): Promise<void>;
|
|
4214
|
-
declare function resolveResultsRepoRunsDir(config:
|
|
4214
|
+
declare function resolveResultsRepoRunsDir(config: ResultsConfig): string;
|
|
4215
4215
|
declare function directorySizeBytes(targetPath: string): Promise<number>;
|
|
4216
4216
|
declare function commitAndPushResultsBranch(params: {
|
|
4217
4217
|
readonly repoDir: string;
|
|
4218
4218
|
readonly branchName: string;
|
|
4219
4219
|
readonly commitMessage: string;
|
|
4220
4220
|
}): Promise<boolean>;
|
|
4221
|
-
declare function pushResultsRepoBranch(config:
|
|
4221
|
+
declare function pushResultsRepoBranch(config: ResultsConfig, branchName: string, cwd?: string): Promise<void>;
|
|
4222
4222
|
declare function createDraftResultsPr(params: {
|
|
4223
4223
|
readonly repo: string;
|
|
4224
4224
|
readonly repoDir: string;
|
|
@@ -4229,15 +4229,30 @@ declare function createDraftResultsPr(params: {
|
|
|
4229
4229
|
}): Promise<string>;
|
|
4230
4230
|
/**
|
|
4231
4231
|
* Push results directly to the base branch of the results repo.
|
|
4232
|
-
* Handles non-fast-forward conflicts by
|
|
4232
|
+
* Handles non-fast-forward conflicts by fetching, rebasing, and retrying.
|
|
4233
4233
|
* Returns true if artifacts were pushed, false if no changes were detected.
|
|
4234
4234
|
*/
|
|
4235
4235
|
declare function directPushResults(params: {
|
|
4236
|
-
readonly config:
|
|
4236
|
+
readonly config: ResultsConfig;
|
|
4237
4237
|
readonly sourceDir: string;
|
|
4238
4238
|
readonly destinationPath: string;
|
|
4239
4239
|
readonly commitMessage: string;
|
|
4240
4240
|
}): Promise<boolean>;
|
|
4241
|
+
interface GitListedRun {
|
|
4242
|
+
run_id: string;
|
|
4243
|
+
experiment: string;
|
|
4244
|
+
timestamp: string;
|
|
4245
|
+
pass_rate?: number;
|
|
4246
|
+
target?: string;
|
|
4247
|
+
manifest_path: string;
|
|
4248
|
+
benchmark_path: string;
|
|
4249
|
+
display_name: string;
|
|
4250
|
+
test_count: number;
|
|
4251
|
+
avg_score: number;
|
|
4252
|
+
size_bytes: number;
|
|
4253
|
+
}
|
|
4254
|
+
declare function listGitRuns(repoDir: string, ref?: string): Promise<GitListedRun[]>;
|
|
4255
|
+
declare function materializeGitRun(repoDir: string, relativeRunPath: string, ref?: string): Promise<void>;
|
|
4241
4256
|
|
|
4242
4257
|
/**
|
|
4243
4258
|
* The default config directory (~/.agentv). Always resolves to the user's home
|
|
@@ -4951,4 +4966,4 @@ type AgentKernel = {
|
|
|
4951
4966
|
};
|
|
4952
4967
|
declare function createAgentKernel(): AgentKernel;
|
|
4953
4968
|
|
|
4954
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexGraderConfig, 
type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type
|
|
4969
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeProject, 
resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
package/dist/index.d.ts
CHANGED
|
@@ -1988,9 +1988,11 @@ type ExecutionDefaults = {
|
|
|
1988
1988
|
readonly pool_workspaces?: boolean;
|
|
1989
1989
|
readonly pool_slots?: number;
|
|
1990
1990
|
};
|
|
1991
|
-
type
|
|
1991
|
+
type ResultsConfig = {
|
|
1992
|
+
readonly mode: 'github';
|
|
1992
1993
|
readonly repo: string;
|
|
1993
|
-
|
|
1994
|
+
/** Local filesystem path for the results clone. Optional; defaults to ~/.agentv/results/<slug>/. */
|
|
1995
|
+
readonly path?: string;
|
|
1994
1996
|
readonly auto_push?: boolean;
|
|
1995
1997
|
readonly branch_prefix?: string;
|
|
1996
1998
|
};
|
|
@@ -2002,9 +2004,7 @@ type AgentVConfig$1 = {
|
|
|
2002
2004
|
readonly required_version?: string;
|
|
2003
2005
|
readonly eval_patterns?: readonly string[];
|
|
2004
2006
|
readonly execution?: ExecutionDefaults;
|
|
2005
|
-
readonly results?:
|
|
2006
|
-
readonly export?: ResultsExportConfig;
|
|
2007
|
-
};
|
|
2007
|
+
readonly results?: ResultsConfig;
|
|
2008
2008
|
readonly hooks?: HooksConfig;
|
|
2009
2009
|
};
|
|
2010
2010
|
/**
|
|
@@ -4174,7 +4174,7 @@ declare function toSnakeCaseDeep(obj: unknown): unknown;
|
|
|
4174
4174
|
*/
|
|
4175
4175
|
declare function toCamelCaseDeep(obj: unknown): unknown;
|
|
4176
4176
|
|
|
4177
|
-
interface
|
|
4177
|
+
interface ResultsRepoLocalPaths {
|
|
4178
4178
|
readonly rootDir: string;
|
|
4179
4179
|
readonly repoDir: string;
|
|
4180
4180
|
readonly statusFile: string;
|
|
@@ -4186,7 +4186,7 @@ interface ResultsRepoStatus {
|
|
|
4186
4186
|
readonly path?: string;
|
|
4187
4187
|
readonly auto_push?: boolean;
|
|
4188
4188
|
readonly branch_prefix?: string;
|
|
4189
|
-
readonly
|
|
4189
|
+
readonly local_dir?: string;
|
|
4190
4190
|
readonly last_synced_at?: string;
|
|
4191
4191
|
readonly last_error?: string;
|
|
4192
4192
|
}
|
|
@@ -4198,27 +4198,27 @@ interface CheckedOutResultsRepoBranch {
|
|
|
4198
4198
|
interface PreparedResultsRepoBranch extends CheckedOutResultsRepoBranch {
|
|
4199
4199
|
readonly cleanup: () => Promise<void>;
|
|
4200
4200
|
}
|
|
4201
|
-
declare function
|
|
4201
|
+
declare function normalizeResultsConfig(config: ResultsConfig): Required<ResultsConfig>;
|
|
4202
4202
|
declare function resolveResultsRepoUrl(repo: string): string;
|
|
4203
|
-
declare function
|
|
4204
|
-
declare function ensureResultsRepoClone(config:
|
|
4205
|
-
declare function getResultsRepoStatus(config?:
|
|
4206
|
-
declare function syncResultsRepo(config:
|
|
4207
|
-
declare function checkoutResultsRepoBranch(config:
|
|
4208
|
-
declare function prepareResultsRepoBranch(config:
|
|
4203
|
+
declare function getResultsRepoLocalPaths(repo: string): ResultsRepoLocalPaths;
|
|
4204
|
+
declare function ensureResultsRepoClone(config: ResultsConfig): Promise<string>;
|
|
4205
|
+
declare function getResultsRepoStatus(config?: ResultsConfig): ResultsRepoStatus;
|
|
4206
|
+
declare function syncResultsRepo(config: ResultsConfig): Promise<ResultsRepoStatus>;
|
|
4207
|
+
declare function checkoutResultsRepoBranch(config: ResultsConfig, branchName: string): Promise<CheckedOutResultsRepoBranch>;
|
|
4208
|
+
declare function prepareResultsRepoBranch(config: ResultsConfig, branchName: string): Promise<PreparedResultsRepoBranch>;
|
|
4209
4209
|
declare function stageResultsArtifacts(params: {
|
|
4210
4210
|
readonly repoDir: string;
|
|
4211
4211
|
readonly sourceDir: string;
|
|
4212
4212
|
readonly destinationDir: string;
|
|
4213
4213
|
}): Promise<void>;
|
|
4214
|
-
declare function resolveResultsRepoRunsDir(config:
|
|
4214
|
+
declare function resolveResultsRepoRunsDir(config: ResultsConfig): string;
|
|
4215
4215
|
declare function directorySizeBytes(targetPath: string): Promise<number>;
|
|
4216
4216
|
declare function commitAndPushResultsBranch(params: {
|
|
4217
4217
|
readonly repoDir: string;
|
|
4218
4218
|
readonly branchName: string;
|
|
4219
4219
|
readonly commitMessage: string;
|
|
4220
4220
|
}): Promise<boolean>;
|
|
4221
|
-
declare function pushResultsRepoBranch(config:
|
|
4221
|
+
declare function pushResultsRepoBranch(config: ResultsConfig, branchName: string, cwd?: string): Promise<void>;
|
|
4222
4222
|
declare function createDraftResultsPr(params: {
|
|
4223
4223
|
readonly repo: string;
|
|
4224
4224
|
readonly repoDir: string;
|
|
@@ -4229,15 +4229,30 @@ declare function createDraftResultsPr(params: {
|
|
|
4229
4229
|
}): Promise<string>;
|
|
4230
4230
|
/**
|
|
4231
4231
|
* Push results directly to the base branch of the results repo.
|
|
4232
|
-
* Handles non-fast-forward conflicts by
|
|
4232
|
+
* Handles non-fast-forward conflicts by fetching, rebasing, and retrying.
|
|
4233
4233
|
* Returns true if artifacts were pushed, false if no changes were detected.
|
|
4234
4234
|
*/
|
|
4235
4235
|
declare function directPushResults(params: {
|
|
4236
|
-
readonly config:
|
|
4236
|
+
readonly config: ResultsConfig;
|
|
4237
4237
|
readonly sourceDir: string;
|
|
4238
4238
|
readonly destinationPath: string;
|
|
4239
4239
|
readonly commitMessage: string;
|
|
4240
4240
|
}): Promise<boolean>;
|
|
4241
|
+
interface GitListedRun {
|
|
4242
|
+
run_id: string;
|
|
4243
|
+
experiment: string;
|
|
4244
|
+
timestamp: string;
|
|
4245
|
+
pass_rate?: number;
|
|
4246
|
+
target?: string;
|
|
4247
|
+
manifest_path: string;
|
|
4248
|
+
benchmark_path: string;
|
|
4249
|
+
display_name: string;
|
|
4250
|
+
test_count: number;
|
|
4251
|
+
avg_score: number;
|
|
4252
|
+
size_bytes: number;
|
|
4253
|
+
}
|
|
4254
|
+
declare function listGitRuns(repoDir: string, ref?: string): Promise<GitListedRun[]>;
|
|
4255
|
+
declare function materializeGitRun(repoDir: string, relativeRunPath: string, ref?: string): Promise<void>;
|
|
4241
4256
|
|
|
4242
4257
|
/**
|
|
4243
4258
|
* The default config directory (~/.agentv). Always resolves to the user's home
|
|
@@ -4951,4 +4966,4 @@ type AgentKernel = {
|
|
|
4951
4966
|
};
|
|
4952
4967
|
declare function createAgentKernel(): AgentKernel;
|
|
4953
4968
|
|
|
4954
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexGraderConfig, 
type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type
|
|
4969
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeProject, 
resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|