@agentv/core 4.31.3 → 4.32.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-A27NE3R7.js → chunk-N5EU446L.js} +70 -66
- package/dist/chunk-N5EU446L.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -0
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +163 -147
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +28 -16
- package/dist/index.d.ts +28 -16
- package/dist/index.js +22 -10
- package/dist/index.js.map +1 -1
- package/dist/{ts-eval-loader-XR6DNOZ3.js → ts-eval-loader-Z6IUSDNA.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-A27NE3R7.js.map +0 -1
- package/dist/{ts-eval-loader-XR6DNOZ3.js.map → ts-eval-loader-Z6IUSDNA.js.map} +0 -0
package/dist/index.d.cts
CHANGED
|
@@ -2008,8 +2008,12 @@ type AgentVConfig$1 = {
|
|
|
2008
2008
|
readonly hooks?: HooksConfig;
|
|
2009
2009
|
};
|
|
2010
2010
|
/**
|
|
2011
|
-
* Load optional
|
|
2012
|
-
*
|
|
2011
|
+
* Load optional AgentV YAML configuration.
|
|
2012
|
+
*
|
|
2013
|
+
* Project-local `.agentv/config.yaml` files are searched from the eval file
|
|
2014
|
+
* directory up to the repo root. If no project-local config is found, AgentV
|
|
2015
|
+
* falls back to the home/global config at `${AGENTV_HOME:-~/.agentv}/config.yaml`.
|
|
2016
|
+
* The first valid file wins; there is intentionally no cross-file merge.
|
|
2013
2017
|
*/
|
|
2014
2018
|
declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
|
|
2015
2019
|
/**
|
|
@@ -4255,24 +4259,32 @@ declare function listGitRuns(repoDir: string, ref?: string): Promise<GitListedRu
|
|
|
4255
4259
|
declare function materializeGitRun(repoDir: string, relativeRunPath: string, ref?: string): Promise<void>;
|
|
4256
4260
|
|
|
4257
4261
|
/**
|
|
4258
|
-
*
|
|
4259
|
-
*
|
|
4260
|
-
*
|
|
4262
|
+
* AgentV's lightweight home/config directory. Stores machine-local config files
|
|
4263
|
+
* such as config.yaml, projects.yaml, version-check.json, last-config.json, and
|
|
4264
|
+
* managed helper binaries. AGENTV_HOME relocates only this config/home surface.
|
|
4261
4265
|
*/
|
|
4262
4266
|
declare function getAgentvConfigDir(): string;
|
|
4263
4267
|
/**
|
|
4264
|
-
*
|
|
4265
|
-
*
|
|
4266
|
-
*
|
|
4268
|
+
* Backward-compatible alias for AgentV's home/config directory.
|
|
4269
|
+
* Prefer getAgentvConfigDir() for lightweight config files and
|
|
4270
|
+
* getAgentvDataDir() for heavy runtime data.
|
|
4267
4271
|
*/
|
|
4268
4272
|
declare function getAgentvHome(): string;
|
|
4273
|
+
/**
|
|
4274
|
+
* AgentV's heavy runtime data directory. Stores workspaces, workspace pool,
|
|
4275
|
+
* subagents, trace state, caches, downloaded dependencies, and results clones.
|
|
4276
|
+
* AGENTV_DATA_DIR can separate this large data from AGENTV_HOME; when unset it
|
|
4277
|
+
* falls back to AGENTV_HOME (or ~/.agentv) so existing AGENTV_HOME users keep
|
|
4278
|
+
* their runtime data in the same location.
|
|
4279
|
+
*/
|
|
4280
|
+
declare function getAgentvDataDir(): string;
|
|
4269
4281
|
declare function getWorkspacesRoot(): string;
|
|
4270
4282
|
declare function getSubagentsRoot(): string;
|
|
4271
4283
|
declare function getTraceStateRoot(): string;
|
|
4272
4284
|
declare function getWorkspacePoolRoot(): string;
|
|
4273
4285
|
|
|
4274
4286
|
/**
|
|
4275
|
-
* Project registry for AgentV
|
|
4287
|
+
* Project registry for AgentV Dashboard multi-project support.
|
|
4276
4288
|
*
|
|
4277
4289
|
* A Project = any directory containing a `.agentv/` folder. Projects hold
|
|
4278
4290
|
* eval runs, and (incrementally) traces, spans, and other telemetry —
|
|
@@ -4280,7 +4292,7 @@ declare function getWorkspacePoolRoot(): string;
|
|
|
4280
4292
|
* Braintrust, W&B Weave, and LangSmith.
|
|
4281
4293
|
*
|
|
4282
4294
|
* The registry lives at `~/.agentv/projects.yaml` and is the single source
|
|
4283
|
-
* of truth for which projects
|
|
4295
|
+
* of truth for which projects Dashboard shows. Dashboard re-reads the file on every
|
|
4284
4296
|
* `/api/projects` request, so edits (direct, via POST /api/projects, via
|
|
4285
4297
|
* the CLI's --add/--remove, or via a Kubernetes ConfigMap mount) are reflected
|
|
4286
4298
|
* without restarting `agentv serve`.
|
|
@@ -4302,7 +4314,7 @@ declare function getWorkspacePoolRoot(): string;
|
|
|
4302
4314
|
*
|
|
4303
4315
|
* Concurrency: the registry assumes a single writer. All mutating calls
|
|
4304
4316
|
* (add/remove/touchProject) do read-modify-write on projects.yaml
|
|
4305
|
-
* without a lock.
|
|
4317
|
+
* without a lock. Dashboard's HTTP handlers are serialized by Node's
|
|
4306
4318
|
* single-threaded event loop, which satisfies the 24/7 deployment case.
|
|
4307
4319
|
* Run only one `agentv` process against a given home at a time.
|
|
4308
4320
|
*
|
|
@@ -4362,15 +4374,15 @@ declare function touchProject(projectId: string): void;
|
|
|
4362
4374
|
* Scan a directory tree (up to maxDepth levels) for directories containing `.agentv/`.
|
|
4363
4375
|
* Returns absolute paths of discovered project directories, sorted for
|
|
4364
4376
|
* deterministic iteration. This is a one-shot helper for bulk registration;
|
|
4365
|
-
*
|
|
4377
|
+
* Dashboard does not scan at request time.
|
|
4366
4378
|
*/
|
|
4367
4379
|
declare function discoverProjects(rootDir: string, maxDepth?: number): string[];
|
|
4368
4380
|
|
|
4369
4381
|
/**
|
|
4370
4382
|
* Project sync — pulls remote git repos to the local path declared in the
|
|
4371
|
-
* project registry before
|
|
4383
|
+
* project registry before Dashboard/eval startup.
|
|
4372
4384
|
*
|
|
4373
|
-
* Sync is oneshot only, triggered by the
|
|
4385
|
+
* Sync is one-shot only, triggered by the Dashboard UI "Sync" button or the
|
|
4374
4386
|
* `agentv project sync` CLI command. There is no daemon or continuous mode.
|
|
4375
4387
|
*
|
|
4376
4388
|
* First run — git clone --depth 1 --filter=blob:none --branch <ref> <url> <path>
|
|
@@ -4624,7 +4636,7 @@ declare function runBeforeSessionHook(command: string): void;
|
|
|
4624
4636
|
* Tracks long-lived child processes spawned by AgentV providers so that a
|
|
4625
4637
|
* top-level signal handler can kill them all on Ctrl+C / SIGTERM.
|
|
4626
4638
|
*
|
|
4627
|
-
* Why this exists: when the CLI receives SIGTERM (e.g. from
|
|
4639
|
+
* Why this exists: when the CLI receives SIGTERM (e.g. from Dashboard's Stop
|
|
4628
4640
|
* button), Node exits the parent process but does NOT propagate the signal
|
|
4629
4641
|
* to grandchildren. Without tracking, the spawned `claude`, `codex`, `pi`,
|
|
4630
4642
|
* `copilot` subprocesses linger as orphans. The CLI's signal handler walks
|
|
@@ -4966,4 +4978,4 @@ type AgentKernel = {
|
|
|
4966
4978
|
};
|
|
4967
4979
|
declare function createAgentKernel(): AgentKernel;
|
|
4968
4980
|
|
|
4969
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeProject, 
resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
|
4981
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvDataDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, 
removeProject, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
package/dist/index.d.ts
CHANGED
|
@@ -2008,8 +2008,12 @@ type AgentVConfig$1 = {
|
|
|
2008
2008
|
readonly hooks?: HooksConfig;
|
|
2009
2009
|
};
|
|
2010
2010
|
/**
|
|
2011
|
-
* Load optional
|
|
2012
|
-
*
|
|
2011
|
+
* Load optional AgentV YAML configuration.
|
|
2012
|
+
*
|
|
2013
|
+
* Project-local `.agentv/config.yaml` files are searched from the eval file
|
|
2014
|
+
* directory up to the repo root. If no project-local config is found, AgentV
|
|
2015
|
+
* falls back to the home/global config at `${AGENTV_HOME:-~/.agentv}/config.yaml`.
|
|
2016
|
+
* The first valid file wins; there is intentionally no cross-file merge.
|
|
2013
2017
|
*/
|
|
2014
2018
|
declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
|
|
2015
2019
|
/**
|
|
@@ -4255,24 +4259,32 @@ declare function listGitRuns(repoDir: string, ref?: string): Promise<GitListedRu
|
|
|
4255
4259
|
declare function materializeGitRun(repoDir: string, relativeRunPath: string, ref?: string): Promise<void>;
|
|
4256
4260
|
|
|
4257
4261
|
/**
|
|
4258
|
-
*
|
|
4259
|
-
*
|
|
4260
|
-
*
|
|
4262
|
+
* AgentV's lightweight home/config directory. Stores machine-local config files
|
|
4263
|
+
* such as config.yaml, projects.yaml, version-check.json, last-config.json, and
|
|
4264
|
+
* managed helper binaries. AGENTV_HOME relocates only this config/home surface.
|
|
4261
4265
|
*/
|
|
4262
4266
|
declare function getAgentvConfigDir(): string;
|
|
4263
4267
|
/**
|
|
4264
|
-
*
|
|
4265
|
-
*
|
|
4266
|
-
*
|
|
4268
|
+
* Backward-compatible alias for AgentV's home/config directory.
|
|
4269
|
+
* Prefer getAgentvConfigDir() for lightweight config files and
|
|
4270
|
+
* getAgentvDataDir() for heavy runtime data.
|
|
4267
4271
|
*/
|
|
4268
4272
|
declare function getAgentvHome(): string;
|
|
4273
|
+
/**
|
|
4274
|
+
* AgentV's heavy runtime data directory. Stores workspaces, workspace pool,
|
|
4275
|
+
* subagents, trace state, caches, downloaded dependencies, and results clones.
|
|
4276
|
+
* AGENTV_DATA_DIR can separate this large data from AGENTV_HOME; when unset it
|
|
4277
|
+
* falls back to AGENTV_HOME (or ~/.agentv) so existing AGENTV_HOME users keep
|
|
4278
|
+
* their runtime data in the same location.
|
|
4279
|
+
*/
|
|
4280
|
+
declare function getAgentvDataDir(): string;
|
|
4269
4281
|
declare function getWorkspacesRoot(): string;
|
|
4270
4282
|
declare function getSubagentsRoot(): string;
|
|
4271
4283
|
declare function getTraceStateRoot(): string;
|
|
4272
4284
|
declare function getWorkspacePoolRoot(): string;
|
|
4273
4285
|
|
|
4274
4286
|
/**
|
|
4275
|
-
* Project registry for AgentV
|
|
4287
|
+
* Project registry for AgentV Dashboard multi-project support.
|
|
4276
4288
|
*
|
|
4277
4289
|
* A Project = any directory containing a `.agentv/` folder. Projects hold
|
|
4278
4290
|
* eval runs, and (incrementally) traces, spans, and other telemetry —
|
|
@@ -4280,7 +4292,7 @@ declare function getWorkspacePoolRoot(): string;
|
|
|
4280
4292
|
* Braintrust, W&B Weave, and LangSmith.
|
|
4281
4293
|
*
|
|
4282
4294
|
* The registry lives at `~/.agentv/projects.yaml` and is the single source
|
|
4283
|
-
* of truth for which projects
|
|
4295
|
+
* of truth for which projects Dashboard shows. Dashboard re-reads the file on every
|
|
4284
4296
|
* `/api/projects` request, so edits (direct, via POST /api/projects, via
|
|
4285
4297
|
* the CLI's --add/--remove, or via a Kubernetes ConfigMap mount) are reflected
|
|
4286
4298
|
* without restarting `agentv serve`.
|
|
@@ -4302,7 +4314,7 @@ declare function getWorkspacePoolRoot(): string;
|
|
|
4302
4314
|
*
|
|
4303
4315
|
* Concurrency: the registry assumes a single writer. All mutating calls
|
|
4304
4316
|
* (add/remove/touchProject) do read-modify-write on projects.yaml
|
|
4305
|
-
* without a lock.
|
|
4317
|
+
* without a lock. Dashboard's HTTP handlers are serialized by Node's
|
|
4306
4318
|
* single-threaded event loop, which satisfies the 24/7 deployment case.
|
|
4307
4319
|
* Run only one `agentv` process against a given home at a time.
|
|
4308
4320
|
*
|
|
@@ -4362,15 +4374,15 @@ declare function touchProject(projectId: string): void;
|
|
|
4362
4374
|
* Scan a directory tree (up to maxDepth levels) for directories containing `.agentv/`.
|
|
4363
4375
|
* Returns absolute paths of discovered project directories, sorted for
|
|
4364
4376
|
* deterministic iteration. This is a one-shot helper for bulk registration;
|
|
4365
|
-
*
|
|
4377
|
+
* Dashboard does not scan at request time.
|
|
4366
4378
|
*/
|
|
4367
4379
|
declare function discoverProjects(rootDir: string, maxDepth?: number): string[];
|
|
4368
4380
|
|
|
4369
4381
|
/**
|
|
4370
4382
|
* Project sync — pulls remote git repos to the local path declared in the
|
|
4371
|
-
* project registry before
|
|
4383
|
+
* project registry before Dashboard/eval startup.
|
|
4372
4384
|
*
|
|
4373
|
-
* Sync is oneshot only, triggered by the
|
|
4385
|
+
* Sync is one-shot only, triggered by the Dashboard UI "Sync" button or the
|
|
4374
4386
|
* `agentv project sync` CLI command. There is no daemon or continuous mode.
|
|
4375
4387
|
*
|
|
4376
4388
|
* First run — git clone --depth 1 --filter=blob:none --branch <ref> <url> <path>
|
|
@@ -4624,7 +4636,7 @@ declare function runBeforeSessionHook(command: string): void;
|
|
|
4624
4636
|
* Tracks long-lived child processes spawned by AgentV providers so that a
|
|
4625
4637
|
* top-level signal handler can kill them all on Ctrl+C / SIGTERM.
|
|
4626
4638
|
*
|
|
4627
|
-
* Why this exists: when the CLI receives SIGTERM (e.g. from
|
|
4639
|
+
* Why this exists: when the CLI receives SIGTERM (e.g. from Dashboard's Stop
|
|
4628
4640
|
* button), Node exits the parent process but does NOT propagate the signal
|
|
4629
4641
|
* to grandchildren. Without tracking, the spawned `claude`, `codex`, `pi`,
|
|
4630
4642
|
* `copilot` subprocesses linger as orphans. The CLI's signal handler walks
|
|
@@ -4966,4 +4978,4 @@ type AgentKernel = {
|
|
|
4966
4978
|
};
|
|
4967
4979
|
declare function createAgentKernel(): AgentKernel;
|
|
4968
4980
|
|
|
4969
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeProject, 
resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
|
4981
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvDataDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, 
removeProject, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
package/dist/index.js
CHANGED
|
@@ -72,6 +72,7 @@ import {
|
|
|
72
72
|
formatToolCalls,
|
|
73
73
|
freeformEvaluationSchema,
|
|
74
74
|
getAgentvConfigDir,
|
|
75
|
+
getAgentvDataDir,
|
|
75
76
|
getAgentvHome,
|
|
76
77
|
getSubagentsRoot,
|
|
77
78
|
getTraceStateRoot,
|
|
@@ -132,7 +133,7 @@ import {
|
|
|
132
133
|
tokensPerTool,
|
|
133
134
|
trackChild,
|
|
134
135
|
trackedChildCount
|
|
135
|
-
} from "./chunk-A27NE3R7.js";
|
|
136
|
+
} from "./chunk-N5EU446L.js";
|
|
136
137
|
import {
|
|
137
138
|
COMMON_TARGET_SETTINGS,
|
|
138
139
|
TEST_MESSAGE_ROLES,
|
|
@@ -735,6 +736,8 @@ import os from "node:os";
|
|
|
735
736
|
import path4 from "node:path";
|
|
736
737
|
import { promisify } from "node:util";
|
|
737
738
|
var execFileAsync = promisify(execFile);
|
|
739
|
+
var RESULTS_REPO_RESULTS_DIR = ".agentv/results";
|
|
740
|
+
var RESULTS_REPO_RUNS_DIR = `${RESULTS_REPO_RESULTS_DIR}/runs`;
|
|
738
741
|
function sanitizeRepoSlug(repo) {
|
|
739
742
|
return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
|
|
740
743
|
}
|
|
@@ -754,7 +757,7 @@ function expandHome(p) {
|
|
|
754
757
|
}
|
|
755
758
|
function normalizeResultsConfig(config) {
|
|
756
759
|
const repo = config.repo.trim();
|
|
757
|
-
const resolvedPath = config.path ? expandHome(config.path.trim()) : path4.join(
|
|
760
|
+
const resolvedPath = config.path ? expandHome(config.path.trim()) : path4.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
|
|
758
761
|
return {
|
|
759
762
|
mode: "github",
|
|
760
763
|
repo,
|
|
@@ -770,7 +773,7 @@ function resolveResultsRepoUrl(repo) {
|
|
|
770
773
|
return `https://github.com/${repo}.git`;
|
|
771
774
|
}
|
|
772
775
|
function getResultsRepoLocalPaths(repo) {
|
|
773
|
-
const rootDir = path4.join(
|
|
776
|
+
const rootDir = path4.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
|
|
774
777
|
return {
|
|
775
778
|
rootDir,
|
|
776
779
|
repoDir: path4.join(rootDir, "repo"),
|
|
@@ -967,7 +970,7 @@ async function stageResultsArtifacts(params) {
|
|
|
967
970
|
}
|
|
968
971
|
function resolveResultsRepoRunsDir(config) {
|
|
969
972
|
const normalized = normalizeResultsConfig(config);
|
|
970
|
-
return path4.join(normalized.path, "runs");
|
|
973
|
+
return path4.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
|
|
971
974
|
}
|
|
972
975
|
async function directorySizeBytes(targetPath) {
|
|
973
976
|
const entry = await stat(targetPath);
|
|
@@ -1030,7 +1033,12 @@ async function directPushResults(params) {
|
|
|
1030
1033
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
1031
1034
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
1032
1035
|
await fetchResultsRepo(repoDir);
|
|
1033
|
-
const destinationDir = path4.join(
|
|
1036
|
+
const destinationDir = path4.join(
|
|
1037
|
+
repoDir,
|
|
1038
|
+
RESULTS_REPO_RESULTS_DIR,
|
|
1039
|
+
"runs",
|
|
1040
|
+
params.destinationPath
|
|
1041
|
+
);
|
|
1034
1042
|
await stageResultsArtifacts({
|
|
1035
1043
|
repoDir,
|
|
1036
1044
|
sourceDir: params.sourceDir,
|
|
@@ -1178,9 +1186,12 @@ function parseGitBatchBlobs(output) {
|
|
|
1178
1186
|
return blobs;
|
|
1179
1187
|
}
|
|
1180
1188
|
async function listGitRuns(repoDir, ref = "origin/main") {
|
|
1181
|
-
const { stdout: treeOut } = await runGit(
|
|
1182
|
-
|
|
1183
|
-
|
|
1189
|
+
const { stdout: treeOut } = await runGit(
|
|
1190
|
+
["ls-tree", "-r", "--name-only", ref, RESULTS_REPO_RUNS_DIR],
|
|
1191
|
+
{
|
|
1192
|
+
cwd: repoDir
|
|
1193
|
+
}
|
|
1194
|
+
);
|
|
1184
1195
|
const benchmarkPaths = treeOut.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.endsWith("/benchmark.json"));
|
|
1185
1196
|
if (benchmarkPaths.length === 0) {
|
|
1186
1197
|
return [];
|
|
@@ -1197,7 +1208,7 @@ async function listGitRuns(repoDir, ref = "origin/main") {
|
|
|
1197
1208
|
const benchmarkPath = benchmarkPaths[index];
|
|
1198
1209
|
const benchmark = JSON.parse(blob.content.toString("utf8"));
|
|
1199
1210
|
const runDir = path4.posix.dirname(benchmarkPath);
|
|
1200
|
-
const relativeRunPath = path4.posix.relative(
|
|
1211
|
+
const relativeRunPath = path4.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
|
|
1201
1212
|
const runId = buildGitRunId(relativeRunPath);
|
|
1202
1213
|
const timestamp = benchmark.metadata?.timestamp?.trim() || path4.posix.basename(runDir);
|
|
1203
1214
|
const targets = benchmark.metadata?.targets ?? [];
|
|
@@ -1223,7 +1234,7 @@ async function listGitRuns(repoDir, ref = "origin/main") {
|
|
|
1223
1234
|
}
|
|
1224
1235
|
async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
|
|
1225
1236
|
const normalizedRunPath = relativeRunPath.split(path4.sep).join("/");
|
|
1226
|
-
const runTreePath = path4.posix.join(
|
|
1237
|
+
const runTreePath = path4.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
|
|
1227
1238
|
const targetRunDir = path4.join(repoDir, ...runTreePath.split("/"));
|
|
1228
1239
|
const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
|
|
1229
1240
|
cwd: repoDir
|
|
@@ -2859,6 +2870,7 @@ export {
|
|
|
2859
2870
|
freeformEvaluationSchema,
|
|
2860
2871
|
generateRubrics,
|
|
2861
2872
|
getAgentvConfigDir,
|
|
2873
|
+
getAgentvDataDir,
|
|
2862
2874
|
getAgentvHome,
|
|
2863
2875
|
getOutputFilenames,
|
|
2864
2876
|
getProject,
|