npm - @agentv/core - Versions diffs - 4.31.3 → 4.32.0-next.1 - Mend

@agentv/core 4.31.3 → 4.32.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/{chunk-A27NE3R7.js → chunk-N5EU446L.js} +70 -66
package/dist/chunk-N5EU446L.js.map +1 -0
package/dist/evaluation/validation/index.cjs +1 -0
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -0
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +163 -147
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +28 -16
package/dist/index.d.ts +28 -16
package/dist/index.js +22 -10
package/dist/index.js.map +1 -1
package/dist/{ts-eval-loader-XR6DNOZ3.js → ts-eval-loader-Z6IUSDNA.js} +2 -2
package/package.json +1 -1
package/dist/chunk-A27NE3R7.js.map +0 -1
package/dist/{ts-eval-loader-XR6DNOZ3.js.map → ts-eval-loader-Z6IUSDNA.js.map} +0 -0

package/dist/index.d.cts CHANGED Viewed

@@ -2008,8 +2008,12 @@ type AgentVConfig$1 = {
     readonly hooks?: HooksConfig;
 };
 /**
- * Load optional .agentv/config.yaml configuration file.
- * Searches from eval file directory up to repo root.
+ * Load optional AgentV YAML configuration.
+ *
+ * Project-local `.agentv/config.yaml` files are searched from the eval file
+ * directory up to the repo root. If no project-local config is found, AgentV
+ * falls back to the home/global config at `${AGENTV_HOME:-~/.agentv}/config.yaml`.
+ * The first valid file wins; there is intentionally no cross-file merge.
  */
 declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
 /**
@@ -4255,24 +4259,32 @@ declare function listGitRuns(repoDir: string, ref?: string): Promise<GitListedRu
 declare function materializeGitRun(repoDir: string, relativeRunPath: string, ref?: string): Promise<void>;
 /**
- * The default config directory (~/.agentv). Always resolves to the user's home
- * directory regardless of AGENTV_HOME. Used for lightweight, machine-local files
- * like version-check.json, last-config.json, and benchmarks.yaml.
+ * AgentV's lightweight home/config directory. Stores machine-local config files
+ * such as config.yaml, projects.yaml, version-check.json, last-config.json, and
+ * managed helper binaries. AGENTV_HOME relocates only this config/home surface.
  */
 declare function getAgentvConfigDir(): string;
 /**
- * The data root for heavy/large artifacts (workspaces, workspace-pool, subagents,
- * trace-state, cache, deps). Respects AGENTV_HOME override so users can relocate
- * bulky data to a different drive. Falls back to ~/.agentv when unset.
+ * Backward-compatible alias for AgentV's home/config directory.
+ * Prefer getAgentvConfigDir() for lightweight config files and
+ * getAgentvDataDir() for heavy runtime data.
  */
 declare function getAgentvHome(): string;
+/**
+ * AgentV's heavy runtime data directory. Stores workspaces, workspace pool,
+ * subagents, trace state, caches, downloaded dependencies, and results clones.
+ * AGENTV_DATA_DIR can separate this large data from AGENTV_HOME; when unset it
+ * falls back to AGENTV_HOME (or ~/.agentv) so existing AGENTV_HOME users keep
+ * their runtime data in the same location.
+ */
+declare function getAgentvDataDir(): string;
 declare function getWorkspacesRoot(): string;
 declare function getSubagentsRoot(): string;
 declare function getTraceStateRoot(): string;
 declare function getWorkspacePoolRoot(): string;
 /**
- * Project registry for AgentV Studio multi-project support.
+ * Project registry for AgentV Dashboard multi-project support.
  *
  * A Project = any directory containing a `.agentv/` folder. Projects hold
  * eval runs, and (incrementally) traces, spans, and other telemetry —
@@ -4280,7 +4292,7 @@ declare function getWorkspacePoolRoot(): string;
  * Braintrust, W&B Weave, and LangSmith.
  *
  * The registry lives at `~/.agentv/projects.yaml` and is the single source
- * of truth for which projects Studio shows. Studio re-reads the file on every
+ * of truth for which projects Dashboard shows. Dashboard re-reads the file on every
  * `/api/projects` request, so edits (direct, via POST /api/projects, via
  * the CLI's --add/--remove, or via a Kubernetes ConfigMap mount) are reflected
  * without restarting `agentv serve`.
@@ -4302,7 +4314,7 @@ declare function getWorkspacePoolRoot(): string;
  *
  * Concurrency: the registry assumes a single writer. All mutating calls
  * (add/remove/touchProject) do read-modify-write on projects.yaml
- * without a lock. Studio's HTTP handlers are serialized by Node's
+ * without a lock. Dashboard's HTTP handlers are serialized by Node's
  * single-threaded event loop, which satisfies the 24/7 deployment case.
  * Run only one `agentv` process against a given home at a time.
  *
@@ -4362,15 +4374,15 @@ declare function touchProject(projectId: string): void;
  * Scan a directory tree (up to maxDepth levels) for directories containing `.agentv/`.
  * Returns absolute paths of discovered project directories, sorted for
  * deterministic iteration. This is a one-shot helper for bulk registration;
- * Studio does not scan at request time.
+ * Dashboard does not scan at request time.
  */
 declare function discoverProjects(rootDir: string, maxDepth?: number): string[];
 /**
  * Project sync — pulls remote git repos to the local path declared in the
- * project registry before Studio/eval startup.
+ * project registry before Dashboard/eval startup.
  *
- * Sync is oneshot only, triggered by the Studio UI "Sync" button or the
+ * Sync is oneshot only, triggered by the Dashboard UI "Sync" button or the
  * `agentv project sync` CLI command. There is no daemon or continuous mode.
  *
  *   First run  — git clone --depth 1 --filter=blob:none --branch <ref> <url> <path>
@@ -4624,7 +4636,7 @@ declare function runBeforeSessionHook(command: string): void;
  * Tracks long-lived child processes spawned by AgentV providers so that a
  * top-level signal handler can kill them all on Ctrl+C / SIGTERM.
  *
- * Why this exists: when the CLI receives SIGTERM (e.g. from Studio's Stop
+ * Why this exists: when the CLI receives SIGTERM (e.g. from Dashboard's Stop
  * button), Node exits the parent process but does NOT propagate the signal
  * to grandchildren. Without tracking, the spawned `claude`, `codex`, `pi`,
  * `copilot` subprocesses linger as orphans. The CLI's signal handler walks
@@ -4966,4 +4978,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeProject, 
resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
+export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvDataDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, 
removeProject, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };

package/dist/index.d.ts CHANGED Viewed

@@ -2008,8 +2008,12 @@ type AgentVConfig$1 = {
     readonly hooks?: HooksConfig;
 };
 /**
- * Load optional .agentv/config.yaml configuration file.
- * Searches from eval file directory up to repo root.
+ * Load optional AgentV YAML configuration.
+ *
+ * Project-local `.agentv/config.yaml` files are searched from the eval file
+ * directory up to the repo root. If no project-local config is found, AgentV
+ * falls back to the home/global config at `${AGENTV_HOME:-~/.agentv}/config.yaml`.
+ * The first valid file wins; there is intentionally no cross-file merge.
  */
 declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
 /**
@@ -4255,24 +4259,32 @@ declare function listGitRuns(repoDir: string, ref?: string): Promise<GitListedRu
 declare function materializeGitRun(repoDir: string, relativeRunPath: string, ref?: string): Promise<void>;
 /**
- * The default config directory (~/.agentv). Always resolves to the user's home
- * directory regardless of AGENTV_HOME. Used for lightweight, machine-local files
- * like version-check.json, last-config.json, and benchmarks.yaml.
+ * AgentV's lightweight home/config directory. Stores machine-local config files
+ * such as config.yaml, projects.yaml, version-check.json, last-config.json, and
+ * managed helper binaries. AGENTV_HOME relocates only this config/home surface.
  */
 declare function getAgentvConfigDir(): string;
 /**
- * The data root for heavy/large artifacts (workspaces, workspace-pool, subagents,
- * trace-state, cache, deps). Respects AGENTV_HOME override so users can relocate
- * bulky data to a different drive. Falls back to ~/.agentv when unset.
+ * Backward-compatible alias for AgentV's home/config directory.
+ * Prefer getAgentvConfigDir() for lightweight config files and
+ * getAgentvDataDir() for heavy runtime data.
  */
 declare function getAgentvHome(): string;
+/**
+ * AgentV's heavy runtime data directory. Stores workspaces, workspace pool,
+ * subagents, trace state, caches, downloaded dependencies, and results clones.
+ * AGENTV_DATA_DIR can separate this large data from AGENTV_HOME; when unset it
+ * falls back to AGENTV_HOME (or ~/.agentv) so existing AGENTV_HOME users keep
+ * their runtime data in the same location.
+ */
+declare function getAgentvDataDir(): string;
 declare function getWorkspacesRoot(): string;
 declare function getSubagentsRoot(): string;
 declare function getTraceStateRoot(): string;
 declare function getWorkspacePoolRoot(): string;
 /**
- * Project registry for AgentV Studio multi-project support.
+ * Project registry for AgentV Dashboard multi-project support.
  *
  * A Project = any directory containing a `.agentv/` folder. Projects hold
  * eval runs, and (incrementally) traces, spans, and other telemetry —
@@ -4280,7 +4292,7 @@ declare function getWorkspacePoolRoot(): string;
  * Braintrust, W&B Weave, and LangSmith.
  *
  * The registry lives at `~/.agentv/projects.yaml` and is the single source
- * of truth for which projects Studio shows. Studio re-reads the file on every
+ * of truth for which projects Dashboard shows. Dashboard re-reads the file on every
  * `/api/projects` request, so edits (direct, via POST /api/projects, via
  * the CLI's --add/--remove, or via a Kubernetes ConfigMap mount) are reflected
  * without restarting `agentv serve`.
@@ -4302,7 +4314,7 @@ declare function getWorkspacePoolRoot(): string;
  *
  * Concurrency: the registry assumes a single writer. All mutating calls
  * (add/remove/touchProject) do read-modify-write on projects.yaml
- * without a lock. Studio's HTTP handlers are serialized by Node's
+ * without a lock. Dashboard's HTTP handlers are serialized by Node's
  * single-threaded event loop, which satisfies the 24/7 deployment case.
  * Run only one `agentv` process against a given home at a time.
  *
@@ -4362,15 +4374,15 @@ declare function touchProject(projectId: string): void;
  * Scan a directory tree (up to maxDepth levels) for directories containing `.agentv/`.
  * Returns absolute paths of discovered project directories, sorted for
  * deterministic iteration. This is a one-shot helper for bulk registration;
- * Studio does not scan at request time.
+ * Dashboard does not scan at request time.
  */
 declare function discoverProjects(rootDir: string, maxDepth?: number): string[];
 /**
  * Project sync — pulls remote git repos to the local path declared in the
- * project registry before Studio/eval startup.
+ * project registry before Dashboard/eval startup.
  *
- * Sync is oneshot only, triggered by the Studio UI "Sync" button or the
+ * Sync is oneshot only, triggered by the Dashboard UI "Sync" button or the
  * `agentv project sync` CLI command. There is no daemon or continuous mode.
  *
  *   First run  — git clone --depth 1 --filter=blob:none --branch <ref> <url> <path>
@@ -4624,7 +4636,7 @@ declare function runBeforeSessionHook(command: string): void;
  * Tracks long-lived child processes spawned by AgentV providers so that a
  * top-level signal handler can kill them all on Ctrl+C / SIGTERM.
  *
- * Why this exists: when the CLI receives SIGTERM (e.g. from Studio's Stop
+ * Why this exists: when the CLI receives SIGTERM (e.g. from Dashboard's Stop
  * button), Node exits the parent process but does NOT propagate the signal
  * to grandchildren. Without tracking, the spawned `claude`, `codex`, `pi`,
  * `copilot` subprocesses linger as orphans. The CLI's signal handler walks
@@ -4966,4 +4978,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeProject, 
resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
+export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, 
type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type GitListedRun, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type ProjectSource, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type 
RegexGraderConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsConfig, type ResultsRepoLocalPaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, RunBudgetTracker, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceEnvConfig, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, 
consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, directPushResults, directorySizeBytes, discoverAssertions, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProjects, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, formatToolCalls, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvDataDir, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getResultsRepoLocalPaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, killAllTrackedChildren, listGitRuns, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, materializeGitRun, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseEnvOutput, parseJsonFromText, parseJsonSafe, parseYamlValue, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, 
removeProject, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runBeforeSessionHook, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scanRepoDeps, scoreRangeEvaluationSchema, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncProject, syncProjects, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchProject, trackChild, trackedChildCount, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };

package/dist/index.js CHANGED Viewed

@@ -72,6 +72,7 @@ import {
   formatToolCalls,
   freeformEvaluationSchema,
   getAgentvConfigDir,
+  getAgentvDataDir,
   getAgentvHome,
   getSubagentsRoot,
   getTraceStateRoot,
@@ -132,7 +133,7 @@ import {
   tokensPerTool,
   trackChild,
   trackedChildCount
-} from "./chunk-A27NE3R7.js";
+} from "./chunk-N5EU446L.js";
 import {
   COMMON_TARGET_SETTINGS,
   TEST_MESSAGE_ROLES,
@@ -735,6 +736,8 @@ import os from "node:os";
 import path4 from "node:path";
 import { promisify } from "node:util";
 var execFileAsync = promisify(execFile);
+var RESULTS_REPO_RESULTS_DIR = ".agentv/results";
+var RESULTS_REPO_RUNS_DIR = `${RESULTS_REPO_RESULTS_DIR}/runs`;
 function sanitizeRepoSlug(repo) {
   return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
 }
@@ -754,7 +757,7 @@ function expandHome(p) {
 }
 function normalizeResultsConfig(config) {
   const repo = config.repo.trim();
-  const resolvedPath = config.path ? expandHome(config.path.trim()) : path4.join(getAgentvHome(), "results", sanitizeRepoSlug(repo));
+  const resolvedPath = config.path ? expandHome(config.path.trim()) : path4.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
   return {
     mode: "github",
     repo,
@@ -770,7 +773,7 @@ function resolveResultsRepoUrl(repo) {
   return `https://github.com/${repo}.git`;
 }
 function getResultsRepoLocalPaths(repo) {
-  const rootDir = path4.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
+  const rootDir = path4.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
   return {
     rootDir,
     repoDir: path4.join(rootDir, "repo"),
@@ -967,7 +970,7 @@ async function stageResultsArtifacts(params) {
 }
 function resolveResultsRepoRunsDir(config) {
   const normalized = normalizeResultsConfig(config);
-  return path4.join(normalized.path, "runs");
+  return path4.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
 }
 async function directorySizeBytes(targetPath) {
   const entry = await stat(targetPath);
@@ -1030,7 +1033,12 @@ async function directPushResults(params) {
   const repoDir = await ensureResultsRepoClone(normalized);
   const baseBranch = await resolveDefaultBranch(repoDir);
   await fetchResultsRepo(repoDir);
-  const destinationDir = path4.join(repoDir, "runs", params.destinationPath);
+  const destinationDir = path4.join(
+    repoDir,
+    RESULTS_REPO_RESULTS_DIR,
+    "runs",
+    params.destinationPath
+  );
   await stageResultsArtifacts({
     repoDir,
     sourceDir: params.sourceDir,
@@ -1178,9 +1186,12 @@ function parseGitBatchBlobs(output) {
   return blobs;
 }
 async function listGitRuns(repoDir, ref = "origin/main") {
-  const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, "runs"], {
-    cwd: repoDir
-  });
+  const { stdout: treeOut } = await runGit(
+    ["ls-tree", "-r", "--name-only", ref, RESULTS_REPO_RUNS_DIR],
+    {
+      cwd: repoDir
+    }
+  );
   const benchmarkPaths = treeOut.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.endsWith("/benchmark.json"));
   if (benchmarkPaths.length === 0) {
     return [];
@@ -1197,7 +1208,7 @@ async function listGitRuns(repoDir, ref = "origin/main") {
     const benchmarkPath = benchmarkPaths[index];
     const benchmark = JSON.parse(blob.content.toString("utf8"));
     const runDir = path4.posix.dirname(benchmarkPath);
-    const relativeRunPath = path4.posix.relative("runs", runDir);
+    const relativeRunPath = path4.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
     const runId = buildGitRunId(relativeRunPath);
     const timestamp = benchmark.metadata?.timestamp?.trim() || path4.posix.basename(runDir);
     const targets = benchmark.metadata?.targets ?? [];
@@ -1223,7 +1234,7 @@ async function listGitRuns(repoDir, ref = "origin/main") {
 }
 async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
   const normalizedRunPath = relativeRunPath.split(path4.sep).join("/");
-  const runTreePath = path4.posix.join("runs", normalizedRunPath);
+  const runTreePath = path4.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
   const targetRunDir = path4.join(repoDir, ...runTreePath.split("/"));
   const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
     cwd: repoDir
@@ -2859,6 +2870,7 @@ export {
   freeformEvaluationSchema,
   generateRubrics,
   getAgentvConfigDir,
+  getAgentvDataDir,
   getAgentvHome,
   getOutputFilenames,
   getProject,