npm - @agentv/core - Versions diffs - 4.6.0 → 4.6.1 - Mend

@agentv/core 4.6.0 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/{chunk-AIQ5FO4G.js → chunk-ZK4GG7PR.js} +61 -1
package/dist/chunk-ZK4GG7PR.js.map +1 -0
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +225 -60
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +4 -1
package/dist/index.d.ts +4 -1
package/dist/index.js +174 -69
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-AIQ5FO4G.js.map +0 -1

package/dist/index.d.cts CHANGED Viewed

@@ -1804,6 +1804,7 @@ interface PiCodingAgentResolvedConfig {
     readonly subprovider?: string;
     readonly model?: string;
     readonly apiKey?: string;
+    readonly baseUrl?: string;
     readonly tools?: string;
     readonly thinking?: string;
     readonly cwd?: string;
@@ -1818,6 +1819,7 @@ interface PiCliResolvedConfig {
     readonly subprovider?: string;
     readonly model?: string;
     readonly apiKey?: string;
+    readonly baseUrl?: string;
     readonly tools?: string;
     readonly thinking?: string;
     readonly args?: readonly string[];
@@ -1936,6 +1938,7 @@ type ResolvedTarget = (ResolvedTargetBase & {
  * here automatically makes it valid in targets.yaml without a separate update.
  */
 declare const COMMON_TARGET_SETTINGS: readonly ["use_target", "provider_batching", "providerBatching", "subagent_mode_allowed", "subagentModeAllowed", "fallback_targets", "fallbackTargets"];
+declare function resolveDelegatedTargetDefinition(name: string, definitions: ReadonlyMap<string, TargetDefinition>, env?: EnvLookup): TargetDefinition | undefined;
 declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup, evalFilePath?: string): ResolvedTarget;
 /**
@@ -3909,4 +3912,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type ChildEvaluatorResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentText, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type 
ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type 
SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, discoverAssertions, discoverClaudeSessions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProjects, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, 
extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseClaudeSession, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, removeProject, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, touchProject, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
+export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type ChildEvaluatorResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentText, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type 
ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type 
SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, discoverAssertions, discoverClaudeSessions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProjects, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, 
extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseClaudeSession, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, removeProject, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, touchProject, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };

package/dist/index.d.ts CHANGED Viewed

@@ -1804,6 +1804,7 @@ interface PiCodingAgentResolvedConfig {
     readonly subprovider?: string;
     readonly model?: string;
     readonly apiKey?: string;
+    readonly baseUrl?: string;
     readonly tools?: string;
     readonly thinking?: string;
     readonly cwd?: string;
@@ -1818,6 +1819,7 @@ interface PiCliResolvedConfig {
     readonly subprovider?: string;
     readonly model?: string;
     readonly apiKey?: string;
+    readonly baseUrl?: string;
     readonly tools?: string;
     readonly thinking?: string;
     readonly args?: readonly string[];
@@ -1936,6 +1938,7 @@ type ResolvedTarget = (ResolvedTargetBase & {
  * here automatically makes it valid in targets.yaml without a separate update.
  */
 declare const COMMON_TARGET_SETTINGS: readonly ["use_target", "provider_batching", "providerBatching", "subagent_mode_allowed", "subagentModeAllowed", "fallback_targets", "fallbackTargets"];
+declare function resolveDelegatedTargetDefinition(name: string, definitions: ReadonlyMap<string, TargetDefinition>, env?: EnvLookup): TargetDefinition | undefined;
 declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup, evalFilePath?: string): ResolvedTarget;
 /**
@@ -3909,4 +3912,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type ChildEvaluatorResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentText, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type 
ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type 
SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, discoverAssertions, discoverClaudeSessions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProjects, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, 
extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseClaudeSession, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, removeProject, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, touchProject, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
+export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type ChildEvaluatorResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentText, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type 
ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type 
SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, discoverAssertions, discoverClaudeSessions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProjects, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, 
extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseClaudeSession, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, removeProject, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, touchProject, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };

package/dist/index.js CHANGED Viewed

@@ -21,9 +21,10 @@ import {
   normalizeLineEndings,
   readJsonFile,
   readTextFile,
+  resolveDelegatedTargetDefinition,
   resolveFileReference,
   resolveTargetDefinition
-} from "./chunk-AIQ5FO4G.js";
+} from "./chunk-ZK4GG7PR.js";
 import {
   AgentvProvider
 } from "./chunk-PRNXHNLF.js";
@@ -5216,15 +5217,16 @@ var CliProvider = class {
       outputFilePath
     );
     const renderedCommand = renderTemplate(this.config.command, templateValues);
+    const effectiveCwd = requests[0]?.cwd ?? this.config.cwd;
     if (this.verbose) {
       console.log(
-        `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
+        `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
       );
     }
     try {
       const startTime = Date.now();
       const result = await this.runCommand(renderedCommand, {
-        cwd: this.config.cwd,
+        cwd: effectiveCwd,
         env: process.env,
         timeoutMs: this.config.timeoutMs,
         signal: controller.signal
@@ -5257,7 +5259,7 @@ var CliProvider = class {
               command: renderedCommand,
               stderr: result.stderr,
               exitCode: result.exitCode ?? 0,
-              cwd: this.config.cwd,
+              cwd: effectiveCwd,
               outputFile: outputFilePath
             }
           };
@@ -5275,7 +5277,7 @@ var CliProvider = class {
               command: renderedCommand,
               stderr: result.stderr,
               exitCode: result.exitCode ?? 0,
-              cwd: this.config.cwd,
+              cwd: effectiveCwd,
               outputFile: outputFilePath,
               error: errorMessage
             }
@@ -5290,7 +5292,7 @@ var CliProvider = class {
             command: renderedCommand,
             stderr: result.stderr,
             exitCode: result.exitCode ?? 0,
-            cwd: this.config.cwd,
+            cwd: effectiveCwd,
             outputFile: outputFilePath,
             recordId: evalCaseId
           }
@@ -7240,9 +7242,9 @@ var MockProvider = class {
 };
 // src/evaluation/providers/pi-cli.ts
-import { spawn as spawn3 } from "node:child_process";
+import { execSync, spawn as spawn3 } from "node:child_process";
 import { randomUUID as randomUUID7 } from "node:crypto";
-import { createWriteStream as createWriteStream5 } from "node:fs";
+import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
 import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import path19 from "node:path";
@@ -7300,6 +7302,59 @@ function subscribeToPiLogEntries(listener) {
   };
 }
+// src/evaluation/providers/pi-provider-aliases.ts
+var SUBPROVIDER_ALIASES = {
+  azure: "azure-openai-responses"
+};
+var SUBPROVIDER_ALIASES_WITH_BASE_URL = {
+  // Azure v1 endpoints are OpenAI-compatible; use the standard client
+  // to avoid AzureOpenAI adding api-version query params.
+  azure: "openai-responses"
+};
+var ENV_KEY_MAP = {
+  google: "GEMINI_API_KEY",
+  gemini: "GEMINI_API_KEY",
+  anthropic: "ANTHROPIC_API_KEY",
+  openai: "OPENAI_API_KEY",
+  groq: "GROQ_API_KEY",
+  xai: "XAI_API_KEY",
+  openrouter: "OPENROUTER_API_KEY",
+  azure: "AZURE_OPENAI_API_KEY"
+};
+var ENV_BASE_URL_MAP = {
+  openai: "OPENAI_BASE_URL",
+  azure: "AZURE_OPENAI_BASE_URL",
+  openrouter: "OPENROUTER_BASE_URL"
+};
+function resolveSubprovider(name, hasBaseUrl = false) {
+  const lower = name.toLowerCase();
+  if (hasBaseUrl) {
+    const alias = SUBPROVIDER_ALIASES_WITH_BASE_URL[lower];
+    if (alias) return alias;
+  }
+  return SUBPROVIDER_ALIASES[lower] ?? name;
+}
+function resolveCliProvider(name) {
+  const lower = name.toLowerCase();
+  if (lower === "azure") return "azure-openai-responses";
+  return name;
+}
+function resolveEnvKeyName(provider, hasBaseUrl = false) {
+  const lower = provider.toLowerCase();
+  if (hasBaseUrl && lower === "azure") return "OPENAI_API_KEY";
+  return ENV_KEY_MAP[lower];
+}
+function resolveEnvBaseUrlName(provider, hasBaseUrl = false) {
+  const lower = provider.toLowerCase();
+  if (hasBaseUrl && lower === "azure") return "OPENAI_BASE_URL";
+  return ENV_BASE_URL_MAP[lower];
+}
+function extractAzureResourceName(baseUrl) {
+  const urlMatch = baseUrl.match(/^https?:\/\/([^./]+)/);
+  if (urlMatch) return urlMatch[1];
+  return baseUrl;
+}
 // src/evaluation/providers/pi-utils.ts
 function extractPiTextContent(content) {
   if (typeof content === "string") {
@@ -7458,12 +7513,12 @@ var PiCliProvider = class {
   buildPiArgs(prompt, inputFiles) {
     const args = [];
     if (this.config.subprovider) {
-      args.push("--provider", this.config.subprovider);
+      args.push("--provider", resolveCliProvider(this.config.subprovider));
     }
     if (this.config.model) {
       args.push("--model", this.config.model);
     }
-    if (this.config.apiKey) {
+    if (this.config.apiKey && this.config.subprovider?.toLowerCase() !== "azure") {
       args.push("--api-key", this.config.apiKey);
     }
     args.push("--mode", "json");
@@ -7515,35 +7570,35 @@ ${prompt}` : prompt;
   }
   buildEnv() {
     const env = { ...process.env };
-    if (this.config.apiKey) {
-      const provider = this.config.subprovider?.toLowerCase() ?? "google";
-      const ENV_KEY_MAP = {
-        google: "GEMINI_API_KEY",
-        gemini: "GEMINI_API_KEY",
-        anthropic: "ANTHROPIC_API_KEY",
-        openai: "OPENAI_API_KEY",
-        groq: "GROQ_API_KEY",
-        xai: "XAI_API_KEY",
-        openrouter: "OPENROUTER_API_KEY"
-      };
-      const envKey = ENV_KEY_MAP[provider];
-      if (envKey) {
-        env[envKey] = this.config.apiKey;
+    const provider = this.config.subprovider?.toLowerCase() ?? "google";
+    if (provider === "azure") {
+      if (this.config.apiKey) {
+        env.AZURE_OPENAI_API_KEY = this.config.apiKey;
+      }
+      if (this.config.baseUrl) {
+        env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
+      }
+    } else {
+      if (this.config.apiKey) {
+        const envKey = resolveEnvKeyName(provider);
+        if (envKey) {
+          env[envKey] = this.config.apiKey;
+        }
       }
     }
     if (this.config.subprovider) {
-      const provider = this.config.subprovider.toLowerCase();
+      const resolvedProvider = resolveCliProvider(this.config.subprovider);
       const PROVIDER_OWN_PREFIXES = {
         openrouter: ["OPENROUTER_"],
         anthropic: ["ANTHROPIC_"],
         openai: ["OPENAI_"],
-        azure: ["AZURE_OPENAI_"],
+        "azure-openai-responses": ["AZURE_OPENAI_"],
         google: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
         gemini: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
         groq: ["GROQ_"],
         xai: ["XAI_"]
       };
-      const ownPrefixes = PROVIDER_OWN_PREFIXES[provider] ?? [];
+      const ownPrefixes = PROVIDER_OWN_PREFIXES[resolvedProvider] ?? [];
       const allOtherPrefixes = Object.entries(PROVIDER_OWN_PREFIXES).filter(([key]) => key !== provider).flatMap(([, prefixes]) => prefixes);
       for (const key of Object.keys(env)) {
         if (allOtherPrefixes.some((prefix) => key.startsWith(prefix)) && !ownPrefixes.some((prefix) => key.startsWith(prefix))) {
@@ -7834,6 +7889,24 @@ function extractMessages(events) {
       }
     }
   }
+  if (messages) {
+    for (let i = messages.length - 1; i >= 0; i--) {
+      if (messages[i].role === "assistant" && !messages[i].content) {
+        for (let j = events.length - 1; j >= 0; j--) {
+          const evt = events[j];
+          if (!evt || evt.type !== "message_end") continue;
+          const msg = evt.message;
+          if (msg?.role !== "assistant") continue;
+          const text = extractPiTextContent(msg.content);
+          if (text) {
+            messages[i] = { ...messages[i], content: text };
+            break;
+          }
+        }
+        break;
+      }
+    }
+  }
   const eventToolCalls = extractToolCallsFromEvents(events);
   if (eventToolCalls.length > 0) {
     injectEventToolCalls(messages, eventToolCalls);
@@ -8018,17 +8091,43 @@ function formatTimeoutSuffix3(timeoutMs) {
   if (!timeoutMs || timeoutMs <= 0) return "";
   return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
 }
+function resolveWindowsCmd(executable) {
+  if (process.platform !== "win32") return [executable, []];
+  const lower = executable.toLowerCase();
+  if (lower.endsWith(".js") || lower.endsWith(".exe")) return [executable, []];
+  let fullPath;
+  try {
+    fullPath = execSync(`where ${executable}`, { encoding: "utf-8" }).trim().split(/\r?\n/)[0].trim();
+  } catch {
+    return [executable, []];
+  }
+  const cmdPath = fullPath.endsWith(".cmd") ? fullPath : `${fullPath}.cmd`;
+  try {
+    const content = readFileSync2(cmdPath, "utf-8");
+    const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
+    if (match) {
+      const dp0 = path19.dirname(path19.resolve(cmdPath));
+      const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path19.sep}`);
+      try {
+        accessSync(scriptPath);
+        return ["node", [scriptPath]];
+      } catch {
+      }
+    }
+  } catch {
+  }
+  return [executable, []];
+}
 async function defaultPiRunner(options) {
   return await new Promise((resolve, reject) => {
     const parts = options.executable.split(/\s+/);
-    const executable = parts[0];
-    const executableArgs = parts.slice(1);
+    const [resolvedExe, prefixArgs] = resolveWindowsCmd(parts[0]);
+    const executableArgs = [...prefixArgs, ...parts.slice(1)];
     const allArgs = [...executableArgs, ...options.args];
-    const child = spawn3(executable, allArgs, {
+    const child = spawn3(resolvedExe, allArgs, {
       cwd: options.cwd,
       env: options.env,
-      stdio: ["pipe", "pipe", "pipe"],
-      shell: false
+      stdio: ["pipe", "pipe", "pipe"]
     });
     let stdout = "";
     let stderr = "";
@@ -8083,9 +8182,9 @@ async function defaultPiRunner(options) {
 }
 // src/evaluation/providers/pi-coding-agent.ts
-import { execSync } from "node:child_process";
+import { execSync as execSync2 } from "node:child_process";
 import { randomUUID as randomUUID8 } from "node:crypto";
-import { accessSync, createWriteStream as createWriteStream6 } from "node:fs";
+import { accessSync as accessSync2, createWriteStream as createWriteStream6 } from "node:fs";
 import { mkdir as mkdir7 } from "node:fs/promises";
 import path20 from "node:path";
 import { createInterface } from "node:readline";
@@ -8113,7 +8212,7 @@ function findAgentvRoot() {
   for (let i = 0; i < 10; i++) {
     try {
       const pkg = path20.join(dir, "package.json");
-      accessSync(pkg);
+      accessSync2(pkg);
       return dir;
     } catch {
       const parent = path20.dirname(dir);
@@ -8133,7 +8232,7 @@ async function doLoadSdkModules() {
     if (await promptInstall()) {
       const installDir = findAgentvRoot();
       console.error(`Installing @mariozechner/pi-coding-agent into ${installDir}...`);
-      execSync("bun add @mariozechner/pi-coding-agent", {
+      execSync2("bun add @mariozechner/pi-coding-agent", {
         cwd: installDir,
         stdio: "inherit"
       });
@@ -8174,7 +8273,9 @@ async function loadSdkModules() {
     codingTools: piSdk.codingTools,
     toolMap,
     SessionManager: piSdk.SessionManager,
-    getModel: piAi.getModel
+    getModel: piAi.getModel,
+    // biome-ignore lint/suspicious/noExplicitAny: registerBuiltInApiProviders exists at runtime but not in type defs
+    registerBuiltInApiProviders: piAi.registerBuiltInApiProviders
   };
 }
 var PiCodingAgentProvider = class {
@@ -8196,17 +8297,31 @@ var PiCodingAgentProvider = class {
     const startTime = (/* @__PURE__ */ new Date()).toISOString();
     const startMs = Date.now();
     const sdk = await loadSdkModules();
+    sdk.registerBuiltInApiProviders();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
       const cwd = this.resolveCwd(request.cwd);
-      const providerName = this.config.subprovider ?? "google";
+      const rawProvider = this.config.subprovider ?? "google";
+      const hasBaseUrl = !!this.config.baseUrl;
+      const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
       const modelId = this.config.model ?? "gemini-2.5-flash";
-      this.setApiKeyEnv(providerName);
-      const model = sdk.getModel(providerName, modelId);
+      this.setApiKeyEnv(rawProvider, hasBaseUrl);
+      this.setBaseUrlEnv(rawProvider, hasBaseUrl);
+      let model = sdk.getModel(providerName, modelId);
       if (!model) {
-        throw new Error(
-          `pi-coding-agent: getModel('${providerName}', '${modelId}') returned undefined. The model '${modelId}' is not registered for provider '${providerName}' in pi-ai. Check that subprovider and model are correct in your target config.`
-        );
+        const envProvider = providerName.replace(/-responses$/, "");
+        model = {
+          id: modelId,
+          name: modelId,
+          api: providerName,
+          provider: envProvider,
+          baseUrl: this.config.baseUrl ?? "",
+          reasoning: false,
+          input: ["text"],
+          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+          contextWindow: 128e3,
+          maxTokens: 16384
+        };
       }
       const tools = this.resolveTools(sdk);
       const { session } = await sdk.createAgentSession({
@@ -8359,22 +8474,21 @@ ${fileList}`;
     }
   }
   /** Maps config apiKey to the provider-specific env var the SDK reads. */
-  setApiKeyEnv(providerName) {
+  setApiKeyEnv(providerName, hasBaseUrl = false) {
     if (!this.config.apiKey) return;
-    const ENV_KEY_MAP = {
-      google: "GEMINI_API_KEY",
-      gemini: "GEMINI_API_KEY",
-      anthropic: "ANTHROPIC_API_KEY",
-      openai: "OPENAI_API_KEY",
-      groq: "GROQ_API_KEY",
-      xai: "XAI_API_KEY",
-      openrouter: "OPENROUTER_API_KEY"
-    };
-    const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
+    const envKey = resolveEnvKeyName(providerName, hasBaseUrl);
     if (envKey) {
       process.env[envKey] = this.config.apiKey;
     }
   }
+  /** Maps config baseUrl to the provider-specific env var the SDK reads. */
+  setBaseUrlEnv(providerName, hasBaseUrl = false) {
+    if (!this.config.baseUrl) return;
+    const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
+    if (envKey) {
+      process.env[envKey] = this.config.baseUrl;
+    }
+  }
   resolveCwd(cwdOverride) {
     if (cwdOverride) {
       return path20.resolve(cwdOverride);
@@ -15509,20 +15623,10 @@ async function runEvaluation(options) {
     if (resolvedTargetsByName.has(name)) {
       return resolvedTargetsByName.get(name);
     }
-    let definition = targetDefinitions.get(name);
+    const definition = resolveDelegatedTargetDefinition(name, targetDefinitions, envLookup);
     if (!definition) {
       return void 0;
     }
-    for (let depth = 0; depth < 5; depth++) {
-      const useTarget = definition.use_target;
-      if (typeof useTarget !== "string" || useTarget.trim().length === 0) break;
-      const envMatch = useTarget.trim().match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
-      const resolvedName = envMatch ? envLookup[envMatch[1]] ?? "" : useTarget.trim();
-      if (resolvedName.length === 0) break;
-      const next = targetDefinitions.get(resolvedName);
-      if (!next) break;
-      definition = next;
-    }
     const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
     resolvedTargetsByName.set(name, resolved);
     return resolved;
@@ -17608,7 +17712,7 @@ async function discoverDefaultTarget(repoRoot) {
   return null;
 }
 async function loadEnvHierarchy(repoRoot, startPath) {
-  const { readFileSync: readFileSync3 } = await import("node:fs");
+  const { readFileSync: readFileSync4 } = await import("node:fs");
   const chain = buildDirectoryChain(startPath, repoRoot);
   const envFiles = [];
   for (const dir of chain) {
@@ -17617,7 +17721,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
   }
   for (let i = 0; i < envFiles.length; i++) {
     try {
-      const content = readFileSync3(envFiles[i], "utf8");
+      const content = readFileSync4(envFiles[i], "utf8");
       for (const line of content.split("\n")) {
         const trimmed = line.trim();
         if (!trimmed || trimmed.startsWith("#")) continue;
@@ -17832,7 +17936,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
 }
 // src/projects.ts
-import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync2, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
+import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
 import path47 from "node:path";
 import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
 function getProjectsRegistryPath() {
@@ -17844,7 +17948,7 @@ function loadProjectRegistry() {
     return { projects: [] };
   }
   try {
-    const raw = readFileSync2(registryPath, "utf-8");
+    const raw = readFileSync3(registryPath, "utf-8");
     const parsed = parseYaml3(raw);
     if (!parsed || !Array.isArray(parsed.projects)) {
       return { projects: [] };
@@ -18881,6 +18985,7 @@ export {
   readTranscriptFile,
   removeProject,
   resolveAndCreateProvider,
+  resolveDelegatedTargetDefinition,
   resolveFileReference,
   resolveTargetDefinition,
   resolveWorkspaceTemplate,