@agentv/core 4.6.0 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1804,6 +1804,7 @@ interface PiCodingAgentResolvedConfig {
1804
1804
  readonly subprovider?: string;
1805
1805
  readonly model?: string;
1806
1806
  readonly apiKey?: string;
1807
+ readonly baseUrl?: string;
1807
1808
  readonly tools?: string;
1808
1809
  readonly thinking?: string;
1809
1810
  readonly cwd?: string;
@@ -1818,6 +1819,7 @@ interface PiCliResolvedConfig {
1818
1819
  readonly subprovider?: string;
1819
1820
  readonly model?: string;
1820
1821
  readonly apiKey?: string;
1822
+ readonly baseUrl?: string;
1821
1823
  readonly tools?: string;
1822
1824
  readonly thinking?: string;
1823
1825
  readonly args?: readonly string[];
@@ -1936,6 +1938,7 @@ type ResolvedTarget = (ResolvedTargetBase & {
1936
1938
  * here automatically makes it valid in targets.yaml without a separate update.
1937
1939
  */
1938
1940
  declare const COMMON_TARGET_SETTINGS: readonly ["use_target", "provider_batching", "providerBatching", "subagent_mode_allowed", "subagentModeAllowed", "fallback_targets", "fallbackTargets"];
1941
+ declare function resolveDelegatedTargetDefinition(name: string, definitions: ReadonlyMap<string, TargetDefinition>, env?: EnvLookup): TargetDefinition | undefined;
1939
1942
  declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup, evalFilePath?: string): ResolvedTarget;
1940
1943
 
1941
1944
  /**
@@ -3909,4 +3912,4 @@ type AgentKernel = {
3909
3912
  };
3910
3913
  declare function createAgentKernel(): AgentKernel;
3911
3914
 
3912
- export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type ChildEvaluatorResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentText, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type 
ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type 
SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, discoverAssertions, discoverClaudeSessions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProjects, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, 
extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseClaudeSession, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, removeProject, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, touchProject, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
3915
+ export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type ChildEvaluatorResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentText, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type 
ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type 
SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, discoverAssertions, discoverClaudeSessions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProjects, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, 
extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseClaudeSession, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, removeProject, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, touchProject, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
package/dist/index.d.ts CHANGED
@@ -1804,6 +1804,7 @@ interface PiCodingAgentResolvedConfig {
1804
1804
  readonly subprovider?: string;
1805
1805
  readonly model?: string;
1806
1806
  readonly apiKey?: string;
1807
+ readonly baseUrl?: string;
1807
1808
  readonly tools?: string;
1808
1809
  readonly thinking?: string;
1809
1810
  readonly cwd?: string;
@@ -1818,6 +1819,7 @@ interface PiCliResolvedConfig {
1818
1819
  readonly subprovider?: string;
1819
1820
  readonly model?: string;
1820
1821
  readonly apiKey?: string;
1822
+ readonly baseUrl?: string;
1821
1823
  readonly tools?: string;
1822
1824
  readonly thinking?: string;
1823
1825
  readonly args?: readonly string[];
@@ -1936,6 +1938,7 @@ type ResolvedTarget = (ResolvedTargetBase & {
1936
1938
  * here automatically makes it valid in targets.yaml without a separate update.
1937
1939
  */
1938
1940
  declare const COMMON_TARGET_SETTINGS: readonly ["use_target", "provider_batching", "providerBatching", "subagent_mode_allowed", "subagentModeAllowed", "fallback_targets", "fallbackTargets"];
1941
+ declare function resolveDelegatedTargetDefinition(name: string, definitions: ReadonlyMap<string, TargetDefinition>, env?: EnvLookup): TargetDefinition | undefined;
1939
1942
  declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup, evalFilePath?: string): ResolvedTarget;
1940
1943
 
1941
1944
  /**
@@ -3909,4 +3912,4 @@ type AgentKernel = {
3909
3912
  };
3910
3913
  declare function createAgentKernel(): AgentKernel;
3911
3914
 
3912
- export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type ChildEvaluatorResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentText, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type 
ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type 
SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, discoverAssertions, discoverClaudeSessions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProjects, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, 
extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseClaudeSession, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, removeProject, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, touchProject, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
3915
+ export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type ChildEvaluatorResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentText, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type 
ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type ProjectEntry, type ProjectRegistry, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type 
SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addProject, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, deriveProjectId, detectFormat, discoverAssertions, discoverClaudeSessions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProjects, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, 
extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getProject, getProjectsRegistryPath, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadProjectRegistry, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseClaudeSession, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, removeProject, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveProjectRegistry, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, touchProject, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
package/dist/index.js CHANGED
@@ -21,9 +21,10 @@ import {
21
21
  normalizeLineEndings,
22
22
  readJsonFile,
23
23
  readTextFile,
24
+ resolveDelegatedTargetDefinition,
24
25
  resolveFileReference,
25
26
  resolveTargetDefinition
26
- } from "./chunk-AIQ5FO4G.js";
27
+ } from "./chunk-ZK4GG7PR.js";
27
28
  import {
28
29
  AgentvProvider
29
30
  } from "./chunk-PRNXHNLF.js";
@@ -5216,15 +5217,16 @@ var CliProvider = class {
5216
5217
  outputFilePath
5217
5218
  );
5218
5219
  const renderedCommand = renderTemplate(this.config.command, templateValues);
5220
+ const effectiveCwd = requests[0]?.cwd ?? this.config.cwd;
5219
5221
  if (this.verbose) {
5220
5222
  console.log(
5221
- `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
5223
+ `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
5222
5224
  );
5223
5225
  }
5224
5226
  try {
5225
5227
  const startTime = Date.now();
5226
5228
  const result = await this.runCommand(renderedCommand, {
5227
- cwd: this.config.cwd,
5229
+ cwd: effectiveCwd,
5228
5230
  env: process.env,
5229
5231
  timeoutMs: this.config.timeoutMs,
5230
5232
  signal: controller.signal
@@ -5257,7 +5259,7 @@ var CliProvider = class {
5257
5259
  command: renderedCommand,
5258
5260
  stderr: result.stderr,
5259
5261
  exitCode: result.exitCode ?? 0,
5260
- cwd: this.config.cwd,
5262
+ cwd: effectiveCwd,
5261
5263
  outputFile: outputFilePath
5262
5264
  }
5263
5265
  };
@@ -5275,7 +5277,7 @@ var CliProvider = class {
5275
5277
  command: renderedCommand,
5276
5278
  stderr: result.stderr,
5277
5279
  exitCode: result.exitCode ?? 0,
5278
- cwd: this.config.cwd,
5280
+ cwd: effectiveCwd,
5279
5281
  outputFile: outputFilePath,
5280
5282
  error: errorMessage
5281
5283
  }
@@ -5290,7 +5292,7 @@ var CliProvider = class {
5290
5292
  command: renderedCommand,
5291
5293
  stderr: result.stderr,
5292
5294
  exitCode: result.exitCode ?? 0,
5293
- cwd: this.config.cwd,
5295
+ cwd: effectiveCwd,
5294
5296
  outputFile: outputFilePath,
5295
5297
  recordId: evalCaseId
5296
5298
  }
@@ -7240,9 +7242,9 @@ var MockProvider = class {
7240
7242
  };
7241
7243
 
7242
7244
  // src/evaluation/providers/pi-cli.ts
7243
- import { spawn as spawn3 } from "node:child_process";
7245
+ import { execSync, spawn as spawn3 } from "node:child_process";
7244
7246
  import { randomUUID as randomUUID7 } from "node:crypto";
7245
- import { createWriteStream as createWriteStream5 } from "node:fs";
7247
+ import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
7246
7248
  import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
7247
7249
  import { tmpdir } from "node:os";
7248
7250
  import path19 from "node:path";
@@ -7300,6 +7302,59 @@ function subscribeToPiLogEntries(listener) {
7300
7302
  };
7301
7303
  }
7302
7304
 
// src/evaluation/providers/pi-provider-aliases.ts

/** Short subprovider names (lowercase) mapped to the provider id they stand for. */
var SUBPROVIDER_ALIASES = {
  azure: "azure-openai-responses"
};

/** Alias overrides that apply only when the target config supplies a base URL. */
var SUBPROVIDER_ALIASES_WITH_BASE_URL = {
  // Azure v1 endpoints are OpenAI-compatible; use the standard client
  // to avoid AzureOpenAI adding api-version query params.
  azure: "openai-responses"
};

/** Lowercase subprovider name -> environment variable that carries its API key. */
var ENV_KEY_MAP = {
  google: "GEMINI_API_KEY",
  gemini: "GEMINI_API_KEY",
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  groq: "GROQ_API_KEY",
  xai: "XAI_API_KEY",
  openrouter: "OPENROUTER_API_KEY",
  azure: "AZURE_OPENAI_API_KEY"
};

/** Lowercase subprovider name -> environment variable that carries its base URL. */
var ENV_BASE_URL_MAP = {
  openai: "OPENAI_BASE_URL",
  azure: "AZURE_OPENAI_BASE_URL",
  openrouter: "OPENROUTER_BASE_URL"
};

/**
 * Reads `key` from a lookup table, considering own properties only.
 *
 * A plain `table[key]` also resolves inherited members, so a subprovider
 * called "constructor" or "__proto__" would yield a function/object instead
 * of `undefined`.
 */
function lookupPiProviderEntry(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : void 0;
}

/**
 * Resolves a configured subprovider name to the provider id to request.
 *
 * @param name Subprovider name from the target config (matched case-insensitively).
 * @param hasBaseUrl Whether the target config supplies a base URL; when true the
 *   base-URL alias table is consulted first.
 * @returns The aliased provider id, or `name` unchanged when no alias exists.
 */
function resolveSubprovider(name, hasBaseUrl = false) {
  const lower = name.toLowerCase();
  if (hasBaseUrl) {
    const alias = lookupPiProviderEntry(SUBPROVIDER_ALIASES_WITH_BASE_URL, lower);
    if (alias) return alias;
  }
  return lookupPiProviderEntry(SUBPROVIDER_ALIASES, lower) ?? name;
}

/**
 * Resolves a configured subprovider name to the value passed as the CLI provider.
 *
 * @param name Subprovider name from the target config (matched case-insensitively).
 * @returns "azure-openai-responses" for azure, otherwise `name` unchanged
 *   (original casing preserved).
 */
function resolveCliProvider(name) {
  const lower = name.toLowerCase();
  if (lower === "azure") return "azure-openai-responses";
  return name;
}

/**
 * Returns the environment variable name that should receive the API key.
 *
 * @param provider Subprovider name (matched case-insensitively).
 * @param hasBaseUrl Whether a base URL is configured; azure with a base URL is
 *   routed to the OpenAI variable, matching the base-URL alias above.
 * @returns The variable name, or `undefined` for an unknown provider.
 */
function resolveEnvKeyName(provider, hasBaseUrl = false) {
  const lower = provider.toLowerCase();
  if (hasBaseUrl && lower === "azure") return "OPENAI_API_KEY";
  return lookupPiProviderEntry(ENV_KEY_MAP, lower);
}

/**
 * Returns the environment variable name that should receive the base URL.
 *
 * @param provider Subprovider name (matched case-insensitively).
 * @param hasBaseUrl Whether a base URL is configured; azure with a base URL is
 *   routed to the OpenAI variable.
 * @returns The variable name, or `undefined` for a provider without one.
 */
function resolveEnvBaseUrlName(provider, hasBaseUrl = false) {
  const lower = provider.toLowerCase();
  if (hasBaseUrl && lower === "azure") return "OPENAI_BASE_URL";
  return lookupPiProviderEntry(ENV_BASE_URL_MAP, lower);
}

/**
 * Extracts the first host label from an Azure endpoint.
 *
 * Accepts a full URL ("https://myres.openai.azure.com/..."), a scheme-less
 * host ("myres.openai.azure.com"), or a bare resource name ("myres").
 * Surrounding whitespace is ignored and the scheme is matched
 * case-insensitively.
 *
 * @param baseUrl Configured base URL or resource name.
 * @returns The text up to the first "." or "/" after any http(s) scheme, or the
 *   trimmed input when nothing can be extracted.
 */
function extractAzureResourceName(baseUrl) {
  const trimmed = baseUrl.trim();
  const hostMatch = trimmed.match(/^(?:https?:\/\/)?([^./]+)/i);
  return hostMatch ? hostMatch[1] : trimmed;
}

7303
7358
  // src/evaluation/providers/pi-utils.ts
7304
7359
  function extractPiTextContent(content) {
7305
7360
  if (typeof content === "string") {
@@ -7458,12 +7513,12 @@ var PiCliProvider = class {
7458
7513
  buildPiArgs(prompt, inputFiles) {
7459
7514
  const args = [];
7460
7515
  if (this.config.subprovider) {
7461
- args.push("--provider", this.config.subprovider);
7516
+ args.push("--provider", resolveCliProvider(this.config.subprovider));
7462
7517
  }
7463
7518
  if (this.config.model) {
7464
7519
  args.push("--model", this.config.model);
7465
7520
  }
7466
- if (this.config.apiKey) {
7521
+ if (this.config.apiKey && this.config.subprovider?.toLowerCase() !== "azure") {
7467
7522
  args.push("--api-key", this.config.apiKey);
7468
7523
  }
7469
7524
  args.push("--mode", "json");
@@ -7515,35 +7570,35 @@ ${prompt}` : prompt;
7515
7570
  }
7516
7571
  buildEnv() {
7517
7572
  const env = { ...process.env };
7518
- if (this.config.apiKey) {
7519
- const provider = this.config.subprovider?.toLowerCase() ?? "google";
7520
- const ENV_KEY_MAP = {
7521
- google: "GEMINI_API_KEY",
7522
- gemini: "GEMINI_API_KEY",
7523
- anthropic: "ANTHROPIC_API_KEY",
7524
- openai: "OPENAI_API_KEY",
7525
- groq: "GROQ_API_KEY",
7526
- xai: "XAI_API_KEY",
7527
- openrouter: "OPENROUTER_API_KEY"
7528
- };
7529
- const envKey = ENV_KEY_MAP[provider];
7530
- if (envKey) {
7531
- env[envKey] = this.config.apiKey;
7573
+ const provider = this.config.subprovider?.toLowerCase() ?? "google";
7574
+ if (provider === "azure") {
7575
+ if (this.config.apiKey) {
7576
+ env.AZURE_OPENAI_API_KEY = this.config.apiKey;
7577
+ }
7578
+ if (this.config.baseUrl) {
7579
+ env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
7580
+ }
7581
+ } else {
7582
+ if (this.config.apiKey) {
7583
+ const envKey = resolveEnvKeyName(provider);
7584
+ if (envKey) {
7585
+ env[envKey] = this.config.apiKey;
7586
+ }
7532
7587
  }
7533
7588
  }
7534
7589
  if (this.config.subprovider) {
7535
- const provider = this.config.subprovider.toLowerCase();
7590
+ const resolvedProvider = resolveCliProvider(this.config.subprovider);
7536
7591
  const PROVIDER_OWN_PREFIXES = {
7537
7592
  openrouter: ["OPENROUTER_"],
7538
7593
  anthropic: ["ANTHROPIC_"],
7539
7594
  openai: ["OPENAI_"],
7540
- azure: ["AZURE_OPENAI_"],
7595
+ "azure-openai-responses": ["AZURE_OPENAI_"],
7541
7596
  google: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
7542
7597
  gemini: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
7543
7598
  groq: ["GROQ_"],
7544
7599
  xai: ["XAI_"]
7545
7600
  };
7546
- const ownPrefixes = PROVIDER_OWN_PREFIXES[provider] ?? [];
7601
+ const ownPrefixes = PROVIDER_OWN_PREFIXES[resolvedProvider] ?? [];
7547
7602
  const allOtherPrefixes = Object.entries(PROVIDER_OWN_PREFIXES).filter(([key]) => key !== provider).flatMap(([, prefixes]) => prefixes);
7548
7603
  for (const key of Object.keys(env)) {
7549
7604
  if (allOtherPrefixes.some((prefix) => key.startsWith(prefix)) && !ownPrefixes.some((prefix) => key.startsWith(prefix))) {
@@ -7834,6 +7889,24 @@ function extractMessages(events) {
7834
7889
  }
7835
7890
  }
7836
7891
  }
7892
+ if (messages) {
7893
+ for (let i = messages.length - 1; i >= 0; i--) {
7894
+ if (messages[i].role === "assistant" && !messages[i].content) {
7895
+ for (let j = events.length - 1; j >= 0; j--) {
7896
+ const evt = events[j];
7897
+ if (!evt || evt.type !== "message_end") continue;
7898
+ const msg = evt.message;
7899
+ if (msg?.role !== "assistant") continue;
7900
+ const text = extractPiTextContent(msg.content);
7901
+ if (text) {
7902
+ messages[i] = { ...messages[i], content: text };
7903
+ break;
7904
+ }
7905
+ }
7906
+ break;
7907
+ }
7908
+ }
7909
+ }
7837
7910
  const eventToolCalls = extractToolCallsFromEvents(events);
7838
7911
  if (eventToolCalls.length > 0) {
7839
7912
  injectEventToolCalls(messages, eventToolCalls);
@@ -8018,17 +8091,43 @@ function formatTimeoutSuffix3(timeoutMs) {
8018
8091
  if (!timeoutMs || timeoutMs <= 0) return "";
8019
8092
  return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
8020
8093
  }
/**
 * On Windows, tries to turn a command name into a direct `node <script>.js`
 * invocation by reading the `.cmd` wrapper that `where` finds for it.
 * (Presumably needed because the runner spawns without a shell - confirm
 * against the caller.)
 *
 * @param executable Command name or path, without arguments.
 * @returns A `[command, prefixArgs]` pair: `["node", [scriptPath]]` when the
 *   wrapper names an existing `.js` script, otherwise `[executable, []]`.
 *   Never throws; every lookup failure falls back to the input unchanged.
 */
function resolveWindowsCmd(executable) {
  if (process.platform !== "win32") return [executable, []];
  const lower = executable.toLowerCase();
  if (lower.endsWith(".js") || lower.endsWith(".exe")) return [executable, []];
  // The name is placed inside double quotes on a shell command line below;
  // an embedded quote would escape that quoting, so do not look it up at all.
  if (executable.length === 0 || executable.includes('"')) return [executable, []];
  let fullPath;
  try {
    // Quoting keeps shell metacharacters (&, |, <, >) literal. stderr is
    // discarded so a failed lookup does not print to the parent's console.
    fullPath = execSync(`where "${executable}"`, {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "ignore"]
    }).trim().split(/\r?\n/)[0].trim();
  } catch {
    return [executable, []];
  }
  if (!fullPath) return [executable, []];
  // Windows file names are case-insensitive, so "pi.CMD" is already a wrapper path.
  const cmdPath = fullPath.toLowerCase().endsWith(".cmd") ? fullPath : `${fullPath}.cmd`;
  try {
    const content = readFileSync2(cmdPath, "utf-8");
    // Looks for a line of the form: "%_prog%" "<path>.js"
    const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
    if (match) {
      // %dp0% in the wrapper is replaced with the wrapper's own directory.
      const dp0 = path19.dirname(path19.resolve(cmdPath));
      const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path19.sep}`);
      try {
        accessSync(scriptPath);
        return ["node", [scriptPath]];
      } catch {
        // Script is missing or unreadable: fall through to the plain executable.
      }
    }
  } catch {
    // Wrapper could not be read: fall through to the plain executable.
  }
  return [executable, []];
}
8021
8121
  async function defaultPiRunner(options) {
8022
8122
  return await new Promise((resolve, reject) => {
8023
8123
  const parts = options.executable.split(/\s+/);
8024
- const executable = parts[0];
8025
- const executableArgs = parts.slice(1);
8124
+ const [resolvedExe, prefixArgs] = resolveWindowsCmd(parts[0]);
8125
+ const executableArgs = [...prefixArgs, ...parts.slice(1)];
8026
8126
  const allArgs = [...executableArgs, ...options.args];
8027
- const child = spawn3(executable, allArgs, {
8127
+ const child = spawn3(resolvedExe, allArgs, {
8028
8128
  cwd: options.cwd,
8029
8129
  env: options.env,
8030
- stdio: ["pipe", "pipe", "pipe"],
8031
- shell: false
8130
+ stdio: ["pipe", "pipe", "pipe"]
8032
8131
  });
8033
8132
  let stdout = "";
8034
8133
  let stderr = "";
@@ -8083,9 +8182,9 @@ async function defaultPiRunner(options) {
8083
8182
  }
8084
8183
 
8085
8184
  // src/evaluation/providers/pi-coding-agent.ts
8086
- import { execSync } from "node:child_process";
8185
+ import { execSync as execSync2 } from "node:child_process";
8087
8186
  import { randomUUID as randomUUID8 } from "node:crypto";
8088
- import { accessSync, createWriteStream as createWriteStream6 } from "node:fs";
8187
+ import { accessSync as accessSync2, createWriteStream as createWriteStream6 } from "node:fs";
8089
8188
  import { mkdir as mkdir7 } from "node:fs/promises";
8090
8189
  import path20 from "node:path";
8091
8190
  import { createInterface } from "node:readline";
@@ -8113,7 +8212,7 @@ function findAgentvRoot() {
8113
8212
  for (let i = 0; i < 10; i++) {
8114
8213
  try {
8115
8214
  const pkg = path20.join(dir, "package.json");
8116
- accessSync(pkg);
8215
+ accessSync2(pkg);
8117
8216
  return dir;
8118
8217
  } catch {
8119
8218
  const parent = path20.dirname(dir);
@@ -8133,7 +8232,7 @@ async function doLoadSdkModules() {
8133
8232
  if (await promptInstall()) {
8134
8233
  const installDir = findAgentvRoot();
8135
8234
  console.error(`Installing @mariozechner/pi-coding-agent into ${installDir}...`);
8136
- execSync("bun add @mariozechner/pi-coding-agent", {
8235
+ execSync2("bun add @mariozechner/pi-coding-agent", {
8137
8236
  cwd: installDir,
8138
8237
  stdio: "inherit"
8139
8238
  });
@@ -8174,7 +8273,9 @@ async function loadSdkModules() {
8174
8273
  codingTools: piSdk.codingTools,
8175
8274
  toolMap,
8176
8275
  SessionManager: piSdk.SessionManager,
8177
- getModel: piAi.getModel
8276
+ getModel: piAi.getModel,
8277
+ // biome-ignore lint/suspicious/noExplicitAny: registerBuiltInApiProviders exists at runtime but not in type defs
8278
+ registerBuiltInApiProviders: piAi.registerBuiltInApiProviders
8178
8279
  };
8179
8280
  }
8180
8281
  var PiCodingAgentProvider = class {
@@ -8196,17 +8297,31 @@ var PiCodingAgentProvider = class {
8196
8297
  const startTime = (/* @__PURE__ */ new Date()).toISOString();
8197
8298
  const startMs = Date.now();
8198
8299
  const sdk = await loadSdkModules();
8300
+ sdk.registerBuiltInApiProviders();
8199
8301
  const logger = await this.createStreamLogger(request).catch(() => void 0);
8200
8302
  try {
8201
8303
  const cwd = this.resolveCwd(request.cwd);
8202
- const providerName = this.config.subprovider ?? "google";
8304
+ const rawProvider = this.config.subprovider ?? "google";
8305
+ const hasBaseUrl = !!this.config.baseUrl;
8306
+ const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
8203
8307
  const modelId = this.config.model ?? "gemini-2.5-flash";
8204
- this.setApiKeyEnv(providerName);
8205
- const model = sdk.getModel(providerName, modelId);
8308
+ this.setApiKeyEnv(rawProvider, hasBaseUrl);
8309
+ this.setBaseUrlEnv(rawProvider, hasBaseUrl);
8310
+ let model = sdk.getModel(providerName, modelId);
8206
8311
  if (!model) {
8207
- throw new Error(
8208
- `pi-coding-agent: getModel('${providerName}', '${modelId}') returned undefined. The model '${modelId}' is not registered for provider '${providerName}' in pi-ai. Check that subprovider and model are correct in your target config.`
8209
- );
8312
+ const envProvider = providerName.replace(/-responses$/, "");
8313
+ model = {
8314
+ id: modelId,
8315
+ name: modelId,
8316
+ api: providerName,
8317
+ provider: envProvider,
8318
+ baseUrl: this.config.baseUrl ?? "",
8319
+ reasoning: false,
8320
+ input: ["text"],
8321
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
8322
+ contextWindow: 128e3,
8323
+ maxTokens: 16384
8324
+ };
8210
8325
  }
8211
8326
  const tools = this.resolveTools(sdk);
8212
8327
  const { session } = await sdk.createAgentSession({
@@ -8359,22 +8474,21 @@ ${fileList}`;
8359
8474
  }
8360
8475
  }
8361
8476
  /** Maps config apiKey to the provider-specific env var the SDK reads. */
8362
- setApiKeyEnv(providerName) {
8477
+ setApiKeyEnv(providerName, hasBaseUrl = false) {
8363
8478
  if (!this.config.apiKey) return;
8364
- const ENV_KEY_MAP = {
8365
- google: "GEMINI_API_KEY",
8366
- gemini: "GEMINI_API_KEY",
8367
- anthropic: "ANTHROPIC_API_KEY",
8368
- openai: "OPENAI_API_KEY",
8369
- groq: "GROQ_API_KEY",
8370
- xai: "XAI_API_KEY",
8371
- openrouter: "OPENROUTER_API_KEY"
8372
- };
8373
- const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
8479
+ const envKey = resolveEnvKeyName(providerName, hasBaseUrl);
8374
8480
  if (envKey) {
8375
8481
  process.env[envKey] = this.config.apiKey;
8376
8482
  }
8377
8483
  }
8484
+ /** Maps config baseUrl to the provider-specific env var the SDK reads. */
8485
+ setBaseUrlEnv(providerName, hasBaseUrl = false) {
8486
+ if (!this.config.baseUrl) return;
8487
+ const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
8488
+ if (envKey) {
8489
+ process.env[envKey] = this.config.baseUrl;
8490
+ }
8491
+ }
8378
8492
  resolveCwd(cwdOverride) {
8379
8493
  if (cwdOverride) {
8380
8494
  return path20.resolve(cwdOverride);
@@ -15509,20 +15623,10 @@ async function runEvaluation(options) {
15509
15623
  if (resolvedTargetsByName.has(name)) {
15510
15624
  return resolvedTargetsByName.get(name);
15511
15625
  }
15512
- let definition = targetDefinitions.get(name);
15626
+ const definition = resolveDelegatedTargetDefinition(name, targetDefinitions, envLookup);
15513
15627
  if (!definition) {
15514
15628
  return void 0;
15515
15629
  }
15516
- for (let depth = 0; depth < 5; depth++) {
15517
- const useTarget = definition.use_target;
15518
- if (typeof useTarget !== "string" || useTarget.trim().length === 0) break;
15519
- const envMatch = useTarget.trim().match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
15520
- const resolvedName = envMatch ? envLookup[envMatch[1]] ?? "" : useTarget.trim();
15521
- if (resolvedName.length === 0) break;
15522
- const next = targetDefinitions.get(resolvedName);
15523
- if (!next) break;
15524
- definition = next;
15525
- }
15526
15630
  const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
15527
15631
  resolvedTargetsByName.set(name, resolved);
15528
15632
  return resolved;
@@ -17608,7 +17712,7 @@ async function discoverDefaultTarget(repoRoot) {
17608
17712
  return null;
17609
17713
  }
17610
17714
  async function loadEnvHierarchy(repoRoot, startPath) {
17611
- const { readFileSync: readFileSync3 } = await import("node:fs");
17715
+ const { readFileSync: readFileSync4 } = await import("node:fs");
17612
17716
  const chain = buildDirectoryChain(startPath, repoRoot);
17613
17717
  const envFiles = [];
17614
17718
  for (const dir of chain) {
@@ -17617,7 +17721,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
17617
17721
  }
17618
17722
  for (let i = 0; i < envFiles.length; i++) {
17619
17723
  try {
17620
- const content = readFileSync3(envFiles[i], "utf8");
17724
+ const content = readFileSync4(envFiles[i], "utf8");
17621
17725
  for (const line of content.split("\n")) {
17622
17726
  const trimmed = line.trim();
17623
17727
  if (!trimmed || trimmed.startsWith("#")) continue;
@@ -17832,7 +17936,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
17832
17936
  }
17833
17937
 
17834
17938
  // src/projects.ts
17835
- import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync2, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
17939
+ import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
17836
17940
  import path47 from "node:path";
17837
17941
  import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
17838
17942
  function getProjectsRegistryPath() {
@@ -17844,7 +17948,7 @@ function loadProjectRegistry() {
17844
17948
  return { projects: [] };
17845
17949
  }
17846
17950
  try {
17847
- const raw = readFileSync2(registryPath, "utf-8");
17951
+ const raw = readFileSync3(registryPath, "utf-8");
17848
17952
  const parsed = parseYaml3(raw);
17849
17953
  if (!parsed || !Array.isArray(parsed.projects)) {
17850
17954
  return { projects: [] };
@@ -18881,6 +18985,7 @@ export {
18881
18985
  readTranscriptFile,
18882
18986
  removeProject,
18883
18987
  resolveAndCreateProvider,
18988
+ resolveDelegatedTargetDefinition,
18884
18989
  resolveFileReference,
18885
18990
  resolveTargetDefinition,
18886
18991
  resolveWorkspaceTemplate,