@agentv/core 3.11.0 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HMXZ2AX4.js → chunk-3G2KXH7N.js} +31 -23
- package/dist/chunk-3G2KXH7N.js.map +1 -0
- package/dist/{chunk-AVTN5AB7.js → chunk-4XWPXNQM.js} +62 -24
- package/dist/chunk-4XWPXNQM.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +1120 -800
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +29 -8
- package/dist/index.d.ts +29 -8
- package/dist/index.js +956 -682
- package/dist/index.js.map +1 -1
- package/dist/simple-trace-file-exporter-CRIO5HDZ.js +7 -0
- package/package.json +9 -3
- package/dist/chunk-AVTN5AB7.js.map +0 -1
- package/dist/chunk-HMXZ2AX4.js.map +0 -1
- package/dist/simple-trace-file-exporter-S76DMABU.js +0 -7
- package/dist/{simple-trace-file-exporter-S76DMABU.js.map → simple-trace-file-exporter-CRIO5HDZ.js.map} +0 -0
package/dist/index.d.cts
CHANGED
|
@@ -8,7 +8,7 @@ interface ChatMessage {
|
|
|
8
8
|
readonly name?: string;
|
|
9
9
|
}
|
|
10
10
|
type ChatPrompt = readonly ChatMessage[];
|
|
11
|
-
type ProviderKind = 'openai' | 'openrouter' | 'azure' | 'anthropic' | 'gemini' | 'codex' | 'copilot-sdk' | 'copilot-cli' | 'pi-coding-agent' | 'pi-
|
|
11
|
+
type ProviderKind = 'openai' | 'openrouter' | 'azure' | 'anthropic' | 'gemini' | 'codex' | 'copilot-sdk' | 'copilot-cli' | 'pi-coding-agent' | 'pi-cli' | 'claude' | 'claude-cli' | 'claude-sdk' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders' | 'agentv';
|
|
12
12
|
/** Callbacks for real-time observability during provider execution */
|
|
13
13
|
interface ProviderStreamCallbacks {
|
|
14
14
|
onToolCallStart?: (toolName: string, toolCallId?: string) => void;
|
|
@@ -1691,13 +1691,11 @@ interface CopilotSdkResolvedConfig {
|
|
|
1691
1691
|
readonly systemPrompt?: string;
|
|
1692
1692
|
}
|
|
1693
1693
|
interface PiCodingAgentResolvedConfig {
|
|
1694
|
-
readonly executable: string;
|
|
1695
1694
|
readonly subprovider?: string;
|
|
1696
1695
|
readonly model?: string;
|
|
1697
1696
|
readonly apiKey?: string;
|
|
1698
1697
|
readonly tools?: string;
|
|
1699
1698
|
readonly thinking?: string;
|
|
1700
|
-
readonly args?: readonly string[];
|
|
1701
1699
|
readonly cwd?: string;
|
|
1702
1700
|
readonly workspaceTemplate?: string;
|
|
1703
1701
|
readonly timeoutMs?: number;
|
|
@@ -1705,11 +1703,19 @@ interface PiCodingAgentResolvedConfig {
|
|
|
1705
1703
|
readonly logFormat?: 'summary' | 'json';
|
|
1706
1704
|
readonly systemPrompt?: string;
|
|
1707
1705
|
}
|
|
1708
|
-
interface
|
|
1706
|
+
interface PiCliResolvedConfig {
|
|
1707
|
+
readonly executable: string;
|
|
1709
1708
|
readonly subprovider?: string;
|
|
1710
1709
|
readonly model?: string;
|
|
1711
1710
|
readonly apiKey?: string;
|
|
1711
|
+
readonly tools?: string;
|
|
1712
|
+
readonly thinking?: string;
|
|
1713
|
+
readonly args?: readonly string[];
|
|
1714
|
+
readonly cwd?: string;
|
|
1715
|
+
readonly workspaceTemplate?: string;
|
|
1712
1716
|
readonly timeoutMs?: number;
|
|
1717
|
+
readonly logDir?: string;
|
|
1718
|
+
readonly logFormat?: 'summary' | 'json';
|
|
1713
1719
|
readonly systemPrompt?: string;
|
|
1714
1720
|
}
|
|
1715
1721
|
interface ClaudeResolvedConfig {
|
|
@@ -1805,12 +1811,12 @@ type ResolvedTarget = {
|
|
|
1805
1811
|
readonly providerBatching?: boolean;
|
|
1806
1812
|
readonly config: PiCodingAgentResolvedConfig;
|
|
1807
1813
|
} | {
|
|
1808
|
-
readonly kind: 'pi-
|
|
1814
|
+
readonly kind: 'pi-cli';
|
|
1809
1815
|
readonly name: string;
|
|
1810
1816
|
readonly graderTarget?: string;
|
|
1811
1817
|
readonly workers?: number;
|
|
1812
1818
|
readonly providerBatching?: boolean;
|
|
1813
|
-
readonly config:
|
|
1819
|
+
readonly config: PiCliResolvedConfig;
|
|
1814
1820
|
} | {
|
|
1815
1821
|
readonly kind: 'claude';
|
|
1816
1822
|
readonly name: string;
|
|
@@ -3504,6 +3510,8 @@ declare class OtelStreamingObserver {
|
|
|
3504
3510
|
private readonly parentCtx?;
|
|
3505
3511
|
private rootSpan;
|
|
3506
3512
|
private rootCtx;
|
|
3513
|
+
private observedChildSpans;
|
|
3514
|
+
private pendingMetrics;
|
|
3507
3515
|
constructor(tracer: Tracer, api: OtelApi, captureContent: boolean, parentCtx?: any | undefined);
|
|
3508
3516
|
/** Create root eval span immediately (visible in backend right away) */
|
|
3509
3517
|
startEvalCase(testId: string, target: string, evalSet?: string): void;
|
|
@@ -3511,8 +3519,21 @@ declare class OtelStreamingObserver {
|
|
|
3511
3519
|
onToolCall(name: string, input: unknown, output: unknown, _durationMs: number, toolCallId?: string): void;
|
|
3512
3520
|
/** Create and immediately export an LLM span */
|
|
3513
3521
|
onLlmCall(model: string, tokenUsage?: ProviderTokenUsage): void;
|
|
3522
|
+
/** Record final execution metrics before the root span is finalized. */
|
|
3523
|
+
recordEvalMetrics(result: {
|
|
3524
|
+
durationMs?: number;
|
|
3525
|
+
costUsd?: number;
|
|
3526
|
+
tokenUsage?: ProviderTokenUsage;
|
|
3527
|
+
trace?: {
|
|
3528
|
+
eventCount: number;
|
|
3529
|
+
toolCalls: Record<string, number>;
|
|
3530
|
+
llmCallCount?: number;
|
|
3531
|
+
};
|
|
3532
|
+
}): void;
|
|
3514
3533
|
/** Finalize root span with score/verdict after evaluation completes */
|
|
3515
3534
|
finalizeEvalCase(score: number, error?: string): void;
|
|
3535
|
+
/** Backfill child spans from the completed result when the provider emitted no live callbacks. */
|
|
3536
|
+
completeFromResult(result: EvaluationResult): void;
|
|
3516
3537
|
/** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
|
|
3517
3538
|
getActiveSpanIds(): {
|
|
3518
3539
|
parentSpanId: string;
|
|
@@ -3550,6 +3571,7 @@ declare class SimpleTraceFileExporter {
|
|
|
3550
3571
|
private streamReady;
|
|
3551
3572
|
private pendingWrites;
|
|
3552
3573
|
private _shuttingDown;
|
|
3574
|
+
private spansByTraceId;
|
|
3553
3575
|
constructor(filePath: string);
|
|
3554
3576
|
private ensureStream;
|
|
3555
3577
|
export(spans: ReadableSpan[], resultCallback: (result: {
|
|
@@ -3557,7 +3579,6 @@ declare class SimpleTraceFileExporter {
|
|
|
3557
3579
|
}) => void): void;
|
|
3558
3580
|
shutdown(): Promise<void>;
|
|
3559
3581
|
forceFlush(): Promise<void>;
|
|
3560
|
-
private collectChildren;
|
|
3561
3582
|
private buildSimpleRecord;
|
|
3562
3583
|
}
|
|
3563
3584
|
|
|
@@ -3619,4 +3640,4 @@ type AgentKernel = {
|
|
|
3619
3640
|
};
|
|
3620
3641
|
declare function createAgentKernel(): AgentKernel;
|
|
3621
3642
|
|
|
3622
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type 
FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type
|
|
3643
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type 
FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type 
TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, 
loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
package/dist/index.d.ts
CHANGED
|
@@ -8,7 +8,7 @@ interface ChatMessage {
|
|
|
8
8
|
readonly name?: string;
|
|
9
9
|
}
|
|
10
10
|
type ChatPrompt = readonly ChatMessage[];
|
|
11
|
-
type ProviderKind = 'openai' | 'openrouter' | 'azure' | 'anthropic' | 'gemini' | 'codex' | 'copilot-sdk' | 'copilot-cli' | 'pi-coding-agent' | 'pi-
|
|
11
|
+
type ProviderKind = 'openai' | 'openrouter' | 'azure' | 'anthropic' | 'gemini' | 'codex' | 'copilot-sdk' | 'copilot-cli' | 'pi-coding-agent' | 'pi-cli' | 'claude' | 'claude-cli' | 'claude-sdk' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders' | 'agentv';
|
|
12
12
|
/** Callbacks for real-time observability during provider execution */
|
|
13
13
|
interface ProviderStreamCallbacks {
|
|
14
14
|
onToolCallStart?: (toolName: string, toolCallId?: string) => void;
|
|
@@ -1691,13 +1691,11 @@ interface CopilotSdkResolvedConfig {
|
|
|
1691
1691
|
readonly systemPrompt?: string;
|
|
1692
1692
|
}
|
|
1693
1693
|
interface PiCodingAgentResolvedConfig {
|
|
1694
|
-
readonly executable: string;
|
|
1695
1694
|
readonly subprovider?: string;
|
|
1696
1695
|
readonly model?: string;
|
|
1697
1696
|
readonly apiKey?: string;
|
|
1698
1697
|
readonly tools?: string;
|
|
1699
1698
|
readonly thinking?: string;
|
|
1700
|
-
readonly args?: readonly string[];
|
|
1701
1699
|
readonly cwd?: string;
|
|
1702
1700
|
readonly workspaceTemplate?: string;
|
|
1703
1701
|
readonly timeoutMs?: number;
|
|
@@ -1705,11 +1703,19 @@ interface PiCodingAgentResolvedConfig {
|
|
|
1705
1703
|
readonly logFormat?: 'summary' | 'json';
|
|
1706
1704
|
readonly systemPrompt?: string;
|
|
1707
1705
|
}
|
|
1708
|
-
interface
|
|
1706
|
+
interface PiCliResolvedConfig {
|
|
1707
|
+
readonly executable: string;
|
|
1709
1708
|
readonly subprovider?: string;
|
|
1710
1709
|
readonly model?: string;
|
|
1711
1710
|
readonly apiKey?: string;
|
|
1711
|
+
readonly tools?: string;
|
|
1712
|
+
readonly thinking?: string;
|
|
1713
|
+
readonly args?: readonly string[];
|
|
1714
|
+
readonly cwd?: string;
|
|
1715
|
+
readonly workspaceTemplate?: string;
|
|
1712
1716
|
readonly timeoutMs?: number;
|
|
1717
|
+
readonly logDir?: string;
|
|
1718
|
+
readonly logFormat?: 'summary' | 'json';
|
|
1713
1719
|
readonly systemPrompt?: string;
|
|
1714
1720
|
}
|
|
1715
1721
|
interface ClaudeResolvedConfig {
|
|
@@ -1805,12 +1811,12 @@ type ResolvedTarget = {
|
|
|
1805
1811
|
readonly providerBatching?: boolean;
|
|
1806
1812
|
readonly config: PiCodingAgentResolvedConfig;
|
|
1807
1813
|
} | {
|
|
1808
|
-
readonly kind: 'pi-
|
|
1814
|
+
readonly kind: 'pi-cli';
|
|
1809
1815
|
readonly name: string;
|
|
1810
1816
|
readonly graderTarget?: string;
|
|
1811
1817
|
readonly workers?: number;
|
|
1812
1818
|
readonly providerBatching?: boolean;
|
|
1813
|
-
readonly config:
|
|
1819
|
+
readonly config: PiCliResolvedConfig;
|
|
1814
1820
|
} | {
|
|
1815
1821
|
readonly kind: 'claude';
|
|
1816
1822
|
readonly name: string;
|
|
@@ -3504,6 +3510,8 @@ declare class OtelStreamingObserver {
|
|
|
3504
3510
|
private readonly parentCtx?;
|
|
3505
3511
|
private rootSpan;
|
|
3506
3512
|
private rootCtx;
|
|
3513
|
+
private observedChildSpans;
|
|
3514
|
+
private pendingMetrics;
|
|
3507
3515
|
constructor(tracer: Tracer, api: OtelApi, captureContent: boolean, parentCtx?: any | undefined);
|
|
3508
3516
|
/** Create root eval span immediately (visible in backend right away) */
|
|
3509
3517
|
startEvalCase(testId: string, target: string, evalSet?: string): void;
|
|
@@ -3511,8 +3519,21 @@ declare class OtelStreamingObserver {
|
|
|
3511
3519
|
onToolCall(name: string, input: unknown, output: unknown, _durationMs: number, toolCallId?: string): void;
|
|
3512
3520
|
/** Create and immediately export an LLM span */
|
|
3513
3521
|
onLlmCall(model: string, tokenUsage?: ProviderTokenUsage): void;
|
|
3522
|
+
/** Record final execution metrics before the root span is finalized. */
|
|
3523
|
+
recordEvalMetrics(result: {
|
|
3524
|
+
durationMs?: number;
|
|
3525
|
+
costUsd?: number;
|
|
3526
|
+
tokenUsage?: ProviderTokenUsage;
|
|
3527
|
+
trace?: {
|
|
3528
|
+
eventCount: number;
|
|
3529
|
+
toolCalls: Record<string, number>;
|
|
3530
|
+
llmCallCount?: number;
|
|
3531
|
+
};
|
|
3532
|
+
}): void;
|
|
3514
3533
|
/** Finalize root span with score/verdict after evaluation completes */
|
|
3515
3534
|
finalizeEvalCase(score: number, error?: string): void;
|
|
3535
|
+
/** Backfill child spans from the completed result when the provider emitted no live callbacks. */
|
|
3536
|
+
completeFromResult(result: EvaluationResult): void;
|
|
3516
3537
|
/** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
|
|
3517
3538
|
getActiveSpanIds(): {
|
|
3518
3539
|
parentSpanId: string;
|
|
@@ -3550,6 +3571,7 @@ declare class SimpleTraceFileExporter {
|
|
|
3550
3571
|
private streamReady;
|
|
3551
3572
|
private pendingWrites;
|
|
3552
3573
|
private _shuttingDown;
|
|
3574
|
+
private spansByTraceId;
|
|
3553
3575
|
constructor(filePath: string);
|
|
3554
3576
|
private ensureStream;
|
|
3555
3577
|
export(spans: ReadableSpan[], resultCallback: (result: {
|
|
@@ -3557,7 +3579,6 @@ declare class SimpleTraceFileExporter {
|
|
|
3557
3579
|
}) => void): void;
|
|
3558
3580
|
shutdown(): Promise<void>;
|
|
3559
3581
|
forceFlush(): Promise<void>;
|
|
3560
|
-
private collectChildren;
|
|
3561
3582
|
private buildSimpleRecord;
|
|
3562
3583
|
}
|
|
3563
3584
|
|
|
@@ -3619,4 +3640,4 @@ type AgentKernel = {
|
|
|
3619
3640
|
};
|
|
3620
3641
|
declare function createAgentKernel(): AgentKernel;
|
|
3621
3642
|
|
|
3622
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type 
FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type
|
|
3643
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type 
FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type 
TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, 
loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|