@agentv/core 3.12.0 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1250,7 +1250,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
1250
1250
  declare const DEFAULT_EVAL_PATTERNS: readonly string[];
1251
1251
  type ExecutionDefaults = {
1252
1252
  readonly verbose?: boolean;
1253
- readonly trace_file?: string;
1254
1253
  readonly keep_workspaces?: boolean;
1255
1254
  readonly otel_file?: string;
1256
1255
  readonly export_otel?: boolean;
@@ -3024,8 +3023,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3024
3023
  agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
3025
3024
  /** Enable verbose logging */
3026
3025
  verbose: z.ZodOptional<z.ZodBoolean>;
3027
- /** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
3028
- traceFile: z.ZodOptional<z.ZodString>;
3029
3026
  /** Always keep temp workspaces after eval */
3030
3027
  keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
3031
3028
  /** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
@@ -3036,7 +3033,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3036
3033
  maxRetries?: number | undefined;
3037
3034
  agentTimeoutMs?: number | undefined;
3038
3035
  keepWorkspaces?: boolean | undefined;
3039
- traceFile?: string | undefined;
3040
3036
  otelFile?: string | undefined;
3041
3037
  }, {
3042
3038
  workers?: number | undefined;
@@ -3044,7 +3040,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3044
3040
  maxRetries?: number | undefined;
3045
3041
  agentTimeoutMs?: number | undefined;
3046
3042
  keepWorkspaces?: boolean | undefined;
3047
- traceFile?: string | undefined;
3048
3043
  otelFile?: string | undefined;
3049
3044
  }>>;
3050
3045
  /** Output settings */
@@ -3093,7 +3088,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3093
3088
  maxRetries?: number | undefined;
3094
3089
  agentTimeoutMs?: number | undefined;
3095
3090
  keepWorkspaces?: boolean | undefined;
3096
- traceFile?: string | undefined;
3097
3091
  otelFile?: string | undefined;
3098
3092
  } | undefined;
3099
3093
  cache?: {
@@ -3115,7 +3109,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3115
3109
  maxRetries?: number | undefined;
3116
3110
  agentTimeoutMs?: number | undefined;
3117
3111
  keepWorkspaces?: boolean | undefined;
3118
- traceFile?: string | undefined;
3119
3112
  otelFile?: string | undefined;
3120
3113
  } | undefined;
3121
3114
  cache?: {
@@ -3469,8 +3462,6 @@ interface OtelExportOptions {
3469
3462
  readonly groupTurns?: boolean;
3470
3463
  /** Path to write OTLP JSON file (importable by OTel backends) */
3471
3464
  readonly otlpFilePath?: string;
3472
- /** Path to write human-readable simple JSONL trace file */
3473
- readonly traceFilePath?: string;
3474
3465
  }
3475
3466
  /** Preset configuration for a known observability backend. */
3476
3467
  interface OtelBackendPreset {
@@ -3543,7 +3534,7 @@ declare class OtelStreamingObserver {
3543
3534
  getStreamCallbacks(): ProviderStreamCallbacks;
3544
3535
  }
3545
3536
 
3546
- type ReadableSpan$1 = any;
3537
+ type ReadableSpan = any;
3547
3538
  /**
3548
3539
  * SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
3549
3540
  * The file can be imported by any OTel-compatible backend.
@@ -3552,34 +3543,12 @@ declare class OtlpJsonFileExporter {
3552
3543
  private spans;
3553
3544
  private filePath;
3554
3545
  constructor(filePath: string);
3555
- export(spans: ReadableSpan$1[], resultCallback: (result: {
3556
- code: number;
3557
- }) => void): void;
3558
- shutdown(): Promise<void>;
3559
- forceFlush(): Promise<void>;
3560
- private flush;
3561
- }
3562
-
3563
- type ReadableSpan = any;
3564
- /**
3565
- * SpanExporter that writes human-readable JSONL (one line per root span).
3566
- * Designed for quick debugging and analysis without OTel tooling.
3567
- */
3568
- declare class SimpleTraceFileExporter {
3569
- private stream;
3570
- private filePath;
3571
- private streamReady;
3572
- private pendingWrites;
3573
- private _shuttingDown;
3574
- private spansByTraceId;
3575
- constructor(filePath: string);
3576
- private ensureStream;
3577
3546
  export(spans: ReadableSpan[], resultCallback: (result: {
3578
3547
  code: number;
3579
3548
  }) => void): void;
3580
3549
  shutdown(): Promise<void>;
3581
3550
  forceFlush(): Promise<void>;
3582
- private buildSimpleRecord;
3551
+ private flush;
3583
3552
  }
3584
3553
 
3585
3554
  /**
@@ -3640,4 +3609,4 @@ type AgentKernel = {
3640
3609
  };
3641
3610
  declare function createAgentKernel(): AgentKernel;
3642
3611
 
3643
- export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
3612
+ export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
package/dist/index.d.ts CHANGED
@@ -1250,7 +1250,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
1250
1250
  declare const DEFAULT_EVAL_PATTERNS: readonly string[];
1251
1251
  type ExecutionDefaults = {
1252
1252
  readonly verbose?: boolean;
1253
- readonly trace_file?: string;
1254
1253
  readonly keep_workspaces?: boolean;
1255
1254
  readonly otel_file?: string;
1256
1255
  readonly export_otel?: boolean;
@@ -3024,8 +3023,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3024
3023
  agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
3025
3024
  /** Enable verbose logging */
3026
3025
  verbose: z.ZodOptional<z.ZodBoolean>;
3027
- /** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
3028
- traceFile: z.ZodOptional<z.ZodString>;
3029
3026
  /** Always keep temp workspaces after eval */
3030
3027
  keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
3031
3028
  /** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
@@ -3036,7 +3033,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3036
3033
  maxRetries?: number | undefined;
3037
3034
  agentTimeoutMs?: number | undefined;
3038
3035
  keepWorkspaces?: boolean | undefined;
3039
- traceFile?: string | undefined;
3040
3036
  otelFile?: string | undefined;
3041
3037
  }, {
3042
3038
  workers?: number | undefined;
@@ -3044,7 +3040,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3044
3040
  maxRetries?: number | undefined;
3045
3041
  agentTimeoutMs?: number | undefined;
3046
3042
  keepWorkspaces?: boolean | undefined;
3047
- traceFile?: string | undefined;
3048
3043
  otelFile?: string | undefined;
3049
3044
  }>>;
3050
3045
  /** Output settings */
@@ -3093,7 +3088,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3093
3088
  maxRetries?: number | undefined;
3094
3089
  agentTimeoutMs?: number | undefined;
3095
3090
  keepWorkspaces?: boolean | undefined;
3096
- traceFile?: string | undefined;
3097
3091
  otelFile?: string | undefined;
3098
3092
  } | undefined;
3099
3093
  cache?: {
@@ -3115,7 +3109,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3115
3109
  maxRetries?: number | undefined;
3116
3110
  agentTimeoutMs?: number | undefined;
3117
3111
  keepWorkspaces?: boolean | undefined;
3118
- traceFile?: string | undefined;
3119
3112
  otelFile?: string | undefined;
3120
3113
  } | undefined;
3121
3114
  cache?: {
@@ -3469,8 +3462,6 @@ interface OtelExportOptions {
3469
3462
  readonly groupTurns?: boolean;
3470
3463
  /** Path to write OTLP JSON file (importable by OTel backends) */
3471
3464
  readonly otlpFilePath?: string;
3472
- /** Path to write human-readable simple JSONL trace file */
3473
- readonly traceFilePath?: string;
3474
3465
  }
3475
3466
  /** Preset configuration for a known observability backend. */
3476
3467
  interface OtelBackendPreset {
@@ -3543,7 +3534,7 @@ declare class OtelStreamingObserver {
3543
3534
  getStreamCallbacks(): ProviderStreamCallbacks;
3544
3535
  }
3545
3536
 
3546
- type ReadableSpan$1 = any;
3537
+ type ReadableSpan = any;
3547
3538
  /**
3548
3539
  * SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
3549
3540
  * The file can be imported by any OTel-compatible backend.
@@ -3552,34 +3543,12 @@ declare class OtlpJsonFileExporter {
3552
3543
  private spans;
3553
3544
  private filePath;
3554
3545
  constructor(filePath: string);
3555
- export(spans: ReadableSpan$1[], resultCallback: (result: {
3556
- code: number;
3557
- }) => void): void;
3558
- shutdown(): Promise<void>;
3559
- forceFlush(): Promise<void>;
3560
- private flush;
3561
- }
3562
-
3563
- type ReadableSpan = any;
3564
- /**
3565
- * SpanExporter that writes human-readable JSONL (one line per root span).
3566
- * Designed for quick debugging and analysis without OTel tooling.
3567
- */
3568
- declare class SimpleTraceFileExporter {
3569
- private stream;
3570
- private filePath;
3571
- private streamReady;
3572
- private pendingWrites;
3573
- private _shuttingDown;
3574
- private spansByTraceId;
3575
- constructor(filePath: string);
3576
- private ensureStream;
3577
3546
  export(spans: ReadableSpan[], resultCallback: (result: {
3578
3547
  code: number;
3579
3548
  }) => void): void;
3580
3549
  shutdown(): Promise<void>;
3581
3550
  forceFlush(): Promise<void>;
3582
- private buildSimpleRecord;
3551
+ private flush;
3583
3552
  }
3584
3553
 
3585
3554
  /**
@@ -3640,4 +3609,4 @@ type AgentKernel = {
3640
3609
  };
3641
3610
  declare function createAgentKernel(): AgentKernel;
3642
3611
 
3643
- export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
3612
+ export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
package/dist/index.js CHANGED
@@ -26,9 +26,6 @@ import {
26
26
  import {
27
27
  OtlpJsonFileExporter
28
28
  } from "./chunk-HFSYZHGF.js";
29
- import {
30
- SimpleTraceFileExporter
31
- } from "./chunk-3G2KXH7N.js";
32
29
 
33
30
  // src/evaluation/trace.ts
34
31
  function computeTraceSummary(messages) {
@@ -615,12 +612,6 @@ function parseExecutionDefaults(raw, configPath) {
615
612
  } else if (obj.verbose !== void 0) {
616
613
  logWarning(`Invalid execution.verbose in ${configPath}, expected boolean`);
617
614
  }
618
- const traceFile = obj.trace_file;
619
- if (typeof traceFile === "string" && traceFile.trim().length > 0) {
620
- result.trace_file = traceFile.trim();
621
- } else if (traceFile !== void 0) {
622
- logWarning(`Invalid execution.trace_file in ${configPath}, expected non-empty string`);
623
- }
624
615
  if (typeof obj.keep_workspaces === "boolean") {
625
616
  result.keep_workspaces = obj.keep_workspaces;
626
617
  } else if (obj.keep_workspaces !== void 0) {
@@ -9601,10 +9592,26 @@ function extractJsonBlob(text) {
9601
9592
  const match = text.match(/\{[\s\S]*\}/);
9602
9593
  return match?.[0];
9603
9594
  }
9595
+ function repairSchemaNearBooleanFields(text) {
9596
+ return text.replace(
9597
+ /("passed"\s*:\s*)(?:"([^"]+)"|([A-Za-z_][A-Za-z0-9_-]*))/gi,
9598
+ (_match, prefix, quotedValue, bareValue) => {
9599
+ const value = (quotedValue ?? bareValue ?? "").trim().toLowerCase();
9600
+ if (value === "true") {
9601
+ return `${prefix}true`;
9602
+ }
9603
+ if (value === "false") {
9604
+ return `${prefix}false`;
9605
+ }
9606
+ return `${prefix}false`;
9607
+ }
9608
+ );
9609
+ }
9604
9610
  function parseJsonFromText(text) {
9605
9611
  const cleaned = typeof text === "string" ? text.replace(/```json\n?|```/g, "").trim() : "";
9606
9612
  const blob = extractJsonBlob(cleaned) ?? cleaned;
9607
- return JSON.parse(blob);
9613
+ const repaired = repairSchemaNearBooleanFields(blob);
9614
+ return JSON.parse(repaired);
9608
9615
  }
9609
9616
  function isNonEmptyString(value) {
9610
9617
  return typeof value === "string" && value.trim().length > 0;
@@ -16820,8 +16827,6 @@ var AgentVConfigSchema = z4.object({
16820
16827
  agentTimeoutMs: z4.number().int().min(0).optional(),
16821
16828
  /** Enable verbose logging */
16822
16829
  verbose: z4.boolean().optional(),
16823
- /** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
16824
- traceFile: z4.string().optional(),
16825
16830
  /** Always keep temp workspaces after eval */
16826
16831
  keepWorkspaces: z4.boolean().optional(),
16827
16832
  /** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
@@ -17121,12 +17126,6 @@ var OtelTraceExporter = class {
17121
17126
  new SimpleSpanProcessor(new OtlpJsonFileExporter2(this.options.otlpFilePath))
17122
17127
  );
17123
17128
  }
17124
- if (this.options.traceFilePath) {
17125
- const { SimpleTraceFileExporter: SimpleTraceFileExporter2 } = await import("./simple-trace-file-exporter-CRIO5HDZ.js");
17126
- processors.push(
17127
- new SimpleSpanProcessor(new SimpleTraceFileExporter2(this.options.traceFilePath))
17128
- );
17129
- }
17130
17129
  if (processors.length === 0) {
17131
17130
  return false;
17132
17131
  }
@@ -17577,7 +17576,6 @@ export {
17577
17576
  ProviderRegistry,
17578
17577
  RepoManager,
17579
17578
  ResponseCache,
17580
- SimpleTraceFileExporter,
17581
17579
  SkillTriggerEvaluator,
17582
17580
  TEST_MESSAGE_ROLES,
17583
17581
  TemplateNotDirectoryError,