@agentv/core 3.12.0 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +19 -146
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3 -34
- package/dist/index.d.ts +3 -34
- package/dist/index.js +17 -19
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-3G2KXH7N.js +0 -120
- package/dist/chunk-3G2KXH7N.js.map +0 -1
- package/dist/simple-trace-file-exporter-CRIO5HDZ.js +0 -7
- package/dist/simple-trace-file-exporter-CRIO5HDZ.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -1250,7 +1250,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
|
|
|
1250
1250
|
declare const DEFAULT_EVAL_PATTERNS: readonly string[];
|
|
1251
1251
|
type ExecutionDefaults = {
|
|
1252
1252
|
readonly verbose?: boolean;
|
|
1253
|
-
readonly trace_file?: string;
|
|
1254
1253
|
readonly keep_workspaces?: boolean;
|
|
1255
1254
|
readonly otel_file?: string;
|
|
1256
1255
|
readonly export_otel?: boolean;
|
|
@@ -3024,8 +3023,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3024
3023
|
agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
|
|
3025
3024
|
/** Enable verbose logging */
|
|
3026
3025
|
verbose: z.ZodOptional<z.ZodBoolean>;
|
|
3027
|
-
/** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
|
|
3028
|
-
traceFile: z.ZodOptional<z.ZodString>;
|
|
3029
3026
|
/** Always keep temp workspaces after eval */
|
|
3030
3027
|
keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
|
|
3031
3028
|
/** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
|
|
@@ -3036,7 +3033,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3036
3033
|
maxRetries?: number | undefined;
|
|
3037
3034
|
agentTimeoutMs?: number | undefined;
|
|
3038
3035
|
keepWorkspaces?: boolean | undefined;
|
|
3039
|
-
traceFile?: string | undefined;
|
|
3040
3036
|
otelFile?: string | undefined;
|
|
3041
3037
|
}, {
|
|
3042
3038
|
workers?: number | undefined;
|
|
@@ -3044,7 +3040,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3044
3040
|
maxRetries?: number | undefined;
|
|
3045
3041
|
agentTimeoutMs?: number | undefined;
|
|
3046
3042
|
keepWorkspaces?: boolean | undefined;
|
|
3047
|
-
traceFile?: string | undefined;
|
|
3048
3043
|
otelFile?: string | undefined;
|
|
3049
3044
|
}>>;
|
|
3050
3045
|
/** Output settings */
|
|
@@ -3093,7 +3088,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3093
3088
|
maxRetries?: number | undefined;
|
|
3094
3089
|
agentTimeoutMs?: number | undefined;
|
|
3095
3090
|
keepWorkspaces?: boolean | undefined;
|
|
3096
|
-
traceFile?: string | undefined;
|
|
3097
3091
|
otelFile?: string | undefined;
|
|
3098
3092
|
} | undefined;
|
|
3099
3093
|
cache?: {
|
|
@@ -3115,7 +3109,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3115
3109
|
maxRetries?: number | undefined;
|
|
3116
3110
|
agentTimeoutMs?: number | undefined;
|
|
3117
3111
|
keepWorkspaces?: boolean | undefined;
|
|
3118
|
-
traceFile?: string | undefined;
|
|
3119
3112
|
otelFile?: string | undefined;
|
|
3120
3113
|
} | undefined;
|
|
3121
3114
|
cache?: {
|
|
@@ -3469,8 +3462,6 @@ interface OtelExportOptions {
|
|
|
3469
3462
|
readonly groupTurns?: boolean;
|
|
3470
3463
|
/** Path to write OTLP JSON file (importable by OTel backends) */
|
|
3471
3464
|
readonly otlpFilePath?: string;
|
|
3472
|
-
/** Path to write human-readable simple JSONL trace file */
|
|
3473
|
-
readonly traceFilePath?: string;
|
|
3474
3465
|
}
|
|
3475
3466
|
/** Preset configuration for a known observability backend. */
|
|
3476
3467
|
interface OtelBackendPreset {
|
|
@@ -3543,7 +3534,7 @@ declare class OtelStreamingObserver {
|
|
|
3543
3534
|
getStreamCallbacks(): ProviderStreamCallbacks;
|
|
3544
3535
|
}
|
|
3545
3536
|
|
|
3546
|
-
type ReadableSpan
|
|
3537
|
+
type ReadableSpan = any;
|
|
3547
3538
|
/**
|
|
3548
3539
|
* SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
|
|
3549
3540
|
* The file can be imported by any OTel-compatible backend.
|
|
@@ -3552,34 +3543,12 @@ declare class OtlpJsonFileExporter {
|
|
|
3552
3543
|
private spans;
|
|
3553
3544
|
private filePath;
|
|
3554
3545
|
constructor(filePath: string);
|
|
3555
|
-
export(spans: ReadableSpan$1[], resultCallback: (result: {
|
|
3556
|
-
code: number;
|
|
3557
|
-
}) => void): void;
|
|
3558
|
-
shutdown(): Promise<void>;
|
|
3559
|
-
forceFlush(): Promise<void>;
|
|
3560
|
-
private flush;
|
|
3561
|
-
}
|
|
3562
|
-
|
|
3563
|
-
type ReadableSpan = any;
|
|
3564
|
-
/**
|
|
3565
|
-
* SpanExporter that writes human-readable JSONL (one line per root span).
|
|
3566
|
-
* Designed for quick debugging and analysis without OTel tooling.
|
|
3567
|
-
*/
|
|
3568
|
-
declare class SimpleTraceFileExporter {
|
|
3569
|
-
private stream;
|
|
3570
|
-
private filePath;
|
|
3571
|
-
private streamReady;
|
|
3572
|
-
private pendingWrites;
|
|
3573
|
-
private _shuttingDown;
|
|
3574
|
-
private spansByTraceId;
|
|
3575
|
-
constructor(filePath: string);
|
|
3576
|
-
private ensureStream;
|
|
3577
3546
|
export(spans: ReadableSpan[], resultCallback: (result: {
|
|
3578
3547
|
code: number;
|
|
3579
3548
|
}) => void): void;
|
|
3580
3549
|
shutdown(): Promise<void>;
|
|
3581
3550
|
forceFlush(): Promise<void>;
|
|
3582
|
-
private
|
|
3551
|
+
private flush;
|
|
3583
3552
|
}
|
|
3584
3553
|
|
|
3585
3554
|
/**
|
|
@@ -3640,4 +3609,4 @@ type AgentKernel = {
|
|
|
3640
3609
|
};
|
|
3641
3610
|
declare function createAgentKernel(): AgentKernel;
|
|
3642
3611
|
|
|
3643
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext,
|
|
3612
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
package/dist/index.d.ts
CHANGED
|
@@ -1250,7 +1250,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
|
|
|
1250
1250
|
declare const DEFAULT_EVAL_PATTERNS: readonly string[];
|
|
1251
1251
|
type ExecutionDefaults = {
|
|
1252
1252
|
readonly verbose?: boolean;
|
|
1253
|
-
readonly trace_file?: string;
|
|
1254
1253
|
readonly keep_workspaces?: boolean;
|
|
1255
1254
|
readonly otel_file?: string;
|
|
1256
1255
|
readonly export_otel?: boolean;
|
|
@@ -3024,8 +3023,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3024
3023
|
agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
|
|
3025
3024
|
/** Enable verbose logging */
|
|
3026
3025
|
verbose: z.ZodOptional<z.ZodBoolean>;
|
|
3027
|
-
/** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
|
|
3028
|
-
traceFile: z.ZodOptional<z.ZodString>;
|
|
3029
3026
|
/** Always keep temp workspaces after eval */
|
|
3030
3027
|
keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
|
|
3031
3028
|
/** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
|
|
@@ -3036,7 +3033,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3036
3033
|
maxRetries?: number | undefined;
|
|
3037
3034
|
agentTimeoutMs?: number | undefined;
|
|
3038
3035
|
keepWorkspaces?: boolean | undefined;
|
|
3039
|
-
traceFile?: string | undefined;
|
|
3040
3036
|
otelFile?: string | undefined;
|
|
3041
3037
|
}, {
|
|
3042
3038
|
workers?: number | undefined;
|
|
@@ -3044,7 +3040,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3044
3040
|
maxRetries?: number | undefined;
|
|
3045
3041
|
agentTimeoutMs?: number | undefined;
|
|
3046
3042
|
keepWorkspaces?: boolean | undefined;
|
|
3047
|
-
traceFile?: string | undefined;
|
|
3048
3043
|
otelFile?: string | undefined;
|
|
3049
3044
|
}>>;
|
|
3050
3045
|
/** Output settings */
|
|
@@ -3093,7 +3088,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3093
3088
|
maxRetries?: number | undefined;
|
|
3094
3089
|
agentTimeoutMs?: number | undefined;
|
|
3095
3090
|
keepWorkspaces?: boolean | undefined;
|
|
3096
|
-
traceFile?: string | undefined;
|
|
3097
3091
|
otelFile?: string | undefined;
|
|
3098
3092
|
} | undefined;
|
|
3099
3093
|
cache?: {
|
|
@@ -3115,7 +3109,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3115
3109
|
maxRetries?: number | undefined;
|
|
3116
3110
|
agentTimeoutMs?: number | undefined;
|
|
3117
3111
|
keepWorkspaces?: boolean | undefined;
|
|
3118
|
-
traceFile?: string | undefined;
|
|
3119
3112
|
otelFile?: string | undefined;
|
|
3120
3113
|
} | undefined;
|
|
3121
3114
|
cache?: {
|
|
@@ -3469,8 +3462,6 @@ interface OtelExportOptions {
|
|
|
3469
3462
|
readonly groupTurns?: boolean;
|
|
3470
3463
|
/** Path to write OTLP JSON file (importable by OTel backends) */
|
|
3471
3464
|
readonly otlpFilePath?: string;
|
|
3472
|
-
/** Path to write human-readable simple JSONL trace file */
|
|
3473
|
-
readonly traceFilePath?: string;
|
|
3474
3465
|
}
|
|
3475
3466
|
/** Preset configuration for a known observability backend. */
|
|
3476
3467
|
interface OtelBackendPreset {
|
|
@@ -3543,7 +3534,7 @@ declare class OtelStreamingObserver {
|
|
|
3543
3534
|
getStreamCallbacks(): ProviderStreamCallbacks;
|
|
3544
3535
|
}
|
|
3545
3536
|
|
|
3546
|
-
type ReadableSpan
|
|
3537
|
+
type ReadableSpan = any;
|
|
3547
3538
|
/**
|
|
3548
3539
|
* SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
|
|
3549
3540
|
* The file can be imported by any OTel-compatible backend.
|
|
@@ -3552,34 +3543,12 @@ declare class OtlpJsonFileExporter {
|
|
|
3552
3543
|
private spans;
|
|
3553
3544
|
private filePath;
|
|
3554
3545
|
constructor(filePath: string);
|
|
3555
|
-
export(spans: ReadableSpan$1[], resultCallback: (result: {
|
|
3556
|
-
code: number;
|
|
3557
|
-
}) => void): void;
|
|
3558
|
-
shutdown(): Promise<void>;
|
|
3559
|
-
forceFlush(): Promise<void>;
|
|
3560
|
-
private flush;
|
|
3561
|
-
}
|
|
3562
|
-
|
|
3563
|
-
type ReadableSpan = any;
|
|
3564
|
-
/**
|
|
3565
|
-
* SpanExporter that writes human-readable JSONL (one line per root span).
|
|
3566
|
-
* Designed for quick debugging and analysis without OTel tooling.
|
|
3567
|
-
*/
|
|
3568
|
-
declare class SimpleTraceFileExporter {
|
|
3569
|
-
private stream;
|
|
3570
|
-
private filePath;
|
|
3571
|
-
private streamReady;
|
|
3572
|
-
private pendingWrites;
|
|
3573
|
-
private _shuttingDown;
|
|
3574
|
-
private spansByTraceId;
|
|
3575
|
-
constructor(filePath: string);
|
|
3576
|
-
private ensureStream;
|
|
3577
3546
|
export(spans: ReadableSpan[], resultCallback: (result: {
|
|
3578
3547
|
code: number;
|
|
3579
3548
|
}) => void): void;
|
|
3580
3549
|
shutdown(): Promise<void>;
|
|
3581
3550
|
forceFlush(): Promise<void>;
|
|
3582
|
-
private
|
|
3551
|
+
private flush;
|
|
3583
3552
|
}
|
|
3584
3553
|
|
|
3585
3554
|
/**
|
|
@@ -3640,4 +3609,4 @@ type AgentKernel = {
|
|
|
3640
3609
|
};
|
|
3641
3610
|
declare function createAgentKernel(): AgentKernel;
|
|
3642
3611
|
|
|
3643
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext,
|
|
3612
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
package/dist/index.js
CHANGED
|
@@ -26,9 +26,6 @@ import {
|
|
|
26
26
|
import {
|
|
27
27
|
OtlpJsonFileExporter
|
|
28
28
|
} from "./chunk-HFSYZHGF.js";
|
|
29
|
-
import {
|
|
30
|
-
SimpleTraceFileExporter
|
|
31
|
-
} from "./chunk-3G2KXH7N.js";
|
|
32
29
|
|
|
33
30
|
// src/evaluation/trace.ts
|
|
34
31
|
function computeTraceSummary(messages) {
|
|
@@ -615,12 +612,6 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
615
612
|
} else if (obj.verbose !== void 0) {
|
|
616
613
|
logWarning(`Invalid execution.verbose in ${configPath}, expected boolean`);
|
|
617
614
|
}
|
|
618
|
-
const traceFile = obj.trace_file;
|
|
619
|
-
if (typeof traceFile === "string" && traceFile.trim().length > 0) {
|
|
620
|
-
result.trace_file = traceFile.trim();
|
|
621
|
-
} else if (traceFile !== void 0) {
|
|
622
|
-
logWarning(`Invalid execution.trace_file in ${configPath}, expected non-empty string`);
|
|
623
|
-
}
|
|
624
615
|
if (typeof obj.keep_workspaces === "boolean") {
|
|
625
616
|
result.keep_workspaces = obj.keep_workspaces;
|
|
626
617
|
} else if (obj.keep_workspaces !== void 0) {
|
|
@@ -9601,10 +9592,26 @@ function extractJsonBlob(text) {
|
|
|
9601
9592
|
const match = text.match(/\{[\s\S]*\}/);
|
|
9602
9593
|
return match?.[0];
|
|
9603
9594
|
}
|
|
9595
|
+
function repairSchemaNearBooleanFields(text) {
|
|
9596
|
+
return text.replace(
|
|
9597
|
+
/("passed"\s*:\s*)(?:"([^"]+)"|([A-Za-z_][A-Za-z0-9_-]*))/gi,
|
|
9598
|
+
(_match, prefix, quotedValue, bareValue) => {
|
|
9599
|
+
const value = (quotedValue ?? bareValue ?? "").trim().toLowerCase();
|
|
9600
|
+
if (value === "true") {
|
|
9601
|
+
return `${prefix}true`;
|
|
9602
|
+
}
|
|
9603
|
+
if (value === "false") {
|
|
9604
|
+
return `${prefix}false`;
|
|
9605
|
+
}
|
|
9606
|
+
return `${prefix}false`;
|
|
9607
|
+
}
|
|
9608
|
+
);
|
|
9609
|
+
}
|
|
9604
9610
|
function parseJsonFromText(text) {
|
|
9605
9611
|
const cleaned = typeof text === "string" ? text.replace(/```json\n?|```/g, "").trim() : "";
|
|
9606
9612
|
const blob = extractJsonBlob(cleaned) ?? cleaned;
|
|
9607
|
-
|
|
9613
|
+
const repaired = repairSchemaNearBooleanFields(blob);
|
|
9614
|
+
return JSON.parse(repaired);
|
|
9608
9615
|
}
|
|
9609
9616
|
function isNonEmptyString(value) {
|
|
9610
9617
|
return typeof value === "string" && value.trim().length > 0;
|
|
@@ -16820,8 +16827,6 @@ var AgentVConfigSchema = z4.object({
|
|
|
16820
16827
|
agentTimeoutMs: z4.number().int().min(0).optional(),
|
|
16821
16828
|
/** Enable verbose logging */
|
|
16822
16829
|
verbose: z4.boolean().optional(),
|
|
16823
|
-
/** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
|
|
16824
|
-
traceFile: z4.string().optional(),
|
|
16825
16830
|
/** Always keep temp workspaces after eval */
|
|
16826
16831
|
keepWorkspaces: z4.boolean().optional(),
|
|
16827
16832
|
/** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
|
|
@@ -17121,12 +17126,6 @@ var OtelTraceExporter = class {
|
|
|
17121
17126
|
new SimpleSpanProcessor(new OtlpJsonFileExporter2(this.options.otlpFilePath))
|
|
17122
17127
|
);
|
|
17123
17128
|
}
|
|
17124
|
-
if (this.options.traceFilePath) {
|
|
17125
|
-
const { SimpleTraceFileExporter: SimpleTraceFileExporter2 } = await import("./simple-trace-file-exporter-CRIO5HDZ.js");
|
|
17126
|
-
processors.push(
|
|
17127
|
-
new SimpleSpanProcessor(new SimpleTraceFileExporter2(this.options.traceFilePath))
|
|
17128
|
-
);
|
|
17129
|
-
}
|
|
17130
17129
|
if (processors.length === 0) {
|
|
17131
17130
|
return false;
|
|
17132
17131
|
}
|
|
@@ -17577,7 +17576,6 @@ export {
|
|
|
17577
17576
|
ProviderRegistry,
|
|
17578
17577
|
RepoManager,
|
|
17579
17578
|
ResponseCache,
|
|
17580
|
-
SimpleTraceFileExporter,
|
|
17581
17579
|
SkillTriggerEvaluator,
|
|
17582
17580
|
TEST_MESSAGE_ROLES,
|
|
17583
17581
|
TemplateNotDirectoryError,
|