@agentv/core 3.14.6 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HP5PFOVK.js → chunk-PXYYRDHH.js} +142 -148
- package/dist/chunk-PXYYRDHH.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +9 -2
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +3 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +569 -257
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +135 -93
- package/dist/index.d.ts +135 -93
- package/dist/index.js +459 -141
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-HP5PFOVK.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -1,6 +1,71 @@
|
|
|
1
1
|
import * as ai from 'ai';
|
|
2
2
|
import { z } from 'zod';
|
|
3
3
|
|
|
4
|
+
/**
|
|
5
|
+
* Multimodal content types for the AgentV pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Models structured content blocks (text, images, files) that flow end-to-end
|
|
8
|
+
* without lossy flattening. Modeled after Inspect AI's discriminated union approach.
|
|
9
|
+
*
|
|
10
|
+
* ## Content model
|
|
11
|
+
*
|
|
12
|
+
* `Message.content` accepts `string | Content[]`:
|
|
13
|
+
* - `string` — backward-compatible plain text (most common case)
|
|
14
|
+
* - `Content[]` — array of typed content blocks for multimodal messages
|
|
15
|
+
*
|
|
16
|
+
* Binary data (images, files) is referenced by URL/base64 string or filesystem
|
|
17
|
+
* path — never raw bytes. This keeps payloads serializable and lets code graders
|
|
18
|
+
* access files via path without decoding.
|
|
19
|
+
*
|
|
20
|
+
* ## How to extend
|
|
21
|
+
*
|
|
22
|
+
* To add a new content variant (e.g., `ContentAudio`):
|
|
23
|
+
* 1. Define the interface with a unique `type` discriminant
|
|
24
|
+
* 2. Add it to the `Content` union
|
|
25
|
+
* 3. Update `getTextContent()` if the new type has extractable text
|
|
26
|
+
* 4. Update `isContent()` type guard with the new type string
|
|
27
|
+
*/
|
|
28
|
+
/** A text content block. */
|
|
29
|
+
interface ContentText {
|
|
30
|
+
readonly type: 'text';
|
|
31
|
+
readonly text: string;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* An image content block.
|
|
35
|
+
* `source` is a URL, data URI (base64), or filesystem path.
|
|
36
|
+
*/
|
|
37
|
+
interface ContentImage {
|
|
38
|
+
readonly type: 'image';
|
|
39
|
+
readonly media_type: string;
|
|
40
|
+
readonly source: string;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* A file content block.
|
|
44
|
+
* `path` is a filesystem path or URL referencing the file.
|
|
45
|
+
*/
|
|
46
|
+
interface ContentFile {
|
|
47
|
+
readonly type: 'file';
|
|
48
|
+
readonly media_type: string;
|
|
49
|
+
readonly path: string;
|
|
50
|
+
}
|
|
51
|
+
/** Discriminated union of all content block types. */
|
|
52
|
+
type Content = ContentText | ContentImage | ContentFile;
|
|
53
|
+
/** Check whether a value is a valid `Content` block. */
|
|
54
|
+
declare function isContent(value: unknown): value is Content;
|
|
55
|
+
/** Check whether a value is a `Content[]` array (at least one valid block). */
|
|
56
|
+
declare function isContentArray(value: unknown): value is Content[];
|
|
57
|
+
/**
|
|
58
|
+
* Extract plain text from `string | Content[]`.
|
|
59
|
+
*
|
|
60
|
+
* - If `content` is a string, returns it directly.
|
|
61
|
+
* - If `content` is a `Content[]`, concatenates all `ContentText.text` values
|
|
62
|
+
* (separated by newlines) and returns the result.
|
|
63
|
+
* - Returns `''` for `undefined`/`null`/unrecognized shapes.
|
|
64
|
+
*
|
|
65
|
+
* This is a **non-destructive** accessor — the original `Content[]` is preserved.
|
|
66
|
+
*/
|
|
67
|
+
declare function getTextContent(content: string | Content[] | undefined | null): string;
|
|
68
|
+
|
|
4
69
|
type ChatMessageRole = 'system' | 'user' | 'assistant' | 'tool' | 'function';
|
|
5
70
|
interface ChatMessage {
|
|
6
71
|
readonly role: ChatMessageRole;
|
|
@@ -74,8 +139,8 @@ interface Message {
|
|
|
74
139
|
readonly role: string;
|
|
75
140
|
/** Optional name for the message sender */
|
|
76
141
|
readonly name?: string;
|
|
77
|
-
/** Message content */
|
|
78
|
-
readonly content?:
|
|
142
|
+
/** Message content — plain string or structured content blocks for multimodal data. */
|
|
143
|
+
readonly content?: string | Content[];
|
|
79
144
|
/** Tool calls made in this message */
|
|
80
145
|
readonly toolCalls?: readonly ToolCall[];
|
|
81
146
|
/** ISO 8601 timestamp when the message started */
|
|
@@ -120,6 +185,13 @@ interface ProviderResponse {
|
|
|
120
185
|
/** ISO 8601 timestamp when execution ended (optional) */
|
|
121
186
|
readonly endTime?: string;
|
|
122
187
|
}
|
|
188
|
+
/**
|
|
189
|
+
* Extract the content from the last assistant message in an output message array.
|
|
190
|
+
* Returns empty string if no assistant message found.
|
|
191
|
+
*
|
|
192
|
+
* Handles both plain-string content and Content[] (extracts text blocks).
|
|
193
|
+
*/
|
|
194
|
+
declare function extractLastAssistantContent(messages: readonly Message[] | undefined): string;
|
|
123
195
|
interface Provider {
|
|
124
196
|
readonly id: string;
|
|
125
197
|
readonly kind: ProviderKind;
|
|
@@ -1012,7 +1084,8 @@ type EvaluatorConfig = CodeEvaluatorConfig | LlmGraderEvaluatorConfig | Composit
|
|
|
1012
1084
|
*/
|
|
1013
1085
|
interface EvalTest {
|
|
1014
1086
|
readonly id: string;
|
|
1015
|
-
readonly
|
|
1087
|
+
readonly dataset?: string;
|
|
1088
|
+
readonly category?: string;
|
|
1016
1089
|
readonly conversation_id?: string;
|
|
1017
1090
|
readonly question: string;
|
|
1018
1091
|
readonly input: readonly TestMessage[];
|
|
@@ -1121,7 +1194,8 @@ type FailOnError = boolean;
|
|
|
1121
1194
|
interface EvaluationResult {
|
|
1122
1195
|
readonly timestamp: string;
|
|
1123
1196
|
readonly testId: string;
|
|
1124
|
-
readonly
|
|
1197
|
+
readonly dataset?: string;
|
|
1198
|
+
readonly category?: string;
|
|
1125
1199
|
readonly conversationId?: string;
|
|
1126
1200
|
readonly score: number;
|
|
1127
1201
|
readonly assertions: readonly AssertionEntry[];
|
|
@@ -1347,6 +1421,8 @@ type LoadOptions = {
|
|
|
1347
1421
|
readonly verbose?: boolean;
|
|
1348
1422
|
/** Filter tests by ID pattern (glob supported, e.g., "summary-*") */
|
|
1349
1423
|
readonly filter?: string;
|
|
1424
|
+
/** Category derived from the eval file's directory path */
|
|
1425
|
+
readonly category?: string;
|
|
1350
1426
|
};
|
|
1351
1427
|
/**
|
|
1352
1428
|
* Read metadata from a test suite file (like target name).
|
|
@@ -1764,133 +1840,80 @@ interface AgentVResolvedConfig {
|
|
|
1764
1840
|
readonly model: string;
|
|
1765
1841
|
readonly temperature: number;
|
|
1766
1842
|
}
|
|
1767
|
-
|
|
1768
|
-
|
|
1843
|
+
/** Base fields shared by all resolved targets. */
|
|
1844
|
+
interface ResolvedTargetBase {
|
|
1769
1845
|
readonly name: string;
|
|
1770
1846
|
readonly graderTarget?: string;
|
|
1771
1847
|
readonly workers?: number;
|
|
1772
1848
|
readonly providerBatching?: boolean;
|
|
1849
|
+
/**
|
|
1850
|
+
* Whether this target can be executed via executor subagents in subagent mode.
|
|
1851
|
+
* Defaults to `true` for all non-CLI providers. Set `false` in targets.yaml
|
|
1852
|
+
* to force CLI invocation even in subagent mode.
|
|
1853
|
+
*/
|
|
1854
|
+
readonly subagentModeAllowed?: boolean;
|
|
1855
|
+
}
|
|
1856
|
+
type ResolvedTarget = (ResolvedTargetBase & {
|
|
1857
|
+
readonly kind: 'openai';
|
|
1773
1858
|
readonly config: OpenAIResolvedConfig;
|
|
1774
|
-
} | {
|
|
1859
|
+
}) | (ResolvedTargetBase & {
|
|
1775
1860
|
readonly kind: 'openrouter';
|
|
1776
|
-
readonly name: string;
|
|
1777
|
-
readonly graderTarget?: string;
|
|
1778
|
-
readonly workers?: number;
|
|
1779
|
-
readonly providerBatching?: boolean;
|
|
1780
1861
|
readonly config: OpenRouterResolvedConfig;
|
|
1781
|
-
} | {
|
|
1862
|
+
}) | (ResolvedTargetBase & {
|
|
1782
1863
|
readonly kind: 'azure';
|
|
1783
|
-
readonly name: string;
|
|
1784
|
-
readonly graderTarget?: string;
|
|
1785
|
-
readonly workers?: number;
|
|
1786
|
-
readonly providerBatching?: boolean;
|
|
1787
1864
|
readonly config: AzureResolvedConfig;
|
|
1788
|
-
} | {
|
|
1865
|
+
}) | (ResolvedTargetBase & {
|
|
1789
1866
|
readonly kind: 'anthropic';
|
|
1790
|
-
readonly name: string;
|
|
1791
|
-
readonly graderTarget?: string;
|
|
1792
|
-
readonly workers?: number;
|
|
1793
|
-
readonly providerBatching?: boolean;
|
|
1794
1867
|
readonly config: AnthropicResolvedConfig;
|
|
1795
|
-
} | {
|
|
1868
|
+
}) | (ResolvedTargetBase & {
|
|
1796
1869
|
readonly kind: 'gemini';
|
|
1797
|
-
readonly name: string;
|
|
1798
|
-
readonly graderTarget?: string;
|
|
1799
|
-
readonly workers?: number;
|
|
1800
|
-
readonly providerBatching?: boolean;
|
|
1801
1870
|
readonly config: GeminiResolvedConfig;
|
|
1802
|
-
} | {
|
|
1871
|
+
}) | (ResolvedTargetBase & {
|
|
1803
1872
|
readonly kind: 'codex';
|
|
1804
|
-
readonly name: string;
|
|
1805
|
-
readonly graderTarget?: string;
|
|
1806
|
-
readonly workers?: number;
|
|
1807
|
-
readonly providerBatching?: boolean;
|
|
1808
1873
|
readonly config: CodexResolvedConfig;
|
|
1809
|
-
} | {
|
|
1874
|
+
}) | (ResolvedTargetBase & {
|
|
1810
1875
|
readonly kind: 'copilot-sdk';
|
|
1811
|
-
readonly name: string;
|
|
1812
|
-
readonly graderTarget?: string;
|
|
1813
|
-
readonly workers?: number;
|
|
1814
|
-
readonly providerBatching?: boolean;
|
|
1815
1876
|
readonly config: CopilotSdkResolvedConfig;
|
|
1816
|
-
} | {
|
|
1877
|
+
}) | (ResolvedTargetBase & {
|
|
1817
1878
|
readonly kind: 'copilot-cli';
|
|
1818
|
-
readonly name: string;
|
|
1819
|
-
readonly graderTarget?: string;
|
|
1820
|
-
readonly workers?: number;
|
|
1821
|
-
readonly providerBatching?: boolean;
|
|
1822
1879
|
readonly config: CopilotCliResolvedConfig;
|
|
1823
|
-
} | {
|
|
1880
|
+
}) | (ResolvedTargetBase & {
|
|
1824
1881
|
readonly kind: 'copilot-log';
|
|
1825
|
-
readonly name: string;
|
|
1826
|
-
readonly graderTarget?: string;
|
|
1827
|
-
readonly workers?: number;
|
|
1828
|
-
readonly providerBatching?: boolean;
|
|
1829
1882
|
readonly config: CopilotLogResolvedConfig;
|
|
1830
|
-
} | {
|
|
1883
|
+
}) | (ResolvedTargetBase & {
|
|
1831
1884
|
readonly kind: 'pi-coding-agent';
|
|
1832
|
-
readonly name: string;
|
|
1833
|
-
readonly graderTarget?: string;
|
|
1834
|
-
readonly workers?: number;
|
|
1835
|
-
readonly providerBatching?: boolean;
|
|
1836
1885
|
readonly config: PiCodingAgentResolvedConfig;
|
|
1837
|
-
} | {
|
|
1886
|
+
}) | (ResolvedTargetBase & {
|
|
1838
1887
|
readonly kind: 'pi-cli';
|
|
1839
|
-
readonly name: string;
|
|
1840
|
-
readonly graderTarget?: string;
|
|
1841
|
-
readonly workers?: number;
|
|
1842
|
-
readonly providerBatching?: boolean;
|
|
1843
1888
|
readonly config: PiCliResolvedConfig;
|
|
1844
|
-
} | {
|
|
1889
|
+
}) | (ResolvedTargetBase & {
|
|
1845
1890
|
readonly kind: 'claude';
|
|
1846
|
-
readonly name: string;
|
|
1847
|
-
readonly graderTarget?: string;
|
|
1848
|
-
readonly workers?: number;
|
|
1849
|
-
readonly providerBatching?: boolean;
|
|
1850
1891
|
readonly config: ClaudeResolvedConfig;
|
|
1851
|
-
} | {
|
|
1892
|
+
}) | (ResolvedTargetBase & {
|
|
1852
1893
|
readonly kind: 'claude-cli';
|
|
1853
|
-
readonly name: string;
|
|
1854
|
-
readonly graderTarget?: string;
|
|
1855
|
-
readonly workers?: number;
|
|
1856
|
-
readonly providerBatching?: boolean;
|
|
1857
1894
|
readonly config: ClaudeResolvedConfig;
|
|
1858
|
-
} | {
|
|
1895
|
+
}) | (ResolvedTargetBase & {
|
|
1859
1896
|
readonly kind: 'claude-sdk';
|
|
1860
|
-
readonly name: string;
|
|
1861
|
-
readonly graderTarget?: string;
|
|
1862
|
-
readonly workers?: number;
|
|
1863
|
-
readonly providerBatching?: boolean;
|
|
1864
1897
|
readonly config: ClaudeResolvedConfig;
|
|
1865
|
-
} | {
|
|
1898
|
+
}) | (ResolvedTargetBase & {
|
|
1866
1899
|
readonly kind: 'mock';
|
|
1867
|
-
readonly name: string;
|
|
1868
|
-
readonly graderTarget?: string;
|
|
1869
|
-
readonly workers?: number;
|
|
1870
|
-
readonly providerBatching?: boolean;
|
|
1871
1900
|
readonly config: MockResolvedConfig;
|
|
1872
|
-
} | {
|
|
1901
|
+
}) | (ResolvedTargetBase & {
|
|
1873
1902
|
readonly kind: 'vscode' | 'vscode-insiders';
|
|
1874
|
-
readonly name: string;
|
|
1875
|
-
readonly graderTarget?: string;
|
|
1876
|
-
readonly workers?: number;
|
|
1877
|
-
readonly providerBatching?: boolean;
|
|
1878
1903
|
readonly config: VSCodeResolvedConfig;
|
|
1879
|
-
} | {
|
|
1904
|
+
}) | (ResolvedTargetBase & {
|
|
1880
1905
|
readonly kind: 'agentv';
|
|
1881
|
-
readonly name: string;
|
|
1882
|
-
readonly graderTarget?: string;
|
|
1883
|
-
readonly workers?: number;
|
|
1884
|
-
readonly providerBatching?: boolean;
|
|
1885
1906
|
readonly config: AgentVResolvedConfig;
|
|
1886
|
-
} | {
|
|
1907
|
+
}) | (ResolvedTargetBase & {
|
|
1887
1908
|
readonly kind: 'cli';
|
|
1888
|
-
readonly name: string;
|
|
1889
|
-
readonly graderTarget?: string;
|
|
1890
|
-
readonly workers?: number;
|
|
1891
|
-
readonly providerBatching?: boolean;
|
|
1892
1909
|
readonly config: CliResolvedConfig;
|
|
1893
|
-
};
|
|
1910
|
+
});
|
|
1911
|
+
/**
|
|
1912
|
+
* Optional settings accepted on ALL target definitions regardless of provider.
|
|
1913
|
+
* Exported so the targets validator can reuse the same list — adding a field
|
|
1914
|
+
* here automatically makes it valid in targets.yaml without a separate update.
|
|
1915
|
+
*/
|
|
1916
|
+
declare const COMMON_TARGET_SETTINGS: readonly ["provider_batching", "providerBatching", "subagent_mode_allowed", "subagentModeAllowed"];
|
|
1894
1917
|
declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup, evalFilePath?: string): ResolvedTarget;
|
|
1895
1918
|
|
|
1896
1919
|
/**
|
|
@@ -2451,6 +2474,14 @@ declare function calculateRubricScore(result: z.infer<typeof rubricEvaluationSch
|
|
|
2451
2474
|
* Build the output schema for score-range rubric evaluation.
|
|
2452
2475
|
*/
|
|
2453
2476
|
declare function buildScoreRangeOutputSchema(): string;
|
|
2477
|
+
/**
|
|
2478
|
+
* Extract all `ContentImage` blocks from assistant messages.
|
|
2479
|
+
*
|
|
2480
|
+
* Scans `messages` for assistant-role entries whose `content` is a `Content[]`
|
|
2481
|
+
* array and collects every `ContentImage` block. Non-assistant messages and
|
|
2482
|
+
* plain-string content are skipped.
|
|
2483
|
+
*/
|
|
2484
|
+
declare function extractImageBlocks(messages: readonly Message[]): ContentImage[];
|
|
2454
2485
|
|
|
2455
2486
|
/**
|
|
2456
2487
|
* Built-in skill-trigger evaluator.
|
|
@@ -3507,6 +3538,17 @@ declare function getWorkspacePoolRoot(): string;
|
|
|
3507
3538
|
*/
|
|
3508
3539
|
declare function trimBaselineResult(result: EvaluationResult): EvaluationResult;
|
|
3509
3540
|
|
|
3541
|
+
/** Default category for eval files without subdirectory structure. */
|
|
3542
|
+
declare const DEFAULT_CATEGORY = "Uncategorized";
|
|
3543
|
+
/**
|
|
3544
|
+
* Derive a human-readable category from an eval file's relative path.
|
|
3545
|
+
*
|
|
3546
|
+
* Strips the filename and any `evals` directory segments, then joins
|
|
3547
|
+
* remaining directories with `/`. Returns {@link DEFAULT_CATEGORY} for files
|
|
3548
|
+
* at the root level.
|
|
3549
|
+
*/
|
|
3550
|
+
declare function deriveCategory(relativePath: string): string;
|
|
3551
|
+
|
|
3510
3552
|
/** Options for configuring the OTel trace exporter. */
|
|
3511
3553
|
interface OtelExportOptions {
|
|
3512
3554
|
/** OTLP endpoint URL */
|
|
@@ -3668,4 +3710,4 @@ type AgentKernel = {
|
|
|
3668
3710
|
};
|
|
3669
3711
|
declare function createAgentKernel(): AgentKernel;
|
|
3670
3712
|
|
|
3671
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
|
3713
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, COMMON_TARGET_SETTINGS, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentText, type CopilotCliResolvedConfig, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveCategory, detectFormat, discoverAssertions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|