@alis-build/harness-eval 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -4
- package/dist/adapters/claude-code/index.d.ts +1 -1
- package/dist/adapters/claude-code/index.js +1 -1
- package/dist/{claude-code-ycT0JQZF.js → claude-code-DZ4Vkgp6.js} +35 -6
- package/dist/{claude-code-ycT0JQZF.js.map → claude-code-DZ4Vkgp6.js.map} +1 -1
- package/dist/cli/bin.js +109 -12
- package/dist/cli/bin.js.map +1 -1
- package/dist/config/loader.d.ts +1 -1
- package/dist/config/loader.js +1 -1
- package/dist/{index-6Z17eKZx.d.ts → index-V22PrR0p.d.ts} +2 -1
- package/dist/index.d.ts +270 -152
- package/dist/index.js +124 -5
- package/dist/index.js.map +1 -0
- package/dist/{loader-DTvoVfN0.d.ts → loader-C9yQHUPC.d.ts} +19 -2
- package/dist/{loader-BCnFJ8rm.js → loader-DcI0KfRX.js} +291 -4
- package/dist/loader-DcI0KfRX.js.map +1 -0
- package/dist/{build-DsVJ_UeU.js → projections-BcX7w-f6.js} +486 -243
- package/dist/projections-BcX7w-f6.js.map +1 -0
- package/dist/runner/suite.d.ts +1 -1
- package/dist/runner/suite.js +1 -1
- package/dist/{suite-BoOvK_lq.d.ts → suite-DPJMIEbu.d.ts} +7 -2
- package/dist/{suite-chj0j22j.js → suite-Dlzl-HI0.js} +58 -4
- package/dist/suite-Dlzl-HI0.js.map +1 -0
- package/dist/{types-BQol062t.d.ts → types-CD3TwOtZ.d.ts} +151 -10
- package/package.json +4 -2
- package/schemas/eval-interchange-instances.schema.json +196 -0
- package/schemas/eval-interchange.schema.json +65 -52
- package/schemas/eval-run-envelope.schema.json +182 -425
- package/dist/build-DsVJ_UeU.js.map +0 -1
- package/dist/loader-BCnFJ8rm.js.map +0 -1
- package/dist/suite-chj0j22j.js.map +0 -1
- package/schemas/eval-interchange-agent-trace.schema.json +0 -322
- package/schemas/eval-interchange-proto-instance.schema.json +0 -106
package/dist/index.d.ts
CHANGED
|
@@ -1,120 +1,38 @@
|
|
|
1
1
|
import { A as Usage, B as isUserMessage, C as SystemInitEvent, D as TextBlock, E as SystemUnknownEvent, F as isSystemInit, I as isSystemRetry, L as isTextBlock, M as UserMessageEvent, N as isAssistantMessage, O as ToolResultBlock, P as isResult, R as isToolResultBlock, S as SystemCompactBoundaryEvent, T as SystemRetryEvent, _ as ContentBlock, a as HarnessAdapter, b as StopReason, c as AssistantTurn, d as ToolCall, f as TrajectoryView, g as AssistantMessageEvent, h as AssistantMessage, i as BaseAdapterConfig, j as UserMessage, k as ToolUseBlock, l as RetryRecord, m as namespaceOf, n as AdapterError, o as ParseErrorRecord, p as UsageSummary, r as AdapterResult, s as SuiteConfig, t as AdapterDiagnostics, u as SessionMeta, v as McpServerStatus, w as SystemPluginInstallEvent, x as StreamEvent, y as ResultEvent, z as isToolUseBlock } from "./types-B9H4IZtA.js";
|
|
2
|
-
import { n as index_d_exports, o as ClaudeCodeOptions } from "./index-6Z17eKZx.js";
|
|
3
|
-
import {
|
|
4
|
-
import { i as GradingConfig, r as parseSuite, s as ConfigError, t as loadSuite } from "./loader-DTvoVfN0.js";
|
|
5
|
-
import { t as runSuite } from "./suite-BoOvK_lq.js";
|
|
2
|
+
import { n as index_d_exports, o as ClaudeCodeOptions } from "./index-V22PrR0p.js";
|
|
3
|
+
import { A as ObjectPredicate, C as TrajectoryPairInstanceJson, D as Cardinality, E as AssertionResult, M as ThresholdedAssertion, N as ToolPattern, O as CompoundPredicate, S as TrajectoryInstancesJson, T as Assertion, _ as ProtojsonToolCall, a as ProgressEvent, b as ReferenceTrajectoryConfig, c as RunSuiteOptions, d as TestSuite, f as EvalDatasetRow, g as InstancesJsonlRow, h as InstanceData, i as ProgressCallback, j as Predicate, k as LeafPredicate, l as SuiteReport, m as HarnessMetrics, n as CellReport, o as RepetitionError, p as EvaluationInstanceJson, r as MatrixCell, s as RepetitionResult, t as AssertionStat, u as TestCase, v as ProtojsonTrajectory, w as TrajectorySingleToolUseInstanceJson, x as TrajectoryInstanceMetricKey, y as ReferenceToolNameMode } from "./types-CD3TwOtZ.js";
|
|
4
|
+
import { i as GradingConfig, r as parseSuite, s as ConfigError, t as loadSuite } from "./loader-C9yQHUPC.js";
|
|
5
|
+
import { t as runSuite } from "./suite-DPJMIEbu.js";
|
|
6
6
|
import { Readable } from "node:stream";
|
|
7
7
|
|
|
8
|
-
//#region src/types/eval-interchange.d.ts
|
|
9
|
-
/**
|
|
10
|
-
* TypeScript types for eval interchange output.
|
|
11
|
-
*/
|
|
12
|
-
interface InterchangeToolCall {
|
|
13
|
-
tool_name: string;
|
|
14
|
-
tool_input: string;
|
|
15
|
-
}
|
|
16
|
-
interface InterchangeTrajectory {
|
|
17
|
-
tool_calls: InterchangeToolCall[];
|
|
18
|
-
}
|
|
19
|
-
interface TabularToolCall {
|
|
20
|
-
tool_name: string;
|
|
21
|
-
tool_input: unknown;
|
|
22
|
-
}
|
|
23
|
-
interface ContentPart {
|
|
24
|
-
text?: string;
|
|
25
|
-
function_call?: {
|
|
26
|
-
name: string;
|
|
27
|
-
args: unknown;
|
|
28
|
-
};
|
|
29
|
-
function_response?: {
|
|
30
|
-
name: string;
|
|
31
|
-
response: unknown;
|
|
32
|
-
};
|
|
33
|
-
}
|
|
34
|
-
interface AgentEvent {
|
|
35
|
-
author: string;
|
|
36
|
-
content: {
|
|
37
|
-
parts: ContentPart[];
|
|
38
|
-
};
|
|
39
|
-
event_time?: string;
|
|
40
|
-
state_delta?: Record<string, unknown>;
|
|
41
|
-
active_tools?: Array<{
|
|
42
|
-
name: string;
|
|
43
|
-
}>;
|
|
44
|
-
}
|
|
45
|
-
interface ConversationTurn {
|
|
46
|
-
turn_index: number;
|
|
47
|
-
turn_id?: string;
|
|
48
|
-
events: AgentEvent[];
|
|
49
|
-
}
|
|
50
|
-
interface AgentConfig {
|
|
51
|
-
agent_id: string;
|
|
52
|
-
agent_type?: string;
|
|
53
|
-
description?: string;
|
|
54
|
-
instruction?: string;
|
|
55
|
-
tools?: Array<{
|
|
56
|
-
name: string;
|
|
57
|
-
}>;
|
|
58
|
-
sub_agents?: string[];
|
|
59
|
-
}
|
|
60
|
-
interface AgentTrace {
|
|
61
|
-
agents: Record<string, AgentConfig>;
|
|
62
|
-
turns: ConversationTurn[];
|
|
63
|
-
}
|
|
64
|
-
interface EvalDatasetRow {
|
|
65
|
-
prompt?: string;
|
|
66
|
-
response?: string;
|
|
67
|
-
reference?: string;
|
|
68
|
-
predicted_trajectory: TabularToolCall[];
|
|
69
|
-
reference_trajectory?: TabularToolCall[];
|
|
70
|
-
latency_in_seconds: number;
|
|
71
|
-
failure: 0 | 1;
|
|
72
|
-
human_ratings?: Record<string, number>;
|
|
73
|
-
}
|
|
74
|
-
interface ProtoTrajectoryInstance {
|
|
75
|
-
predicted_trajectory: InterchangeTrajectory;
|
|
76
|
-
reference_trajectory?: InterchangeTrajectory;
|
|
77
|
-
prompt?: string;
|
|
78
|
-
response?: string;
|
|
79
|
-
reference?: string;
|
|
80
|
-
}
|
|
81
|
-
interface TrajectoryMetrics {
|
|
82
|
-
trajectory_exact_match: number;
|
|
83
|
-
trajectory_in_order_match: number;
|
|
84
|
-
trajectory_any_order_match: number;
|
|
85
|
-
trajectory_precision: number;
|
|
86
|
-
trajectory_recall: number;
|
|
87
|
-
trajectory_single_tool_use: number;
|
|
88
|
-
}
|
|
89
|
-
interface ToolCallMetrics {
|
|
90
|
-
tool_call_valid: number;
|
|
91
|
-
tool_name_match: number;
|
|
92
|
-
tool_parameter_key_match: number;
|
|
93
|
-
tool_parameter_kv_match: number;
|
|
94
|
-
}
|
|
95
|
-
//#endregion
|
|
96
8
|
//#region src/grader/types.d.ts
|
|
97
9
|
/**
|
|
98
10
|
* Outcome grading types (LLM-as-judge layer).
|
|
99
11
|
*
|
|
100
12
|
* Behavioral assertions live in harness-eval assertions; expectations here
|
|
101
13
|
* are natural-language outcome checks graded from trajectory transcripts.
|
|
14
|
+
* Grading runs as a second pass over a {@link SuiteReport} JSON artifact.
|
|
102
15
|
*/
|
|
103
16
|
interface GradedExpectation {
|
|
17
|
+
/** Original expectation text from the suite or sidecar file. */
|
|
104
18
|
text: string;
|
|
105
19
|
passed: boolean;
|
|
20
|
+
/** Quote or description supporting the pass/fail decision. */
|
|
106
21
|
evidence: string;
|
|
107
22
|
}
|
|
23
|
+
/** Aggregate pass/fail counts for one grading unit (rep or full report). */
|
|
108
24
|
interface GradingSummary {
|
|
109
25
|
passed: number;
|
|
110
26
|
failed: number;
|
|
111
27
|
total: number;
|
|
112
28
|
passRate: number;
|
|
113
29
|
}
|
|
30
|
+
/** Suggestion for improving an expectation or assertion wording. */
|
|
114
31
|
interface EvalFeedbackSuggestion {
|
|
115
32
|
assertion?: string;
|
|
116
33
|
reason: string;
|
|
117
34
|
}
|
|
35
|
+
/** Optional meta-feedback from the judge about expectation quality. */
|
|
118
36
|
interface EvalFeedback {
|
|
119
37
|
suggestions: EvalFeedbackSuggestion[];
|
|
120
38
|
overall: string;
|
|
@@ -132,6 +50,7 @@ interface RepGradingResult {
|
|
|
132
50
|
graderError?: string;
|
|
133
51
|
durationMs: number;
|
|
134
52
|
}
|
|
53
|
+
/** Full grading report for a suite run. */
|
|
135
54
|
interface SuiteGradingReport {
|
|
136
55
|
gradedAt: string;
|
|
137
56
|
sourceReport: string;
|
|
@@ -140,6 +59,7 @@ interface SuiteGradingReport {
|
|
|
140
59
|
results: RepGradingResult[];
|
|
141
60
|
summary: GradingSummary;
|
|
142
61
|
}
|
|
62
|
+
/** Options controlling {@link gradeReport} and the CLI `grade` command. */
|
|
143
63
|
interface GradeReportOptions {
|
|
144
64
|
/** Path to the report being graded (stored in output). */
|
|
145
65
|
sourceReport?: string;
|
|
@@ -167,6 +87,7 @@ interface GradeReportOptions {
|
|
|
167
87
|
gradeFn?: GraderFn;
|
|
168
88
|
onProgress?: (event: GradeProgressEvent) => void;
|
|
169
89
|
}
|
|
90
|
+
/** Progress events emitted during outcome grading. */
|
|
170
91
|
type GradeProgressEvent = {
|
|
171
92
|
kind: "grade-start";
|
|
172
93
|
total: number;
|
|
@@ -185,13 +106,16 @@ type GradeProgressEvent = {
|
|
|
185
106
|
totalExpectations: number;
|
|
186
107
|
passedExpectations: number;
|
|
187
108
|
};
|
|
109
|
+
/** Pluggable grader implementation (defaults to Claude subprocess). */
|
|
188
110
|
type GraderFn = (input: GraderInput) => Promise<GraderOutput>;
|
|
111
|
+
/** Input passed to a grader for one repetition. */
|
|
189
112
|
interface GraderInput {
|
|
190
113
|
prompt: string;
|
|
191
114
|
transcript: string;
|
|
192
115
|
expectations: string[];
|
|
193
116
|
systemInstruction?: string;
|
|
194
117
|
}
|
|
118
|
+
/** Parsed grader response before alignment with input expectation order. */
|
|
195
119
|
interface GraderOutput {
|
|
196
120
|
expectations: GradedExpectation[];
|
|
197
121
|
summary: GradingSummary;
|
|
@@ -206,20 +130,14 @@ declare const EVAL_RUN_SCHEMA_VERSION = "1.0";
|
|
|
206
130
|
declare const TRAJECTORY_SCHEMA_VERSION = "1.0";
|
|
207
131
|
/** Link to the suite spec that produced a run. */
|
|
208
132
|
interface SuiteReference {
|
|
209
|
-
/** Absolute or repo-relative path to the suite YAML. */
|
|
210
133
|
uri?: string;
|
|
211
|
-
/** Stable suite identifier when known (e.g. case bundle name). */
|
|
212
134
|
id?: string;
|
|
213
|
-
/** SHA-256 or similar hash of suite file contents. */
|
|
214
135
|
contentHash?: string;
|
|
215
136
|
}
|
|
216
137
|
/** Harness that executed the run. */
|
|
217
138
|
interface HarnessInfo {
|
|
218
|
-
/** Adapter id from suite YAML, e.g. `claude-code`. */
|
|
219
139
|
adapter: string;
|
|
220
|
-
/** harness-eval package version when envelope was built. */
|
|
221
140
|
frameworkVersion?: string;
|
|
222
|
-
/** Optional harness binary version (e.g. `claude -v`). */
|
|
223
141
|
harnessVersion?: string;
|
|
224
142
|
}
|
|
225
143
|
/** CI, git, or runtime provenance for correlation in the DB. */
|
|
@@ -244,9 +162,7 @@ interface EvalProvenance {
|
|
|
244
162
|
interface EvalRunSummary {
|
|
245
163
|
cellsTotal: number;
|
|
246
164
|
cellsPassed: number;
|
|
247
|
-
/** All cells passed behavioral assertion thresholds. */
|
|
248
165
|
behavioralPass: boolean;
|
|
249
|
-
/** All graded expectations passed (when outcome layer present). */
|
|
250
166
|
outcomePass?: boolean;
|
|
251
167
|
}
|
|
252
168
|
/** Identity of the judge that produced outcome grades. */
|
|
@@ -278,20 +194,14 @@ interface ExternalScore {
|
|
|
278
194
|
}
|
|
279
195
|
/** Optional large or vendor-specific blobs (store by reference in DB when possible). */
|
|
280
196
|
interface EvalArtifacts {
|
|
281
|
-
/** Claude Code `stream-json` lines — debug only, not cross-harness. */
|
|
282
197
|
rawStreamEvents?: unknown[];
|
|
283
|
-
/** URI to OTLP JSON (S3, GCS, etc.). */
|
|
284
198
|
otlpTraceUri?: string;
|
|
285
|
-
/** Text transcript for judges (`trajectoryToTranscript`). */
|
|
286
199
|
transcript?: string;
|
|
287
200
|
}
|
|
288
|
-
/**
|
|
289
|
-
* One harness invocation — the unit external judges and trajectory queries use.
|
|
290
|
-
*/
|
|
201
|
+
/** One harness invocation — the unit external judges and trajectory queries use. */
|
|
291
202
|
interface EvalRepetition {
|
|
292
203
|
repetitionIndex: number;
|
|
293
204
|
durationMs: number;
|
|
294
|
-
/** Normalized harness session. Required when the harness completed with a view. */
|
|
295
205
|
trajectory?: TrajectoryView & {
|
|
296
206
|
schemaVersion: string;
|
|
297
207
|
};
|
|
@@ -300,18 +210,14 @@ interface EvalRepetition {
|
|
|
300
210
|
outcomeGrades?: OutcomeGrades;
|
|
301
211
|
externalScores?: ExternalScore[];
|
|
302
212
|
artifacts?: EvalArtifacts;
|
|
303
|
-
/**
|
|
304
|
-
|
|
305
|
-
/**
|
|
306
|
-
|
|
307
|
-
/**
|
|
308
|
-
|
|
309
|
-
|
|
213
|
+
/** Vertex EvaluationInstance protojson wire object. */
|
|
214
|
+
evaluationInstance?: EvaluationInstanceJson;
|
|
215
|
+
/** Vertex Trajectory*Instance protojson wire objects keyed by metric. */
|
|
216
|
+
trajectoryInstances?: TrajectoryInstancesJson;
|
|
217
|
+
/** Harness-precomputed trajectory metric scores (camelCase). */
|
|
218
|
+
harnessMetrics?: HarnessMetrics;
|
|
219
|
+
latencySeconds?: number;
|
|
310
220
|
failure?: 0 | 1;
|
|
311
|
-
/** Trajectory-level metrics when reference_trajectory is provided. */
|
|
312
|
-
trajectoryMetrics?: TrajectoryMetrics;
|
|
313
|
-
/** Tool-call-level metrics when reference_trajectory is provided. */
|
|
314
|
-
toolCallMetrics?: ToolCallMetrics;
|
|
315
221
|
error?: {
|
|
316
222
|
message: string;
|
|
317
223
|
diagnostics?: Partial<AdapterDiagnostics>;
|
|
@@ -335,24 +241,16 @@ interface EvalCellResult {
|
|
|
335
241
|
expectations?: string[];
|
|
336
242
|
cellLabel: string;
|
|
337
243
|
axes?: Record<string, string>;
|
|
338
|
-
/** Reference
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
human_ratings?: Record<string, number>;
|
|
244
|
+
/** Reference trajectory in Vertex protojson wire format. */
|
|
245
|
+
referenceTrajectory?: ProtojsonTrajectory;
|
|
246
|
+
humanRatings?: Record<string, number>;
|
|
342
247
|
assertionStats: EvalAssertionStat[];
|
|
343
248
|
adapterErrors: number;
|
|
344
|
-
/** Passed all behavioral assertion thresholds for this cell. */
|
|
345
249
|
behavioralPass: boolean;
|
|
346
|
-
/** Passed all outcome expectations when graded; omitted if not graded. */
|
|
347
250
|
outcomePass?: boolean;
|
|
348
251
|
repetitions: EvalRepetition[];
|
|
349
252
|
}
|
|
350
|
-
/**
|
|
351
|
-
* Top-level document for CI/CD pipelines, APIs, and databases.
|
|
352
|
-
*
|
|
353
|
-
* This is the interchange format your storage layer should target — not
|
|
354
|
-
* {@link import("./stream").StreamEvent} or OTLP traces.
|
|
355
|
-
*/
|
|
253
|
+
/** Top-level document for CI/CD pipelines, APIs, and databases. */
|
|
356
254
|
interface EvalRunEnvelope {
|
|
357
255
|
schemaVersion: typeof EVAL_RUN_SCHEMA_VERSION;
|
|
358
256
|
runId: string;
|
|
@@ -365,12 +263,15 @@ interface EvalRunEnvelope {
|
|
|
365
263
|
cells: EvalCellResult[];
|
|
366
264
|
}
|
|
367
265
|
interface BuildEvalRunEnvelopeOptions {
|
|
368
|
-
/**
|
|
266
|
+
/** Override envelope runId; defaults to a random UUID. */
|
|
369
267
|
runId?: string;
|
|
268
|
+
/** Link to the suite YAML that produced the run. */
|
|
370
269
|
suite?: SuiteReference;
|
|
270
|
+
/** Harness adapter metadata; adapter defaults to `"claude-code"`. */
|
|
371
271
|
harness?: Partial<HarnessInfo>;
|
|
272
|
+
/** CI, git, and runtime provenance for correlation. */
|
|
372
273
|
provenance?: EvalProvenance;
|
|
373
|
-
/**
|
|
274
|
+
/** Outcome grades to merge from a grader run. */
|
|
374
275
|
grading?: {
|
|
375
276
|
gradedAt?: string;
|
|
376
277
|
sourceReport?: string;
|
|
@@ -386,9 +287,9 @@ interface BuildEvalRunEnvelopeOptions {
|
|
|
386
287
|
}>;
|
|
387
288
|
judge?: JudgeInfo;
|
|
388
289
|
};
|
|
389
|
-
/** Include transcript
|
|
290
|
+
/** Include text transcript artifact (default true). */
|
|
390
291
|
includeTranscript?: boolean;
|
|
391
|
-
/** Include raw stream events
|
|
292
|
+
/** Include raw stream-json events (default false; debug only). */
|
|
392
293
|
includeRawStreamEvents?: boolean;
|
|
393
294
|
}
|
|
394
295
|
//#endregion
|
|
@@ -506,6 +407,7 @@ declare function getDefaultAdapter(): HarnessAdapter;
|
|
|
506
407
|
declare const DEFAULT_REPETITIONS = 5;
|
|
507
408
|
/** Default assertion pass-rate threshold when `threshold` is omitted. */
|
|
508
409
|
declare const DEFAULT_THRESHOLD = 1;
|
|
410
|
+
/** Injectable adapter run function (used by tests to stub harness I/O). */
|
|
509
411
|
type AdapterRunFn = (config: BaseAdapterConfig & Record<string, unknown>) => Promise<AdapterResult>;
|
|
510
412
|
/**
|
|
511
413
|
* Build the effective adapter config for one (suite, case, cell).
|
|
@@ -513,8 +415,21 @@ type AdapterRunFn = (config: BaseAdapterConfig & Record<string, unknown>) => Pro
|
|
|
513
415
|
* Merge order (later wins): defaultConfig < case.config < cell.config.
|
|
514
416
|
*/
|
|
515
417
|
declare function mergeConfig(suite: TestSuite, testCase: TestCase, cell: MatrixCell): BaseAdapterConfig & Record<string, unknown>;
|
|
418
|
+
/** Effective repetition count for a case (`case.repetitions` or default). */
|
|
516
419
|
declare function getRepetitions(testCase: TestCase): number;
|
|
420
|
+
/**
|
|
421
|
+
* Run one repetition: invoke the adapter, evaluate assertions, capture errors.
|
|
422
|
+
*
|
|
423
|
+
* Adapter failures are returned as {@link RepetitionResult.error} rather than
|
|
424
|
+
* thrown so the suite runner can continue other reps and report adapter error counts.
|
|
425
|
+
*/
|
|
517
426
|
declare function runRepetition(testCase: TestCase, _cell: MatrixCell, config: BaseAdapterConfig & Record<string, unknown>, repetitionIndex: number, run: AdapterRunFn, signal?: AbortSignal): Promise<RepetitionResult>;
|
|
427
|
+
/**
|
|
428
|
+
* Roll up repetition results into a {@link CellReport}.
|
|
429
|
+
*
|
|
430
|
+
* Adapter errors reduce `evaluatedCount` but do not fail the cell by
|
|
431
|
+
* themselves — only assertion threshold misses mark a cell as failed.
|
|
432
|
+
*/
|
|
518
433
|
declare function aggregateCell(testCase: TestCase, cell: MatrixCell, repetitions: RepetitionResult[]): CellReport;
|
|
519
434
|
//#endregion
|
|
520
435
|
//#region src/runner/limit.d.ts
|
|
@@ -543,24 +458,30 @@ declare function createLimit(max: number): LimitedRunner;
|
|
|
543
458
|
* Shapes follow OTLP/HTTP JSON Protobuf encoding (lowerCamelCase field names).
|
|
544
459
|
* @see https://opentelemetry.io/docs/specs/otlp/
|
|
545
460
|
*/
|
|
461
|
+
/** OTLP ExportTraceServiceRequest root — batch of resource spans. */
|
|
546
462
|
interface ExportTraceServiceRequest {
|
|
547
463
|
resourceSpans: ResourceSpans[];
|
|
548
464
|
}
|
|
465
|
+
/** Resource-attributed span group in an export batch. */
|
|
549
466
|
interface ResourceSpans {
|
|
550
467
|
resource: Resource;
|
|
551
468
|
scopeSpans: ScopeSpans[];
|
|
552
469
|
}
|
|
470
|
+
/** OTLP resource descriptor (service.name, agent metadata). */
|
|
553
471
|
interface Resource {
|
|
554
472
|
attributes: KeyValue[];
|
|
555
473
|
}
|
|
474
|
+
/** Spans emitted by one instrumentation scope within a resource. */
|
|
556
475
|
interface ScopeSpans {
|
|
557
476
|
scope: InstrumentationScope;
|
|
558
477
|
spans: Span[];
|
|
559
478
|
}
|
|
479
|
+
/** Instrumentation library identity (name + optional version). */
|
|
560
480
|
interface InstrumentationScope {
|
|
561
481
|
name: string;
|
|
562
482
|
version?: string;
|
|
563
483
|
}
|
|
484
|
+
/** One span in OTLP JSON encoding (nanosecond timestamps as strings). */
|
|
564
485
|
interface Span {
|
|
565
486
|
traceId: string;
|
|
566
487
|
spanId: string;
|
|
@@ -572,14 +493,17 @@ interface Span {
|
|
|
572
493
|
attributes: KeyValue[];
|
|
573
494
|
status?: SpanStatus;
|
|
574
495
|
}
|
|
496
|
+
/** OTLP span status (OK, ERROR, or UNSET). */
|
|
575
497
|
interface SpanStatus {
|
|
576
498
|
code: number;
|
|
577
499
|
message?: string;
|
|
578
500
|
}
|
|
501
|
+
/** Key-value attribute pair on a span or resource. */
|
|
579
502
|
interface KeyValue {
|
|
580
503
|
key: string;
|
|
581
504
|
value: AnyValue;
|
|
582
505
|
}
|
|
506
|
+
/** Discriminated OTLP attribute value (one of the typed fields set). */
|
|
583
507
|
interface AnyValue {
|
|
584
508
|
stringValue?: string;
|
|
585
509
|
boolValue?: boolean;
|
|
@@ -595,6 +519,7 @@ interface ArrayValue {
|
|
|
595
519
|
interface KeyValueList {
|
|
596
520
|
values: KeyValue[];
|
|
597
521
|
}
|
|
522
|
+
/** Options passed to {@link trajectoryToOtlp} / {@link emitOtel}. */
|
|
598
523
|
interface EmitOtelOptions {
|
|
599
524
|
/** User prompt for the first `gen_ai.input.messages` entry. */
|
|
600
525
|
prompt?: string;
|
|
@@ -627,13 +552,20 @@ interface EmitOtelOptions {
|
|
|
627
552
|
* ```
|
|
628
553
|
*/
|
|
629
554
|
declare function trajectoryToOtlp(view: TrajectoryView, options?: EmitOtelOptions): ExportTraceServiceRequest;
|
|
630
|
-
/** Alias
|
|
555
|
+
/** Alias for {@link trajectoryToOtlp} — matches implementation plan naming. */
|
|
631
556
|
declare const emitOtel: typeof trajectoryToOtlp;
|
|
632
557
|
//#endregion
|
|
633
558
|
//#region src/grader/grade-report.d.ts
|
|
559
|
+
/**
|
|
560
|
+
* Grade every repetition in a {@link SuiteReport} that has expectations.
|
|
561
|
+
*
|
|
562
|
+
* Expectations come from inline case fields or an optional sidecar YAML/JSON
|
|
563
|
+
* map. Runs are concurrent under {@link GradeReportOptions.maxConcurrent}.
|
|
564
|
+
*/
|
|
634
565
|
declare function gradeReport(report: SuiteReport, options?: GradeReportOptions): Promise<SuiteGradingReport>;
|
|
635
566
|
//#endregion
|
|
636
567
|
//#region src/grader/resolve-grade-options.d.ts
|
|
568
|
+
/** CLI flag overrides for grading (take precedence over grading YAML). */
|
|
637
569
|
interface GradeCliOverrides {
|
|
638
570
|
model?: string;
|
|
639
571
|
binary?: string;
|
|
@@ -648,9 +580,16 @@ interface GradeCliOverrides {
|
|
|
648
580
|
declare function resolveGradeOptions(fileConfig?: GradingConfig, cli?: GradeCliOverrides, configPath?: string): GradeReportOptions;
|
|
649
581
|
//#endregion
|
|
650
582
|
//#region src/grader/transcript.d.ts
|
|
583
|
+
/**
|
|
584
|
+
* Render a {@link TrajectoryView} as markdown for LLM graders.
|
|
585
|
+
*
|
|
586
|
+
* Tool results are truncated at {@link MAX_RESULT_CHARS} to keep judge
|
|
587
|
+
* prompts within reasonable token limits.
|
|
588
|
+
*/
|
|
651
589
|
declare function trajectoryToTranscript(view: TrajectoryView, prompt?: string): string;
|
|
652
590
|
//#endregion
|
|
653
591
|
//#region src/grader/claude-grader.d.ts
|
|
592
|
+
/** Options for {@link createClaudeGrader} / {@link runClaudeGrader}. */
|
|
654
593
|
interface ClaudeGraderOptions {
|
|
655
594
|
binary?: string;
|
|
656
595
|
model?: string;
|
|
@@ -659,14 +598,23 @@ interface ClaudeGraderOptions {
|
|
|
659
598
|
cwd?: string;
|
|
660
599
|
claudeCode?: ClaudeCodeOptions;
|
|
661
600
|
}
|
|
601
|
+
/** Factory returning a {@link GraderFn} bound to subprocess options. */
|
|
662
602
|
declare function createClaudeGrader(options?: ClaudeGraderOptions): GraderFn;
|
|
663
603
|
//#endregion
|
|
664
604
|
//#region src/grader/format-console.d.ts
|
|
605
|
+
/**
|
|
606
|
+
* Format a {@link SuiteGradingReport} for terminal output.
|
|
607
|
+
*
|
|
608
|
+
* @param color When true, emit ANSI status colors (default for TTY console).
|
|
609
|
+
*/
|
|
665
610
|
declare function formatGradingConsole(report: SuiteGradingReport, color?: boolean): string;
|
|
611
|
+
/** True when every graded rep passed all expectations without grader errors. */
|
|
666
612
|
declare function gradingReportPassed(report: SuiteGradingReport): boolean;
|
|
667
613
|
//#endregion
|
|
668
614
|
//#region src/reporter/types.d.ts
|
|
615
|
+
/** Output format selector for {@link formatReport}. */
|
|
669
616
|
type ReportFormat = "console" | "markdown" | "json";
|
|
617
|
+
/** Options for suite report formatting. */
|
|
670
618
|
interface ReporterOptions {
|
|
671
619
|
format: ReportFormat;
|
|
672
620
|
baseline?: SuiteReport;
|
|
@@ -674,52 +622,222 @@ interface ReporterOptions {
|
|
|
674
622
|
}
|
|
675
623
|
//#endregion
|
|
676
624
|
//#region src/reporter/index.d.ts
|
|
625
|
+
/**
|
|
626
|
+
* Format a {@link SuiteReport} for console, markdown, or JSON output.
|
|
627
|
+
*
|
|
628
|
+
* JSON format bypasses the renderable intermediate model and serializes the
|
|
629
|
+
* report directly. Console and markdown apply optional baseline deltas.
|
|
630
|
+
*/
|
|
677
631
|
declare function formatReport(report: SuiteReport, options: ReporterOptions): string;
|
|
678
632
|
//#endregion
|
|
679
633
|
//#region src/eval-record/build.d.ts
|
|
680
634
|
/**
|
|
681
635
|
* Convert a {@link SuiteReport} (and optional grading) into a versioned
|
|
682
636
|
* {@link EvalRunEnvelope} for storage or API handoff.
|
|
637
|
+
*
|
|
638
|
+
* @param report - Runner output for one suite execution.
|
|
639
|
+
* @param options - Provenance, grading merge, and artifact inclusion flags.
|
|
640
|
+
* @returns A fully populated envelope with protojson interchange fields on each repetition.
|
|
683
641
|
*/
|
|
684
642
|
declare function buildEvalRunEnvelope(report: SuiteReport, options?: BuildEvalRunEnvelopeOptions): EvalRunEnvelope;
|
|
685
|
-
/**
|
|
643
|
+
/**
|
|
644
|
+
* Build an envelope from on-disk runner and grader JSON artifacts.
|
|
645
|
+
*
|
|
646
|
+
* Reads `reportPath` as a {@link SuiteReport}. When `gradingPath` is set, merges
|
|
647
|
+
* outcome grades from a {@link SuiteGradingReport}. When `suitePath` is set,
|
|
648
|
+
* attaches suite URI and SHA-256 content hash for reproducibility.
|
|
649
|
+
*
|
|
650
|
+
* @param reportPath - Path to the suite run report JSON from `harness-eval run`.
|
|
651
|
+
* @param options - Same build options as {@link buildEvalRunEnvelope}, plus file paths.
|
|
652
|
+
*/
|
|
686
653
|
declare function buildEvalRunEnvelopeFromFiles(reportPath: string, options?: BuildEvalRunEnvelopeOptions & {
|
|
687
654
|
gradingPath?: string;
|
|
688
655
|
suitePath?: string;
|
|
689
656
|
}): Promise<EvalRunEnvelope>;
|
|
690
657
|
//#endregion
|
|
691
|
-
//#region src/
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
658
|
+
//#region src/eval-interchange/enrich.d.ts
|
|
659
|
+
/**
|
|
660
|
+
* Attach Vertex protojson interchange fields to one {@link EvalRepetition}.
|
|
661
|
+
*
|
|
662
|
+
* When no trajectory exists (adapter error), sets `failure: 1` and skips
|
|
663
|
+
* protojson payloads. Trajectory instances and harness metrics are only
|
|
664
|
+
* computed when the suite defines a non-empty reference trajectory.
|
|
665
|
+
*
|
|
666
|
+
* @param repetition - Base repetition from the runner (trajectory, assertions, grades).
|
|
667
|
+
* @param options.prompt - Case prompt for EvaluationInstance.
|
|
668
|
+
* @param options.reference - Suite reference trajectory config, if any.
|
|
669
|
+
*/
|
|
670
|
+
declare function enrichRepetitionWithProtojson(repetition: EvalRepetition, options?: {
|
|
671
|
+
prompt?: string;
|
|
672
|
+
reference?: ReferenceTrajectoryConfig;
|
|
673
|
+
}): EvalRepetition;
|
|
674
|
+
//#endregion
|
|
675
|
+
//#region src/eval-interchange/protojson/evaluation-instance.d.ts
|
|
676
|
+
/**
|
|
677
|
+
* Build an EvaluationInstance protojson object from harness strings.
|
|
678
|
+
*
|
|
679
|
+
* Omitted fields are excluded from the output object rather than set to
|
|
680
|
+
* empty wrappers — protojson omits unset optional fields.
|
|
681
|
+
*
|
|
682
|
+
* @param options.prompt - Case prompt sent to the agent.
|
|
683
|
+
* @param options.response - Final agent response from the trajectory.
|
|
684
|
+
* @param options.reference - Optional reference answer text (rare in harness eval).
|
|
685
|
+
*/
|
|
686
|
+
declare function toEvaluationInstance(options: {
|
|
687
|
+
prompt?: string;
|
|
688
|
+
response?: string;
|
|
689
|
+
reference?: string;
|
|
690
|
+
}): EvaluationInstanceJson;
|
|
691
|
+
//#endregion
|
|
692
|
+
//#region src/eval-interchange/protojson/harness-metrics.d.ts
|
|
693
|
+
/** Suite YAML reference step shape accepted by metric computation. */
|
|
694
|
+
type ReferenceStep$1 = {
|
|
696
695
|
tool_name: string;
|
|
697
696
|
tool_input: unknown;
|
|
698
697
|
};
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
698
|
+
/**
|
|
699
|
+
* Compute trajectory metrics and map snake_case keys to Vertex camelCase.
|
|
700
|
+
*
|
|
701
|
+
* When `referenceToolNameMode` is `"bare"`, both predicted and reference tool
|
|
702
|
+
* names are stripped to the suffix after the last `__` so suite reference steps
|
|
703
|
+
* authored with bare names (e.g. `ListLandingZones`) match harness MCP names
|
|
704
|
+
* (e.g. `mcp__plugin__ListLandingZones`).
|
|
705
|
+
*
|
|
706
|
+
* @param predicted - Tool calls from the harness trajectory view.
|
|
707
|
+
* @param reference - Reference steps from suite YAML.
|
|
708
|
+
* @param options.referenceToolNameMode - Name normalization mode from suite YAML.
|
|
709
|
+
*/
|
|
710
|
+
declare function toHarnessMetrics(predicted: ToolCall[], reference: ReferenceStep$1[], options?: {
|
|
711
|
+
referenceToolNameMode?: ReferenceToolNameMode;
|
|
712
|
+
}): HarnessMetrics;
|
|
713
|
+
//#endregion
|
|
714
|
+
//#region src/eval-interchange/protojson/trajectory-instances.d.ts
|
|
715
|
+
type ReferenceStep = {
|
|
716
|
+
tool_name: string;
|
|
717
|
+
tool_input: unknown;
|
|
718
|
+
};
|
|
719
|
+
/**
|
|
720
|
+
* Build all Trajectory*Instance payloads for one predicted/reference pair.
|
|
721
|
+
*
|
|
722
|
+
* Pair metrics (exact, in-order, any-order, precision, recall) share the
|
|
723
|
+
* same trajectory pair; single-tool-use omits the reference trajectory
|
|
724
|
+
* per Vertex API shape.
|
|
725
|
+
*/
|
|
726
|
+
declare function toTrajectoryInstances(options: {
|
|
727
|
+
predicted: ToolCall[];
|
|
728
|
+
reference: ReferenceStep[];
|
|
729
|
+
referenceToolNameMode?: ReferenceToolNameMode;
|
|
730
|
+
}): TrajectoryInstancesJson;
|
|
704
731
|
//#endregion
|
|
705
732
|
//#region src/eval-interchange/projections.d.ts
|
|
733
|
+
/**
|
|
734
|
+
* Trajectory projection — all repetitions in the envelope as dataset rows.
|
|
735
|
+
*/
|
|
706
736
|
declare function toTrajectory(envelope: EvalRunEnvelope): EvalDatasetRow[];
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
737
|
+
/**
|
|
738
|
+
* Instances projection — all trajectory metric instances as JSONL rows.
|
|
739
|
+
*/
|
|
740
|
+
declare function toInstancesJsonl(envelope: EvalRunEnvelope): InstancesJsonlRow[];
|
|
710
741
|
//#endregion
|
|
711
742
|
//#region src/metrics/trajectory.d.ts
|
|
712
|
-
|
|
743
|
+
/**
|
|
744
|
+
* Trajectory-level metrics for comparing predicted and reference tool-call sequences.
|
|
745
|
+
*
|
|
746
|
+
* Aligns with Vertex AI EvaluationService trajectory metrics (exact match,
|
|
747
|
+
* in-order, any-order, precision, recall, single tool use). Tool calls are
|
|
748
|
+
* compared by `(tool_name, serialized tool_input)` identity after normalization.
|
|
749
|
+
*
|
|
750
|
+
* Binary metrics return 0 or 1; precision and recall return fractions in [0, 1].
|
|
751
|
+
*/
|
|
752
|
+
/** Canonical wire tool call used internally for comparison. */
|
|
753
|
+
interface WireToolCall {
|
|
713
754
|
tool_name: string;
|
|
714
|
-
tool_input:
|
|
755
|
+
tool_input: string;
|
|
756
|
+
}
|
|
757
|
+
/** All trajectory metric scores for one predicted/reference pair. */
|
|
758
|
+
interface TrajectoryMetrics {
|
|
759
|
+
trajectory_exact_match: number;
|
|
760
|
+
trajectory_in_order_match: number;
|
|
761
|
+
trajectory_any_order_match: number;
|
|
762
|
+
trajectory_precision: number;
|
|
763
|
+
trajectory_recall: number;
|
|
764
|
+
trajectory_single_tool_use: number;
|
|
765
|
+
}
|
|
766
|
+
/** Input accepted by trajectory metrics — wire or harness/YAML shapes. */
|
|
767
|
+
type TrajectoryInput = WireToolCall[] | Array<{
|
|
768
|
+
tool_name: string;
|
|
769
|
+
tool_input: unknown | string;
|
|
715
770
|
}>;
|
|
771
|
+
/** Exact sequence equality after normalization. */
|
|
716
772
|
declare function trajectoryExactMatch(predicted: TrajectoryInput, reference: TrajectoryInput): number;
|
|
773
|
+
/** Reference is a subsequence of predicted (order preserved, extras allowed). */
|
|
717
774
|
declare function trajectoryInOrderMatch(predicted: TrajectoryInput, reference: TrajectoryInput): number;
|
|
775
|
+
/** Same multiset of tool calls; length must match. */
|
|
718
776
|
declare function trajectoryAnyOrderMatch(predicted: TrajectoryInput, reference: TrajectoryInput): number;
|
|
777
|
+
/**
|
|
778
|
+
* Fraction of predicted tool calls that appear in reference (multiset).
|
|
779
|
+
*
|
|
780
|
+
* Returns 1 when both trajectories are empty.
|
|
781
|
+
*/
|
|
719
782
|
declare function trajectoryPrecision(predicted: TrajectoryInput, reference: TrajectoryInput): number;
|
|
783
|
+
/**
|
|
784
|
+
* Fraction of reference tool calls matched in predicted (multiset recall).
|
|
785
|
+
*
|
|
786
|
+
* Returns 1 when reference is empty and predicted is empty.
|
|
787
|
+
*/
|
|
720
788
|
declare function trajectoryRecall(predicted: TrajectoryInput, reference: TrajectoryInput): number;
|
|
789
|
+
/** Both trajectories have exactly one call and they match. */
|
|
721
790
|
declare function trajectorySingleToolUse(predicted: TrajectoryInput, reference: TrajectoryInput): number;
|
|
791
|
+
/** Compute all trajectory metrics in one pass. */
|
|
722
792
|
declare function computeTrajectoryMetrics(predicted: TrajectoryInput, reference: TrajectoryInput): TrajectoryMetrics;
|
|
723
793
|
//#endregion
|
|
724
|
-
|
|
794
|
+
//#region src/metrics/tool-calls.d.ts
|
|
795
|
+
/** Options for parameter value comparison. */
|
|
796
|
+
interface ToolCallMetricOptions {
|
|
797
|
+
/** When true, compare serialized JSON strictly (reserved for future semantics). */
|
|
798
|
+
useStrictStringMatch?: boolean;
|
|
799
|
+
}
|
|
800
|
+
/** Aggregated tool-call metric scores (each 0..1). */
|
|
801
|
+
interface ToolCallMetrics {
|
|
802
|
+
tool_call_valid: number;
|
|
803
|
+
tool_name_match: number;
|
|
804
|
+
tool_parameter_key_match: number;
|
|
805
|
+
tool_parameter_kv_match: number;
|
|
806
|
+
}
|
|
807
|
+
type ToolCallInput = TrajectoryInput[number];
|
|
808
|
+
/**
|
|
809
|
+
* Whether a predicted tool call is well-formed (non-empty name, parseable JSON input).
|
|
810
|
+
*
|
|
811
|
+
* @returns 1 when valid, 0 otherwise.
|
|
812
|
+
*/
|
|
813
|
+
declare function toolCallValid(toolCall: ToolCallInput): number;
|
|
814
|
+
/**
|
|
815
|
+
* Whether predicted and reference tool names match exactly.
|
|
816
|
+
*
|
|
817
|
+
* @returns 1 on match, 0 otherwise.
|
|
818
|
+
*/
|
|
819
|
+
declare function toolNameMatch(predicted: ToolCallInput, reference: ToolCallInput): number;
|
|
820
|
+
/**
|
|
821
|
+
* Whether parameter key sets match (same keys, same order after sort).
|
|
822
|
+
*
|
|
823
|
+
* Requires matching tool names first. Returns 0 when args are not objects.
|
|
824
|
+
*/
|
|
825
|
+
declare function toolParameterKeyMatch(predicted: ToolCallInput, reference: ToolCallInput): number;
|
|
826
|
+
/**
|
|
827
|
+
* Whether all reference parameter key-value pairs match in the predicted call.
|
|
828
|
+
*
|
|
829
|
+
* Requires {@link toolParameterKeyMatch} first. Only keys present in reference
|
|
830
|
+
* are checked (predicted may have extra keys).
|
|
831
|
+
*/
|
|
832
|
+
declare function toolParameterKvMatch(predicted: ToolCallInput, reference: ToolCallInput, options?: ToolCallMetricOptions): number;
|
|
833
|
+
/**
|
|
834
|
+
* Average tool-call metrics across index-aligned predicted/reference pairs.
|
|
835
|
+
*
|
|
836
|
+
* Denominator is `max(predicted.length, reference.length, 1)`. Missing
|
|
837
|
+
* predicted calls at an index are skipped for pair metrics; validity still
|
|
838
|
+
* counts when a predicted call exists.
|
|
839
|
+
*/
|
|
840
|
+
declare function computeToolCallMetrics(predicted: ToolCallInput[], reference: ToolCallInput[], options?: ToolCallMetricOptions): ToolCallMetrics;
|
|
841
|
+
//#endregion
|
|
842
|
+
export { type AdapterDiagnostics, AdapterError, type AdapterResult, type AdapterRunFn, Assertion, AssertionResult, AssertionStat, AssistantMessage, AssistantMessageEvent, AssistantTurn, type BaseAdapterConfig, BuildEvalRunEnvelopeOptions, Cardinality, CellReport, CompoundPredicate, ConfigError, ContentBlock, DEFAULT_ADAPTER_ID, DEFAULT_REPETITIONS, DEFAULT_THRESHOLD, EVAL_RUN_SCHEMA_VERSION, type EmitOtelOptions, EvalArtifacts, EvalAssertionStat, EvalCellResult, EvalDatasetRow, EvalProvenance, EvalRepetition, EvalRunEnvelope, EvalRunSummary, EvaluationInstanceJson, type ExportTraceServiceRequest, ExternalScore, type GradeReportOptions, type HarnessAdapter, HarnessInfo, HarnessMetrics, InstanceData, InstancesJsonlRow, JudgeInfo, LeafPredicate, type LimitedRunner, MatrixCell, McpServerStatus, ObjectPredicate, OutcomeGrades, type ParseErrorRecord, type ParseResult, Predicate, ProgressCallback, ProgressEvent, ProtojsonToolCall, ProtojsonTrajectory, ReferenceToolNameMode, ReferenceTrajectoryConfig, type RepGradingResult, RepetitionError, RepetitionResult, type ReporterOptions, ResultEvent, RetryRecord, RunSuiteOptions, SessionMeta, StopReason, StreamEvent, type SuiteConfig, type SuiteGradingReport, SuiteReference, SuiteReport, SystemCompactBoundaryEvent, SystemInitEvent, SystemPluginInstallEvent, SystemRetryEvent, SystemUnknownEvent, TRAJECTORY_SCHEMA_VERSION, TestCase, TestSuite, TextBlock, ThresholdedAssertion, ToolCall, type ToolCallMetricOptions, ToolPattern, ToolResultBlock, ToolUseBlock, TrajectoryBuilder, type TrajectoryInput, TrajectoryInstanceMetricKey, TrajectoryInstancesJson, TrajectoryPairInstanceJson, TrajectorySingleToolUseInstanceJson, TrajectoryView, Usage, UsageSummary, UserMessage, UserMessageEvent, aggregateCell, buildEvalRunEnvelope, buildEvalRunEnvelopeFromFiles, buildTrajectory, index_d_exports as claudeCode, computeToolCallMetrics, computeTrajectoryMetrics, createClaudeGrader, createLimit, emitOtel, enrichRepetitionWithProtojson, evaluate, 
evaluateAll, formatGradingConsole, formatReport, getAdapter, getDefaultAdapter, getRepetitions, gradeReport, gradingReportPassed, isAssistantMessage, isResult, isSystemInit, isSystemRetry, isTextBlock, isToolResultBlock, isToolUseBlock, isUserMessage, listAdapters, loadSuite, mergeConfig, namespaceOf, parseStreamJson, parseSuite, registerAdapter, resolveGradeOptions, runRepetition, runSuite, toEvaluationInstance, toHarnessMetrics, toInstancesJsonl, toTrajectory, toTrajectoryInstances, toolCallValid, toolNameMatch, toolParameterKeyMatch, toolParameterKvMatch, trajectoryAnyOrderMatch, trajectoryExactMatch, trajectoryInOrderMatch, trajectoryPrecision, trajectoryRecall, trajectorySingleToolUse, trajectoryToOtlp, trajectoryToTranscript };
|
|
725
843
|
//# sourceMappingURL=index.d.ts.map
|