@alis-build/harness-eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,441 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://raw.githubusercontent.com/alis-build/harness-eval-ts/main/schemas/trajectory-view.schema.json",
4
+ "title": "TrajectoryViewExport",
5
+ "description": "TrajectoryView with schemaVersion, as embedded in EvalRunEnvelope repetitions.",
6
+ "type": "object",
7
+ "properties": {
8
+ "meta": {
9
+ "$ref": "#/$defs/__schema0"
10
+ },
11
+ "toolCalls": {
12
+ "$ref": "#/$defs/__schema11"
13
+ },
14
+ "turns": {
15
+ "$ref": "#/$defs/__schema22"
16
+ },
17
+ "finalResponse": {
18
+ "$ref": "#/$defs/__schema28"
19
+ },
20
+ "finalStopReason": {
21
+ "$ref": "#/$defs/__schema29"
22
+ },
23
+ "usage": {
24
+ "$ref": "#/$defs/__schema31"
25
+ },
26
+ "retries": {
27
+ "$ref": "#/$defs/__schema37"
28
+ },
29
+ "success": {
30
+ "$ref": "#/$defs/__schema40"
31
+ },
32
+ "schemaVersion": {
33
+ "$ref": "#/$defs/__schema41"
34
+ }
35
+ },
36
+ "required": [
37
+ "meta",
38
+ "toolCalls",
39
+ "turns",
40
+ "finalResponse",
41
+ "finalStopReason",
42
+ "usage",
43
+ "retries",
44
+ "success",
45
+ "schemaVersion"
46
+ ],
47
+ "additionalProperties": false,
48
+ "$defs": {
49
+ "__schema0": {
50
+ "description": "Session metadata from harness initialization.",
51
+ "$ref": "#/$defs/SessionMeta"
52
+ },
53
+ "__schema1": {
54
+ "type": "string",
55
+ "description": "Harness-assigned session identifier from the vendor stream."
56
+ },
57
+ "__schema2": {
58
+ "type": "string",
59
+ "description": "Model identifier used for the session, e.g. claude-sonnet-4-6."
60
+ },
61
+ "__schema3": {
62
+ "type": "string",
63
+ "description": "Working directory the harness used for the run."
64
+ },
65
+ "__schema4": {
66
+ "description": "Permission mode active for the session, when reported by the harness.",
67
+ "$ref": "#/$defs/__schema5"
68
+ },
69
+ "__schema5": {
70
+ "type": "string"
71
+ },
72
+ "__schema6": {
73
+ "type": "array",
74
+ "items": {
75
+ "$ref": "#/$defs/__schema7"
76
+ },
77
+ "description": "Tool names the harness reported as available at session start."
78
+ },
79
+ "__schema7": {
80
+ "type": "string"
81
+ },
82
+ "__schema8": {
83
+ "type": "array",
84
+ "items": {
85
+ "$ref": "#/$defs/McpServer"
86
+ },
87
+ "description": "MCP servers configured for the session, with connection status."
88
+ },
89
+ "McpServer": {
90
+ "type": "object",
91
+ "properties": {
92
+ "name": {
93
+ "$ref": "#/$defs/__schema9"
94
+ },
95
+ "status": {
96
+ "$ref": "#/$defs/__schema10"
97
+ }
98
+ },
99
+ "required": [
100
+ "name",
101
+ "status"
102
+ ],
103
+ "additionalProperties": false,
104
+ "title": "McpServer",
105
+ "description": "MCP server entry from session initialization metadata."
106
+ },
107
+ "__schema9": {
108
+ "type": "string",
109
+ "description": "MCP server name as reported by the harness."
110
+ },
111
+ "__schema10": {
112
+ "type": "string",
113
+ "description": "Connection status at session start, e.g. connected or failed."
114
+ },
115
+ "SessionMeta": {
116
+ "type": "object",
117
+ "properties": {
118
+ "sessionId": {
119
+ "$ref": "#/$defs/__schema1"
120
+ },
121
+ "model": {
122
+ "$ref": "#/$defs/__schema2"
123
+ },
124
+ "cwd": {
125
+ "$ref": "#/$defs/__schema3"
126
+ },
127
+ "permissionMode": {
128
+ "$ref": "#/$defs/__schema4"
129
+ },
130
+ "availableTools": {
131
+ "$ref": "#/$defs/__schema6"
132
+ },
133
+ "mcpServers": {
134
+ "$ref": "#/$defs/__schema8"
135
+ }
136
+ },
137
+ "required": [
138
+ "sessionId",
139
+ "model",
140
+ "cwd",
141
+ "availableTools",
142
+ "mcpServers"
143
+ ],
144
+ "additionalProperties": false,
145
+ "title": "SessionMeta",
146
+ "description": "Session metadata captured from harness initialization (e.g. Claude system/init)."
147
+ },
148
+ "__schema11": {
149
+ "type": "array",
150
+ "items": {
151
+ "$ref": "#/$defs/ToolCall"
152
+ },
153
+ "description": "Every tool call in global emission order."
154
+ },
155
+ "ToolCall": {
156
+ "type": "object",
157
+ "properties": {
158
+ "name": {
159
+ "$ref": "#/$defs/__schema12"
160
+ },
161
+ "namespace": {
162
+ "$ref": "#/$defs/__schema13"
163
+ },
164
+ "callId": {
165
+ "$ref": "#/$defs/__schema15"
166
+ },
167
+ "args": {
168
+ "$ref": "#/$defs/__schema16"
169
+ },
170
+ "result": {
171
+ "$ref": "#/$defs/__schema17"
172
+ },
173
+ "isError": {
174
+ "$ref": "#/$defs/__schema19"
175
+ },
176
+ "turnIndex": {
177
+ "$ref": "#/$defs/__schema20"
178
+ },
179
+ "callIndex": {
180
+ "$ref": "#/$defs/__schema21"
181
+ }
182
+ },
183
+ "required": [
184
+ "name",
185
+ "namespace",
186
+ "callId",
187
+ "args",
188
+ "result",
189
+ "isError",
190
+ "turnIndex",
191
+ "callIndex"
192
+ ],
193
+ "additionalProperties": false,
194
+ "title": "ToolCall",
195
+ "description": "One tool invocation in emission order. Primary unit for behavioral assertions."
196
+ },
197
+ "__schema12": {
198
+ "type": "string",
199
+ "description": "Fully-qualified tool name, e.g. mcp__plugin_alis-build_api__SearchSkills or Bash.",
200
+ "examples": [
201
+ "mcp__plugin_alis-build_api__SearchSkills",
202
+ "Bash"
203
+ ]
204
+ },
205
+ "__schema13": {
206
+ "anyOf": [
207
+ {
208
+ "$ref": "#/$defs/__schema14"
209
+ },
210
+ {
211
+ "type": "null"
212
+ }
213
+ ],
214
+ "description": "Namespace prefix for MCP-style names (mcp__<server>), or null for built-in tools.",
215
+ "examples": [
216
+ "mcp__plugin_alis-build_api",
217
+ null
218
+ ]
219
+ },
220
+ "__schema14": {
221
+ "type": "string"
222
+ },
223
+ "__schema15": {
224
+ "type": "string",
225
+ "description": "Vendor tool-use block id; matches a later tool_result.tool_use_id when present."
226
+ },
227
+ "__schema16": {
228
+ "description": "Arguments the model emitted for this tool call. Tool-specific schema."
229
+ },
230
+ "__schema17": {
231
+ "anyOf": [
232
+ {
233
+ "$ref": "#/$defs/__schema18"
234
+ },
235
+ {
236
+ "type": "null"
237
+ }
238
+ ],
239
+ "description": "Tool result payload, or null if no result was observed (e.g. process killed)."
240
+ },
241
+ "__schema18": {},
242
+ "__schema19": {
243
+ "type": "boolean",
244
+ "description": "Whether the tool reported an error in its result envelope."
245
+ },
246
+ "__schema20": {
247
+ "type": "integer",
248
+ "minimum": -9007199254740991,
249
+ "maximum": 9007199254740991,
250
+ "description": "Assistant turn that produced this call. Parallel calls in one message share a turnIndex."
251
+ },
252
+ "__schema21": {
253
+ "type": "integer",
254
+ "minimum": -9007199254740991,
255
+ "maximum": 9007199254740991,
256
+ "description": "Index in the global ordered tool-call sequence (used for called_before assertions)."
257
+ },
258
+ "__schema22": {
259
+ "type": "array",
260
+ "items": {
261
+ "$ref": "#/$defs/AssistantTurn"
262
+ },
263
+ "description": "Assistant turns with per-turn text and tool calls."
264
+ },
265
+ "AssistantTurn": {
266
+ "type": "object",
267
+ "properties": {
268
+ "turnIndex": {
269
+ "$ref": "#/$defs/__schema23"
270
+ },
271
+ "text": {
272
+ "$ref": "#/$defs/__schema24"
273
+ },
274
+ "toolCalls": {
275
+ "$ref": "#/$defs/__schema25"
276
+ },
277
+ "stopReason": {
278
+ "$ref": "#/$defs/__schema26"
279
+ }
280
+ },
281
+ "required": [
282
+ "turnIndex",
283
+ "text",
284
+ "toolCalls",
285
+ "stopReason"
286
+ ],
287
+ "additionalProperties": false,
288
+ "title": "AssistantTurn",
289
+ "description": "One assistant turn: text content plus any tool calls in that turn."
290
+ },
291
+ "__schema23": {
292
+ "type": "integer",
293
+ "minimum": -9007199254740991,
294
+ "maximum": 9007199254740991,
295
+ "description": "Monotonic assistant turn index."
296
+ },
297
+ "__schema24": {
298
+ "type": "string",
299
+ "description": "Assistant text emitted in this turn (may be empty for tool-only turns)."
300
+ },
301
+ "__schema25": {
302
+ "type": "array",
303
+ "items": {
304
+ "$ref": "#/$defs/ToolCall"
305
+ },
306
+ "description": "Tool calls emitted in this turn, in block order."
307
+ },
308
+ "__schema26": {
309
+ "anyOf": [
310
+ {
311
+ "$ref": "#/$defs/__schema27"
312
+ },
313
+ {
314
+ "type": "null"
315
+ }
316
+ ],
317
+ "description": "Model stop reason for this turn, or null if not reported.",
318
+ "examples": [
319
+ "end_turn",
320
+ "tool_use",
321
+ null
322
+ ]
323
+ },
324
+ "__schema27": {
325
+ "type": "string"
326
+ },
327
+ "__schema28": {
328
+ "type": "string",
329
+ "description": "All assistant text concatenated across turns. Used for response_contains assertions."
330
+ },
331
+ "__schema29": {
332
+ "anyOf": [
333
+ {
334
+ "$ref": "#/$defs/__schema30"
335
+ },
336
+ {
337
+ "type": "null"
338
+ }
339
+ ],
340
+ "description": "Stop reason of the last assistant turn, or null if not reported."
341
+ },
342
+ "__schema30": {
343
+ "type": "string"
344
+ },
345
+ "__schema31": {
346
+ "description": "Aggregate usage and cost for the session.",
347
+ "$ref": "#/$defs/UsageSummary"
348
+ },
349
+ "__schema32": {
350
+ "type": "number",
351
+ "description": "Total input tokens for the session."
352
+ },
353
+ "__schema33": {
354
+ "type": "number",
355
+ "description": "Total output tokens for the session."
356
+ },
357
+ "__schema34": {
358
+ "type": "number",
359
+ "description": "Total session cost in USD when reported by the harness."
360
+ },
361
+ "__schema35": {
362
+ "type": "number",
363
+ "description": "Session duration in milliseconds from harness result metadata."
364
+ },
365
+ "__schema36": {
366
+ "type": "number",
367
+ "description": "Number of assistant turns in the session."
368
+ },
369
+ "UsageSummary": {
370
+ "type": "object",
371
+ "properties": {
372
+ "inputTokens": {
373
+ "$ref": "#/$defs/__schema32"
374
+ },
375
+ "outputTokens": {
376
+ "$ref": "#/$defs/__schema33"
377
+ },
378
+ "totalCostUsd": {
379
+ "$ref": "#/$defs/__schema34"
380
+ },
381
+ "durationMs": {
382
+ "$ref": "#/$defs/__schema35"
383
+ },
384
+ "numTurns": {
385
+ "$ref": "#/$defs/__schema36"
386
+ }
387
+ },
388
+ "required": [
389
+ "inputTokens",
390
+ "outputTokens",
391
+ "totalCostUsd",
392
+ "durationMs",
393
+ "numTurns"
394
+ ],
395
+ "additionalProperties": false,
396
+ "title": "UsageSummary",
397
+ "description": "Aggregate token usage, cost, and timing from the harness result."
398
+ },
399
+ "__schema37": {
400
+ "type": "array",
401
+ "items": {
402
+ "$ref": "#/$defs/RetryRecord"
403
+ },
404
+ "description": "Retry events observed during the run."
405
+ },
406
+ "RetryRecord": {
407
+ "type": "object",
408
+ "properties": {
409
+ "offsetMs": {
410
+ "$ref": "#/$defs/__schema38"
411
+ },
412
+ "raw": {
413
+ "$ref": "#/$defs/__schema39"
414
+ }
415
+ },
416
+ "required": [
417
+ "offsetMs",
418
+ "raw"
419
+ ],
420
+ "additionalProperties": false,
421
+ "title": "RetryRecord",
422
+ "description": "Rate-limit or transient error retry observed during the run."
423
+ },
424
+ "__schema38": {
425
+ "type": "number",
426
+ "description": "Approximate milliseconds since session start when the retry was observed."
427
+ },
428
+ "__schema39": {
429
+ "description": "Raw vendor payload from the retry event (e.g. system/api_retry)."
430
+ },
431
+ "__schema40": {
432
+ "type": "boolean",
433
+ "description": "Whether the harness result envelope indicated success."
434
+ },
435
+ "__schema41": {
436
+ "type": "string",
437
+ "const": "1.0",
438
+ "description": "TrajectoryView schema version for storage and API interchange."
439
+ }
440
+ }
441
+ }