@workbench-ai/agent-driver 0.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/access-contracts.d.ts +10 -0
- package/dist/access-contracts.d.ts.map +1 -0
- package/dist/access-contracts.js +45 -0
- package/dist/behavior-contract.d.ts +26 -0
- package/dist/behavior-contract.d.ts.map +1 -0
- package/dist/behavior-contract.js +200 -0
- package/dist/conformance.d.ts +3 -0
- package/dist/conformance.d.ts.map +1 -0
- package/dist/conformance.js +37 -0
- package/dist/global-skills.d.ts +22 -0
- package/dist/global-skills.d.ts.map +1 -0
- package/dist/global-skills.js +168 -0
- package/dist/index.d.ts +138 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +44 -0
- package/dist/internal-utils.d.ts +8 -0
- package/dist/internal-utils.d.ts.map +1 -0
- package/dist/internal-utils.js +46 -0
- package/dist/json-rpc.d.ts +22 -0
- package/dist/json-rpc.d.ts.map +1 -0
- package/dist/json-rpc.js +1 -0
- package/dist/managed-runtime.d.ts +21 -0
- package/dist/managed-runtime.d.ts.map +1 -0
- package/dist/managed-runtime.js +119 -0
- package/dist/model-config.d.ts +17 -0
- package/dist/model-config.d.ts.map +1 -0
- package/dist/model-config.js +43 -0
- package/dist/normalized-activity.d.ts +75 -0
- package/dist/normalized-activity.d.ts.map +1 -0
- package/dist/normalized-activity.js +89 -0
- package/dist/prepare.d.ts +9 -0
- package/dist/prepare.d.ts.map +1 -0
- package/dist/prepare.js +119 -0
- package/dist/process-env.d.ts +20 -0
- package/dist/process-env.d.ts.map +1 -0
- package/dist/process-env.js +75 -0
- package/dist/session-runtime.d.ts +60 -0
- package/dist/session-runtime.d.ts.map +1 -0
- package/dist/session-runtime.js +240 -0
- package/dist/tool-semantics.d.ts +22 -0
- package/dist/tool-semantics.d.ts.map +1 -0
- package/dist/tool-semantics.js +241 -0
- package/dist/trace-builder.d.ts +88 -0
- package/dist/trace-builder.d.ts.map +1 -0
- package/dist/trace-builder.js +493 -0
- package/dist/trace-replay.d.ts +33 -0
- package/dist/trace-replay.d.ts.map +1 -0
- package/dist/trace-replay.js +4 -0
- package/dist/types.d.ts +156 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +10 -0
- package/package.json +39 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import type { JsonValue, TraceEvent, TraceSpan, TraceSummary } from "./types.js";
|
|
2
|
+
/**
 * A batch of trace data: spans, the events attached to them, and summaries.
 *
 * `HarnessTraceBuilder.flushBundle()` and `HarnessTraceBuilder.buildBundle()`
 * both return this shape; the builder always leaves `summaries` empty.
 */
export interface TraceBundle {
  /** Spans contained in this bundle. */
  spans: Array<TraceSpan>;
  /** Events contained in this bundle. */
  events: Array<TraceEvent>;
  /** Summaries contained in this bundle. */
  summaries: Array<TraceSummary>;
}
|
|
7
|
+
/**
 * Incrementally records the trace of one stage run as spans and events.
 *
 * Spans created inside a turn are parented to the turn span; before a turn
 * exists they are parented to the stage span given to the constructor.
 */
export declare class HarnessTraceBuilder {
  private readonly args;
  private readonly spans;
  private readonly events;
  private readonly activeToolSpans;
  private readonly dirtySpanIds;
  private turnSpanId;
  private turnStartedAt;
  private outputSpanId;
  private outputStartedAt;
  private usageEventCount;
  private flushedEventCount;

  /**
   * @param args Identity of the stage run; these values are copied onto every
   *   span and event the builder emits.
   */
  constructor(args: {
    attemptNumber: number;
    stageId: string;
    stageRunIndex: number;
    stageSpanId: string;
  });

  /**
   * Opens the turn span (or refreshes it if one already exists) with status
   * "running" and logs a "Model turn started" event. Values omitted here fall
   * back to those already stored on the turn span.
   */
  startTurn(args: {
    at?: string;
    provider?: string | null;
    model?: string | null;
    sessionId?: string | null;
    operationId?: string | null;
    promptText?: string | null;
    attributes?: Record<string, JsonValue>;
  }): void;

  /**
   * Closes the turn span with a normalized status, creating the span first
   * when no turn was started. Failed and canceled turns log an "error" event;
   * every other status logs a "status" event.
   */
  completeTurn(args: {
    at?: string;
    status?: string | null;
    provider?: string | null;
    model?: string | null;
    sessionId?: string | null;
    operationId?: string | null;
    errorMessage?: string | null;
    attributes?: Record<string, JsonValue>;
  }): void;

  /**
   * Opens a tool-call span. A missing or blank `toolId` gets a generated id;
   * a `toolId` that is already open is ignored.
   */
  startToolCall(args: {
    at?: string;
    toolId?: string | null;
    toolName?: string | null;
    attributes?: Record<string, JsonValue>;
  }): void;

  /**
   * Marks a tool-call span as completed. If the tool id is not currently
   * open, the call is started implicitly and completed at the same timestamp.
   */
  completeToolCall(args: {
    at?: string;
    toolId?: string | null;
    toolName?: string | null;
    attributes?: Record<string, JsonValue>;
  }): void;

  /** Opens the assistant-output span if it is not open yet. */
  startAssistantOutput(args: {
    at?: string;
    phase?: string | null;
    itemId?: string | null;
  }): void;

  /**
   * Logs a streamed output chunk against the assistant-output span, opening
   * the span when needed. Empty deltas produce no event.
   */
  appendOutputDelta(args: {
    at?: string;
    delta: string;
    phase?: string | null;
    itemId?: string | null;
  }): void;

  /** Marks the assistant-output span as completed with the given text. */
  completeAssistantOutput(args: {
    at?: string;
    text: string;
    phase?: string | null;
    itemId?: string | null;
  }): void;

  /** Records a point-in-time usage span plus a matching "usage" event. */
  recordUsage(args: {
    at?: string;
    inputTokens?: number | null;
    outputTokens?: number | null;
    attributes?: Record<string, JsonValue>;
  }): void;

  /** Logs a "note" event on the turn span, or on the stage span if no turn exists. */
  recordNote(message: string, at?: string, attributes?: Record<string, JsonValue>): void;

  /** Records a failed "Runtime error" span plus a matching "error" event. */
  recordError(message: string, at?: string, attributes?: Record<string, JsonValue>): void;

  /**
   * Returns the spans touched and the events appended since the previous
   * flush, then resets that bookkeeping.
   */
  flushBundle(): TraceBundle;

  /**
   * Finalizes the trace: completes the output span if one exists, closes an
   * open turn span, closes tool calls that are still open with status
   * "warning", and returns every span and event sorted by timestamp.
   */
  buildBundle(finalOutput: string, endedAt?: string): TraceBundle;

  private ensureOutputSpan;
  private completeOutputSpan;
  private upsertSpan;
  private appendEvent;
}
|
|
88
|
+
//# sourceMappingURL=trace-builder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"trace-builder.d.ts","sourceRoot":"","sources":["../src/trace-builder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,SAAS,EACT,UAAU,EAEV,SAAS,EAET,YAAY,EACb,MAAM,YAAY,CAAC;AAIpB,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,MAAM,EAAE,UAAU,EAAE,CAAC;IACrB,SAAS,EAAE,YAAY,EAAE,CAAC;CAC3B;AASD,qBAAa,mBAAmB;IAa5B,OAAO,CAAC,QAAQ,CAAC,IAAI;IAZvB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAmB;IACzC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAoB;IAC3C,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAqC;IACrE,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAqB;IAClD,OAAO,CAAC,UAAU,CAAuB;IACzC,OAAO,CAAC,aAAa,CAAuB;IAC5C,OAAO,CAAC,YAAY,CAAuB;IAC3C,OAAO,CAAC,eAAe,CAAuB;IAC9C,OAAO,CAAC,eAAe,CAAK;IAC5B,OAAO,CAAC,iBAAiB,CAAK;gBAGX,IAAI,EAAE;QACrB,aAAa,EAAE,MAAM,CAAC;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,aAAa,EAAE,MAAM,CAAC;QACtB,WAAW,EAAE,MAAM,CAAC;KACrB;IAGH,SAAS,CAAC,IAAI,EAAE;QACd,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACtB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC5B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC3B,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;KACxC,GAAG,IAAI;IA4CR,YAAY,CAAC,IAAI,EAAE;QACjB,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACvB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACtB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC5B,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC7B,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;KACxC,GAAG,IAAI;IAkDR,aAAa,CAAC,IAAI,EAAE;QAClB,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACvB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACzB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;KACxC,GAAG,IAAI;IA+CR,gBAAgB,CAAC,IAAI,EAAE;QACrB,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACvB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACzB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;KACxC,GAAG,IAAI;IAiDR,oBAAoB,CAAC,IAAI,EAAE;QACzB,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,KAAK,CAAC,E
AAE,MAAM,GAAG,IAAI,CAAC;QACtB,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;KACxB,GAAG,IAAI;IAOR,iBAAiB,CAAC,IAAI,EAAE;QACtB,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACtB,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;KACxB,GAAG,IAAI;IA0BR,uBAAuB,CAAC,IAAI,EAAE;QAC5B,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACtB,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;KACxB,GAAG,IAAI;IAOR,WAAW,CAAC,IAAI,EAAE;QAChB,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC5B,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC7B,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;KACxC,GAAG,IAAI;IA4CR,UAAU,CACR,OAAO,EAAE,MAAM,EACf,EAAE,SAAW,EACb,UAAU,GAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAM,GACzC,IAAI;IAeP,WAAW,CACT,OAAO,EAAE,MAAM,EACf,EAAE,SAAW,EACb,UAAU,GAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAM,GACzC,IAAI;IAiCP,WAAW,IAAI,WAAW;IAe1B,WAAW,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,SAAW,GAAG,WAAW;IAgDjE,OAAO,CAAC,gBAAgB;IAoCxB,OAAO,CAAC,kBAAkB;IA4C1B,OAAO,CAAC,UAAU;IAWlB,OAAO,CAAC,WAAW;CAGpB"}
|
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
import { createId, nowIso } from "./internal-utils.js";
|
|
2
|
+
/**
 * Incrementally records the trace of one stage run as spans and events.
 *
 * The builder tracks a single turn span, a single assistant-output span and
 * any number of tool-call, usage and error spans. `flushBundle()` returns what
 * changed since the previous flush; `buildBundle()` closes whatever is still
 * open and returns the complete trace.
 */
export class HarnessTraceBuilder {
  /** Stage-run identity: `attemptNumber`, `stageId`, `stageRunIndex`, `stageSpanId`. */
  args;
  /** Every span recorded so far, in creation order. */
  spans = [];
  /** Append-only event log. */
  events = [];
  /** Open tool calls: tool id -> `{ spanId, startedAt, title, attributes }`. */
  activeToolSpans = new Map();
  /** Ids of spans created or replaced since the last `flushBundle()`. */
  dirtySpanIds = new Set();
  turnSpanId = null;
  turnStartedAt = null;
  outputSpanId = null;
  outputStartedAt = null;
  /** Number of usage records so far; only the first is titled "Usage snapshot". */
  usageEventCount = 0;
  /** Number of events already handed out by `flushBundle()`. */
  flushedEventCount = 0;

  /**
   * @param args Identity of the stage run, stamped onto every span and event.
   */
  constructor(args) {
    this.args = args;
  }

  /**
   * Opens the turn span, or refreshes the existing one, with status "running".
   * Values omitted in `args` fall back to those already stored on the span.
   *
   * @param args Optional timestamp (`at`), provider/model/session/operation
   *   identifiers, prompt text and extra attributes.
   */
  startTurn(args) {
    const at = args.at ?? nowIso();
    this.turnSpanId = this.turnSpanId ?? createId("trace_span");
    this.turnStartedAt = this.turnStartedAt ?? at;
    const previous = this.findSpan(this.turnSpanId)?.attributes;
    this.upsertSpan(this.turnSpanId, {
      ...this.spanIdentity(this.turnSpanId, this.args.stageSpanId),
      kind: "turn",
      title: "Model turn",
      status: "running",
      started_at: this.turnStartedAt,
      ended_at: null,
      attributes: {
        ...(previous ?? {}),
        provider: args.provider ?? readTraceString(previous, [["provider"]]) ?? null,
        model: args.model ?? readTraceString(previous, [["model"]]) ?? null,
        session_id: args.sessionId ?? readTraceString(previous, [["session_id"]]) ?? null,
        operation_id: args.operationId ?? readTraceString(previous, [["operation_id"]]) ?? null,
        prompt_text: args.promptText ?? readTraceString(previous, [["prompt_text"]]) ?? null,
        prompt_format: args.promptText
          ? "text"
          : readTraceString(previous, [["prompt_format"]]) ?? null,
        prompt_source: args.promptText
          ? "rendered_stage_prompt"
          : readTraceString(previous, [["prompt_source"]]) ?? null,
        ...(args.attributes ?? {}),
      },
    });
    this.emitEvent(this.turnSpanId, "status", at, "Model turn started");
  }

  /**
   * Closes the turn span with a normalized status, creating the span first if
   * no turn was started. Failed and canceled turns log an "error" event.
   *
   * @param args Optional timestamp, raw status, identifiers, error message and
   *   extra attributes.
   */
  completeTurn(args) {
    const at = args.at ?? nowIso();
    const status = normalizeTraceStatus(args.status ?? null);
    // Look the span up before a fresh id is minted below, so a turn that was
    // never started has no previous attributes to inherit.
    const previous = this.findSpan(this.turnSpanId)?.attributes;
    if (!this.turnSpanId) {
      this.turnSpanId = createId("trace_span");
      this.turnStartedAt = at;
    }
    this.upsertSpan(this.turnSpanId, {
      ...this.spanIdentity(this.turnSpanId, this.args.stageSpanId),
      kind: "turn",
      title: "Model turn",
      status,
      started_at: this.turnStartedAt ?? at,
      // NOTE(review): a raw status that normalizes to "running" still gets an
      // end timestamp here, so buildBundle() will not promote it to
      // "completed" — confirm this is intended.
      ended_at: at,
      attributes: {
        ...(previous ?? {}),
        provider: args.provider ?? readTraceString(previous, [["provider"]]) ?? null,
        model: args.model ?? readTraceString(previous, [["model"]]) ?? null,
        session_id: args.sessionId ?? readTraceString(previous, [["session_id"]]) ?? null,
        operation_id: args.operationId ?? readTraceString(previous, [["operation_id"]]) ?? null,
        error_message: args.errorMessage ?? null,
        ...(args.attributes ?? {}),
      },
    });
    const isFailure = status === "failed" || status === "canceled";
    const fallbackMessage = status === "completed" ? "Model turn completed" : `Model turn ${status}`;
    this.emitEvent(
      this.turnSpanId,
      isFailure ? "error" : "status",
      at,
      args.errorMessage ?? fallbackMessage,
    );
  }

  /**
   * Opens a tool-call span. A missing or blank `toolId` gets a generated id;
   * a `toolId` that is already open is ignored.
   *
   * @param args Optional timestamp, tool id, tool name and extra attributes.
   */
  startToolCall(args) {
    const at = args.at ?? nowIso();
    const toolId = args.toolId?.trim() || createId("tool");
    if (this.activeToolSpans.has(toolId)) {
      return;
    }
    const spanId = createId("trace_span");
    const title = `Tool call: ${args.toolName?.trim() || "tool"}`;
    const attributes = {
      tool_name: args.toolName ?? null,
      tool_id: toolId,
      ...(args.attributes ?? {}),
    };
    this.activeToolSpans.set(toolId, {
      spanId,
      startedAt: at,
      title,
      attributes,
    });
    this.addSpan({
      ...this.spanIdentity(spanId, this.parentSpanId()),
      kind: "tool_call",
      title,
      status: "running",
      started_at: at,
      ended_at: null,
      attributes,
    });
    this.emitEvent(spanId, "status", at, `${title} started`, attributes);
  }

  /**
   * Marks a tool-call span as completed. When the tool id is not open, the
   * call is started implicitly and completed at the same timestamp.
   *
   * @param args Optional timestamp, tool id, tool name and extra attributes
   *   (merged over the attributes recorded at start).
   */
  completeToolCall(args) {
    const at = args.at ?? nowIso();
    const toolId = args.toolId?.trim() || createId("tool");
    if (!this.activeToolSpans.has(toolId)) {
      this.startToolCall({
        at,
        toolId,
        toolName: args.toolName,
        attributes: args.attributes,
      });
    }
    const tool = this.activeToolSpans.get(toolId);
    if (!tool) {
      return;
    }
    this.upsertSpan(tool.spanId, {
      ...this.spanIdentity(tool.spanId, this.parentSpanId()),
      kind: "tool_call",
      title: tool.title,
      status: "completed",
      started_at: tool.startedAt,
      ended_at: at,
      attributes: {
        ...tool.attributes,
        ...(args.attributes ?? {}),
      },
    });
    this.emitEvent(tool.spanId, "message", at, `${tool.title} completed`, {
      ...tool.attributes,
      ...(args.attributes ?? {}),
    });
    this.activeToolSpans.delete(toolId);
  }

  /**
   * Opens the assistant-output span if it is not open yet.
   *
   * @param args Optional timestamp, phase and item id.
   */
  startAssistantOutput(args) {
    this.ensureOutputSpan(args.at ?? nowIso(), {
      phase: args.phase ?? null,
      item_id: args.itemId ?? null,
    });
  }

  /**
   * Logs a streamed output chunk, opening the output span when needed.
   * Empty deltas produce no event.
   *
   * @param args The `delta` text plus optional timestamp, phase and item id.
   */
  appendOutputDelta(args) {
    const at = args.at ?? nowIso();
    this.ensureOutputSpan(at, {
      phase: args.phase ?? null,
      item_id: args.itemId ?? null,
    });
    if (!this.outputSpanId || !args.delta) {
      return;
    }
    this.emitEvent(this.outputSpanId, "output", at, "Assistant streamed output", {
      delta_text: args.delta,
      delta_length: args.delta.length,
    });
  }

  /**
   * Marks the assistant-output span as completed with the final text.
   *
   * @param args The `text` plus optional timestamp, phase and item id.
   */
  completeAssistantOutput(args) {
    this.completeOutputSpan(args.at ?? nowIso(), args.text, {
      phase: args.phase ?? null,
      item_id: args.itemId ?? null,
    });
  }

  /**
   * Records a point-in-time usage span and a matching "usage" event.
   *
   * @param args Optional timestamp, token counts and extra attributes.
   */
  recordUsage(args) {
    const at = args.at ?? nowIso();
    const spanId = createId("trace_span");
    const usage = {
      input_tokens: args.inputTokens ?? null,
      output_tokens: args.outputTokens ?? null,
      ...(args.attributes ?? {}),
    };
    this.addSpan({
      ...this.spanIdentity(spanId, this.parentSpanId()),
      kind: "usage",
      title: this.usageEventCount === 0 ? "Usage snapshot" : "Usage update",
      status: "completed",
      started_at: at,
      ended_at: at,
      attributes: usage,
    });
    const hasCounts = args.inputTokens != null || args.outputTokens != null;
    const countsSuffix = hasCounts
      ? ` · in ${args.inputTokens ?? 0} / out ${args.outputTokens ?? 0}`
      : "";
    this.emitEvent(spanId, "usage", at, `Usage updated${countsSuffix}`, usage);
    this.usageEventCount += 1;
  }

  /**
   * Logs a "note" event on the turn span, or on the stage span when no turn
   * exists yet.
   *
   * @param message Note text.
   * @param at Event timestamp; defaults to now.
   * @param attributes Extra event attributes.
   */
  recordNote(message, at = nowIso(), attributes = {}) {
    this.emitEvent(this.parentSpanId(), "note", at, message, attributes);
  }

  /**
   * Records a failed "Runtime error" span and a matching "error" event.
   *
   * @param message Error text.
   * @param at Timestamp; defaults to now.
   * @param attributes Extra attributes for both the span and the event.
   */
  recordError(message, at = nowIso(), attributes = {}) {
    const spanId = createId("trace_span");
    this.addSpan({
      ...this.spanIdentity(spanId, this.parentSpanId()),
      kind: "error",
      title: "Runtime error",
      status: "failed",
      started_at: at,
      ended_at: at,
      attributes: {
        message,
        ...attributes,
      },
    });
    this.emitEvent(spanId, "error", at, message, attributes);
  }

  /**
   * Returns the spans touched and the events appended since the previous
   * flush, then resets that bookkeeping.
   *
   * @returns A bundle with the changed spans (sorted by start time), the new
   *   events (in append order) and no summaries.
   */
  flushBundle() {
    // Query the Set directly; copying it to an array and calling includes()
    // per span made this quadratic for no benefit.
    const spans = this.spans
      .filter((span) => this.dirtySpanIds.has(span.id))
      .sort((left, right) => left.started_at.localeCompare(right.started_at));
    const events = this.events.slice(this.flushedEventCount);
    this.dirtySpanIds.clear();
    this.flushedEventCount = this.events.length;
    return {
      spans,
      events,
      summaries: [],
    };
  }

  /**
   * Finalizes the trace and returns everything recorded.
   *
   * Completes the output span if one exists, closes an open turn span, and
   * closes tool calls that never completed with status "warning".
   *
   * @param finalOutput Final assistant text used to complete the output span.
   * @param endedAt End timestamp applied to anything still open; defaults to now.
   * @returns All spans and events, each sorted by timestamp.
   */
  buildBundle(finalOutput, endedAt = nowIso()) {
    if (this.outputSpanId) {
      this.completeOutputSpan(endedAt, finalOutput, {});
    }
    if (this.turnSpanId) {
      const turn = this.findSpan(this.turnSpanId);
      if (turn && !turn.ended_at) {
        this.upsertSpan(this.turnSpanId, {
          ...turn,
          status: turn.status === "running" ? "completed" : turn.status,
          ended_at: endedAt,
        });
      }
    }
    // Deleting the current entry while iterating a Map is well-defined.
    for (const [toolId, tool] of this.activeToolSpans.entries()) {
      this.upsertSpan(tool.spanId, {
        ...this.spanIdentity(tool.spanId, this.parentSpanId()),
        kind: "tool_call",
        title: tool.title,
        status: "warning",
        started_at: tool.startedAt,
        ended_at: endedAt,
        attributes: {
          ...tool.attributes,
          note: "tool span completed implicitly during bundle finalization",
        },
      });
      this.activeToolSpans.delete(toolId);
    }
    return {
      spans: [...this.spans].sort((left, right) => left.started_at.localeCompare(right.started_at)),
      events: [...this.events].sort((left, right) => left.at.localeCompare(right.at)),
      summaries: [],
    };
  }

  /**
   * Creates the assistant-output span on first use; later calls are no-ops.
   *
   * @param at Start timestamp for a newly created span.
   * @param attributes Attributes stored on a newly created span.
   */
  ensureOutputSpan(at, attributes) {
    if (this.outputSpanId) {
      return;
    }
    this.outputSpanId = createId("trace_span");
    this.outputStartedAt = at;
    this.addSpan({
      ...this.spanIdentity(this.outputSpanId, this.parentSpanId()),
      kind: "assistant_output",
      title: "Assistant output",
      status: "running",
      started_at: at,
      ended_at: null,
      attributes,
    });
    this.emitEvent(this.outputSpanId, "status", at, "Assistant output started");
  }

  /**
   * Marks the assistant-output span as completed and logs the output text.
   *
   * Attributes already on the span are kept: a new value replaces an old one
   * only when it is non-null, so finalizing with `{}` or with an omitted
   * phase no longer erases the recorded `phase` / `item_id`.
   *
   * @param at Completion timestamp.
   * @param messageText Full output text.
   * @param attributes Attributes to merge onto the span.
   */
  completeOutputSpan(at, messageText, attributes) {
    this.ensureOutputSpan(at, attributes);
    if (!this.outputSpanId) {
      return;
    }
    const merged = { ...(this.findSpan(this.outputSpanId)?.attributes ?? {}) };
    for (const [key, value] of Object.entries(attributes ?? {})) {
      if (value != null || !(key in merged)) {
        merged[key] = value;
      }
    }
    merged.output_length = messageText.length;
    this.upsertSpan(this.outputSpanId, {
      ...this.spanIdentity(this.outputSpanId, this.parentSpanId()),
      kind: "assistant_output",
      title: "Assistant output",
      status: "completed",
      started_at: this.outputStartedAt ?? at,
      ended_at: at,
      attributes: merged,
    });
    this.emitEvent(
      this.outputSpanId,
      "output",
      at,
      messageText ? truncateValue(messageText, 140) : "Assistant output completed",
      {
        output_length: messageText.length,
        output_text: messageText,
      },
    );
  }

  /**
   * Replaces the span with the given id, or appends it when unknown, and
   * marks it as changed for the next flush.
   *
   * @param spanId Id of the span to replace.
   * @param next Complete replacement span.
   */
  upsertSpan(spanId, next) {
    const index = this.spans.findIndex((span) => span.id === spanId);
    if (index === -1) {
      this.spans.push(next);
    } else {
      this.spans[index] = next;
    }
    this.dirtySpanIds.add(spanId);
  }

  /**
   * Appends an event to the log.
   *
   * @param event Event to append.
   */
  appendEvent(event) {
    this.events.push(event);
  }

  /**
   * Appends a span known to be new and marks it as changed.
   *
   * @private
   * @param span Span to append.
   */
  addSpan(span) {
    this.spans.push(span);
    this.dirtySpanIds.add(span.id);
  }

  /**
   * Looks a span up by id.
   *
   * @private
   * @param spanId Span id, possibly null.
   * @returns The span, or null when none matches.
   */
  findSpan(spanId) {
    return this.spans.find((span) => span.id === spanId) ?? null;
  }

  /**
   * Parent for spans created inside a turn: the turn span when one exists,
   * otherwise the stage span.
   *
   * @private
   * @returns Parent span id.
   */
  parentSpanId() {
    return this.turnSpanId ?? this.args.stageSpanId;
  }

  /**
   * Leading fields shared by every span this builder emits, in the order the
   * spans are serialized.
   *
   * @private
   * @param id Span id.
   * @param parentId Parent span id.
   * @returns Object holding the id, parent id and stage-run identity.
   */
  spanIdentity(id, parentId) {
    return {
      id,
      parent_id: parentId,
      attempt_number: this.args.attemptNumber,
      stage_id: this.args.stageId,
      stage_run_index: this.args.stageRunIndex,
    };
  }

  /**
   * Creates an event stamped with this stage run's identity and appends it.
   *
   * @private
   * @param spanId Span the event belongs to.
   * @param kind Event kind.
   * @param at Event timestamp.
   * @param message Event message.
   * @param attributes Optional event attributes.
   */
  emitEvent(spanId, kind, at, message, attributes) {
    this.appendEvent(createTraceEvent({
      spanId,
      attemptNumber: this.args.attemptNumber,
      stageId: this.args.stageId,
      stageRunIndex: this.args.stageRunIndex,
      kind,
      at,
      message,
      attributes,
    }));
  }
}
|
|
438
|
+
/**
 * Converts camelCase event arguments into a snake_case trace event record
 * with a freshly generated id.
 *
 * @param args Event fields: `spanId`, `attemptNumber`, `stageId`,
 *   `stageRunIndex`, `kind`, `at`, `message` and optional `attributes`.
 * @returns The event record; `attributes` defaults to an empty object.
 */
function createTraceEvent(args) {
  const eventId = createId("trace_event");
  const {
    spanId,
    attemptNumber,
    stageId,
    stageRunIndex,
    kind,
    at,
    message,
    attributes,
  } = args;
  return {
    id: eventId,
    span_id: spanId,
    attempt_number: attemptNumber,
    stage_id: stageId,
    stage_run_index: stageRunIndex,
    kind,
    at,
    message,
    attributes: attributes ?? {},
  };
}
|
|
451
|
+
/**
 * Maps a raw status string onto the trace status vocabulary.
 *
 * Matching ignores case and surrounding whitespace for every status family,
 * so "FAILED" or "Cancelled" are no longer reported as "completed".
 *
 * @param rawStatus Raw status, possibly null, undefined or empty.
 * @returns "failed", "canceled" or "running" for the recognized spellings;
 *   "completed" for everything else, including a missing status.
 */
function normalizeTraceStatus(rawStatus) {
  const status = typeof rawStatus === "string" ? rawStatus.trim().toLowerCase() : "";
  if (!status) {
    return "completed";
  }
  if (["failed", "error"].includes(status)) {
    return "failed";
  }
  if (["interrupted", "canceled", "cancelled"].includes(status)) {
    return "canceled";
  }
  if (["running", "inprogress", "in_progress", "started"].includes(status)) {
    return "running";
  }
  return "completed";
}
|
|
466
|
+
/**
 * Returns the first non-empty string found at any of the given key paths.
 *
 * @param value Object to read from; a falsy value yields null.
 * @param paths Candidate key paths, tried in order.
 * @returns The first non-empty string, or null when no path yields one.
 */
function readTraceString(value, paths) {
  if (!value) {
    return null;
  }
  for (const keyPath of paths) {
    const candidate = readNestedValue(value, keyPath);
    const isUsable = typeof candidate === "string" && candidate.length > 0;
    if (isUsable) {
      return candidate;
    }
  }
  return null;
}
|
|
478
|
+
/**
 * Walks `path` through nested plain objects and returns the value found.
 *
 * Only own properties are followed, so inherited members such as `toString`
 * or `constructor` are never returned as if they were data.
 *
 * @param value Root value to start from.
 * @param path Sequence of property names to follow.
 * @returns The value at the end of the path (the root itself for an empty
 *   path), or undefined when a step hits null, a non-object, an array or a
 *   missing key.
 */
function readNestedValue(value, path) {
  let current = value;
  for (const segment of path) {
    if (!current || Array.isArray(current) || typeof current !== "object") {
      return undefined;
    }
    if (!Object.prototype.hasOwnProperty.call(current, segment)) {
      return undefined;
    }
    current = current[segment];
  }
  return current;
}
|
|
488
|
+
/**
 * Shortens `value` to at most `maxLength` UTF-16 code units, ending the
 * shortened text with an ellipsis character.
 *
 * @param value Text to shorten.
 * @param maxLength Maximum length of the result, ellipsis included.
 * @returns `value` unchanged when it already fits; an empty string when
 *   `maxLength` is zero or negative; otherwise the truncated text plus "…".
 */
function truncateValue(value, maxLength) {
  if (value.length <= maxLength) {
    return value;
  }
  if (maxLength <= 0) {
    // A negative slice end would count from the end of the string and return
    // almost all of it, so handle this case explicitly.
    return "";
  }
  let end = maxLength - 1;
  if (end > 0) {
    const lastUnit = value.charCodeAt(end - 1);
    // Do not cut between the two halves of a surrogate pair: drop a trailing
    // high surrogate rather than emit a broken character.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      end -= 1;
    }
  }
  return `${value.slice(0, end)}…`;
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { JsonValue, TraceSpan } from "./types.js";
|
|
2
|
+
import type { TraceBundle } from "./trace-builder.js";
|
|
3
|
+
/**
 * Minimum shape every replay entry must have.
 */
export interface HarnessTraceReplayEntry {
  /** When the entry occurred. NOTE(review): presumably an ISO-8601 timestamp — confirm. */
  at: string;
}
|
|
6
|
+
/**
 * A parsed, read-only list of replay entries.
 *
 * @typeParam TEntry Entry type; defaults to the minimal replay entry.
 */
export interface HarnessTraceReplaySource<
  TEntry extends HarnessTraceReplayEntry = HarnessTraceReplayEntry,
> {
  /** Entries to replay. */
  entries: ReadonlyArray<TEntry>;
}
|
|
9
|
+
/**
 * Inputs passed to `HarnessTraceReplayer.buildTraceBundle`.
 *
 * @typeParam TEntry Entry type carried by `source`.
 */
export interface HarnessTraceReplayerBuildArgs<
  TEntry extends HarnessTraceReplayEntry = HarnessTraceReplayEntry,
> {
  /**
   * Descriptor of the stage-run artifact being replayed.
   * NOTE(review): the `*_file` fields look like file paths — confirm against the caller.
   */
  artifact: {
    attempt_number: number;
    stage_id: string;
    run_index: number;
    output_file: string;
    events_file: string;
    raw_events_file?: string | null;
    final_output?: string | null;
  };
  /** Parsed entries to replay. */
  source: HarnessTraceReplaySource<TEntry>;
  /** Turn span from an earlier trace, or null when there is none. */
  oldTurnSpan: TraceSpan | null;
  /** Id of the stage span. */
  stageSpanId: string;
  /** Start time of the stage. */
  stageStartedAt: string;
  /** End time to apply to the rebuilt trace. */
  endedAt: string;
  /** Asynchronously resolves the final output text. */
  readFinalOutput: () => Promise<string>;
}
|
|
26
|
+
/**
 * Contract for rebuilding a trace bundle from recorded entries of one harness.
 *
 * @typeParam TEntry Entry type the replayer parses and consumes.
 */
export interface HarnessTraceReplayer<
  TEntry extends HarnessTraceReplayEntry = HarnessTraceReplayEntry,
> {
  /** Identifier of the harness this replayer belongs to. */
  harnessId: string;

  /**
   * Parses raw recorded entries into a replay source.
   *
   * @returns The parsed source, or null. NOTE(review): null presumably means
   *   the entries are not recognized by this replayer — confirm.
   */
  parseRawReplayEntries(entries: Record<string, unknown>[]): HarnessTraceReplaySource<TEntry> | null;

  /**
   * Parses harness-level recorded entries into a replay source.
   *
   * @returns The parsed source, or null. NOTE(review): null presumably means
   *   the entries are not recognized by this replayer — confirm.
   */
  parseHarnessReplayEntries(entries: Record<string, unknown>[]): HarnessTraceReplaySource<TEntry> | null;

  /** Builds the trace bundle for the given artifact and parsed source. */
  buildTraceBundle(args: HarnessTraceReplayerBuildArgs<TEntry>): Promise<TraceBundle>;
}
|
|
32
|
+
/**
 * Reads `key` from an optional attribute map as a string.
 *
 * NOTE(review): the implementation is not part of this declaration file;
 * presumably a missing map, a missing key or a non-string value yields
 * null — confirm.
 *
 * @param value Attribute map, or undefined.
 * @param key Attribute name to read.
 * @returns The string value, or null.
 */
export declare function readTraceString(
  value: Record<string, JsonValue> | undefined,
  key: string,
): string | null;
|
|
33
|
+
//# sourceMappingURL=trace-replay.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"trace-replay.d.ts","sourceRoot":"","sources":["../src/trace-replay.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,SAAS,EACT,SAAS,EACV,MAAM,YAAY,CAAC;AAEpB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEtD,MAAM,WAAW,uBAAuB;IACtC,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,wBAAwB,CACvC,MAAM,SAAS,uBAAuB,GAAG,uBAAuB;IAEhE,OAAO,EAAE,SAAS,MAAM,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,6BAA6B,CAC5C,MAAM,SAAS,uBAAuB,GAAG,uBAAuB;IAEhE,QAAQ,EAAE;QACR,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAChC,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;KAC9B,CAAC;IACF,MAAM,EAAE,wBAAwB,CAAC,MAAM,CAAC,CAAC;IACzC,WAAW,EAAE,SAAS,GAAG,IAAI,CAAC;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC;CACxC;AAED,MAAM,WAAW,oBAAoB,CACnC,MAAM,SAAS,uBAAuB,GAAG,uBAAuB;IAEhE,SAAS,EAAE,MAAM,CAAC;IAClB,qBAAqB,CACnB,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GACtC,wBAAwB,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;IAC3C,yBAAyB,CACvB,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GACtC,wBAAwB,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;IAC3C,gBAAgB,CACd,IAAI,EAAE,6BAA6B,CAAC,MAAM,CAAC,GAC1C,OAAO,CAAC,WAAW,CAAC,CAAC;CACzB;AAED,wBAAgB,eAAe,CAC7B,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,GAAG,SAAS,EAC5C,GAAG,EAAE,MAAM,GACV,MAAM,GAAG,IAAI,CAGf"}
|