@braintrust/pi-extension 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +3 -3
- package/src/index.integration.test.ts +32 -1
- package/src/index.test.ts +53 -0
- package/src/index.ts +80 -1
- package/src/types.ts +1 -0
package/README.md
CHANGED
|
@@ -47,9 +47,9 @@ pi -e .
|
|
|
47
47
|
|
|
48
48
|
## Compatibility
|
|
49
49
|
|
|
50
|
-
This package supports the **last
|
|
50
|
+
This package supports the **latest patch release from each of the last six stable pi minor versions**, currently excluding pi versions before `0.65.0`.
|
|
51
51
|
|
|
52
|
-
Our GitHub Actions compatibility job automatically resolves and tests
|
|
52
|
+
Our GitHub Actions compatibility job automatically resolves and tests that compatibility window, so new pi releases are picked up without manually updating the matrix.
|
|
53
53
|
|
|
54
54
|
## Quick start
|
|
55
55
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@braintrust/pi-extension",
|
|
3
|
-
"version": "0.4.0",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Braintrust extension for pi. Includes automatic tracing for pi sessions, turns, LLM calls, and tool executions to Braintrust.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"braintrust",
|
|
@@ -28,8 +28,8 @@
|
|
|
28
28
|
"valibot": "^1.3.1"
|
|
29
29
|
},
|
|
30
30
|
"devDependencies": {
|
|
31
|
-
"@mariozechner/pi-ai": "^0.
|
|
32
|
-
"@mariozechner/pi-coding-agent": "^0.
|
|
31
|
+
"@mariozechner/pi-ai": "^0.71.1",
|
|
32
|
+
"@mariozechner/pi-coding-agent": "^0.71.1",
|
|
33
33
|
"@types/node": "^25.6.0",
|
|
34
34
|
"typescript": "^6.0.2",
|
|
35
35
|
"vite-plus": "^0.1.16",
|
|
package/src/index.integration.test.ts
CHANGED
|
@@ -149,6 +149,28 @@ function makeTempDir(prefix: string): string {
|
|
|
149
149
|
return dir;
|
|
150
150
|
}
|
|
151
151
|
|
|
152
|
+
// Injected by CI (see .github/workflows/ci.yml). When unset (e.g. local dev) we
|
|
153
|
+
// assume the currently installed pi is at least as new as any version we branch
|
|
154
|
+
// on below.
|
|
155
|
+
const PI_COMPAT_VERSION = process.env.PI_COMPAT_VERSION;
|
|
156
|
+
|
|
157
|
+
function piCompatAtLeast(target: string): boolean {
|
|
158
|
+
if (!PI_COMPAT_VERSION) return true;
|
|
159
|
+
const parse = (v: string) =>
|
|
160
|
+
v
|
|
161
|
+
.split("-")[0]
|
|
162
|
+
.split(".")
|
|
163
|
+
.map((part) => Number.parseInt(part, 10) || 0);
|
|
164
|
+
const actual = parse(PI_COMPAT_VERSION);
|
|
165
|
+
const wanted = parse(target);
|
|
166
|
+
for (let i = 0; i < Math.max(actual.length, wanted.length); i += 1) {
|
|
167
|
+
const a = actual[i] ?? 0;
|
|
168
|
+
const w = wanted[i] ?? 0;
|
|
169
|
+
if (a !== w) return a > w;
|
|
170
|
+
}
|
|
171
|
+
return true;
|
|
172
|
+
}
|
|
173
|
+
|
|
152
174
|
function buildAssistantMessage(model: Model<Api>): AssistantMessage {
|
|
153
175
|
return {
|
|
154
176
|
role: "assistant",
|
|
@@ -585,9 +607,18 @@ describe("braintrustPiExtension integration", () => {
|
|
|
585
607
|
const firstLlmSpanId = llmSpans[0]?.spanId;
|
|
586
608
|
|
|
587
609
|
expect(toolSpans).toHaveLength(2);
|
|
610
|
+
// pi < 0.68.1 emits `tool_execution_end` in assistant source order, so the
|
|
611
|
+
// extension logs tool spans as [tool-1, tool-2]. Starting with pi 0.68.1 the
|
|
612
|
+
// agent emits parallel tool completions eagerly (completion order), so the
|
|
613
|
+
// fast `tool-2` finishes before the slow `tool-1` and spans are logged as
|
|
614
|
+
// [tool-2, tool-1]. See pi-coding-agent changelog 0.68.1 / issue #3503.
|
|
615
|
+
// TODO: drop the pi < 0.68.1 branch once we stop testing against it.
|
|
616
|
+
const expectedToolCallIdOrder = piCompatAtLeast("0.68.1")
|
|
617
|
+
? ["tool-2", "tool-1"]
|
|
618
|
+
: ["tool-1", "tool-2"];
|
|
588
619
|
expect(
|
|
589
620
|
toolSpans.map((span) => (span.metadata as Record<string, unknown> | undefined)?.tool_call_id),
|
|
590
|
-
).toEqual(["tool-1", "tool-2"]);
|
|
621
|
+
).toEqual(expectedToolCallIdOrder);
|
|
591
622
|
expect(toolSpans.map((span) => span.parentSpanId)).toEqual([firstLlmSpanId, firstLlmSpanId]);
|
|
592
623
|
});
|
|
593
624
|
|
package/src/index.test.ts
CHANGED
|
@@ -239,6 +239,59 @@ describe("braintrustPiExtension", () => {
|
|
|
239
239
|
expect(mockState.updateSpans).toEqual([]);
|
|
240
240
|
});
|
|
241
241
|
|
|
242
|
+
it("records resolved model, thinking level, and provider response metadata on llm spans", async () => {
|
|
243
|
+
const { emit } = await createHarness();
|
|
244
|
+
|
|
245
|
+
await emit("session_start");
|
|
246
|
+
await emit("thinking_level_select", { level: "high", previousLevel: "off" });
|
|
247
|
+
await emit("before_agent_start", {
|
|
248
|
+
prompt: "Use a routed model",
|
|
249
|
+
images: [],
|
|
250
|
+
});
|
|
251
|
+
await emit("context", { messages: [{ role: "user", content: "Use a routed model" }] });
|
|
252
|
+
await emit("after_provider_response", {
|
|
253
|
+
status: 200,
|
|
254
|
+
headers: {
|
|
255
|
+
"x-ratelimit-remaining-requests": "42",
|
|
256
|
+
"retry-after": "5",
|
|
257
|
+
authorization: "secret",
|
|
258
|
+
},
|
|
259
|
+
});
|
|
260
|
+
await emit("message_end", {
|
|
261
|
+
message: {
|
|
262
|
+
role: "assistant",
|
|
263
|
+
provider: "openrouter",
|
|
264
|
+
model: "auto",
|
|
265
|
+
responseModel: "anthropic/claude-sonnet-4-5",
|
|
266
|
+
timestamp: 1_700_000_000_000,
|
|
267
|
+
content: [{ type: "text", text: "Done." }],
|
|
268
|
+
},
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
const turnSpan = mockState.startSpans.find(
|
|
272
|
+
(span) => span.type === "task" && span.name === "Turn 1",
|
|
273
|
+
);
|
|
274
|
+
const llmSpan = mockState.startSpans.find((span) => span.type === "llm");
|
|
275
|
+
|
|
276
|
+
expect(turnSpan?.metadata).toMatchObject({ thinking_level: "high" });
|
|
277
|
+
expect(llmSpan).toMatchObject({ name: "anthropic/claude-sonnet-4-5" });
|
|
278
|
+
expect(llmSpan?.metadata).toMatchObject({
|
|
279
|
+
model: "anthropic/claude-sonnet-4-5",
|
|
280
|
+
requested_model: "auto",
|
|
281
|
+
response_model: "anthropic/claude-sonnet-4-5",
|
|
282
|
+
thinking_level: "high",
|
|
283
|
+
provider_response_status: 200,
|
|
284
|
+
provider_response_headers: {
|
|
285
|
+
"x-ratelimit-remaining-requests": "42",
|
|
286
|
+
"retry-after": "5",
|
|
287
|
+
},
|
|
288
|
+
});
|
|
289
|
+
const llmMetadata = llmSpan?.metadata as
|
|
290
|
+
| { provider_response_headers?: Record<string, unknown> }
|
|
291
|
+
| undefined;
|
|
292
|
+
expect(llmMetadata?.provider_response_headers?.authorization).toBeUndefined();
|
|
293
|
+
});
|
|
294
|
+
|
|
242
295
|
it("parents tool spans under the llm span that emitted the matching tool call", async () => {
|
|
243
296
|
const { emit } = await createHarness();
|
|
244
297
|
|
package/src/index.ts
CHANGED
|
@@ -39,9 +39,15 @@ interface SessionDescriptor {
|
|
|
39
39
|
sessionKey: string;
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
+
interface ProviderResponseMetadata {
|
|
43
|
+
status?: number;
|
|
44
|
+
headers?: Record<string, string>;
|
|
45
|
+
}
|
|
46
|
+
|
|
42
47
|
interface PendingLlmCall {
|
|
43
48
|
startedAt: number;
|
|
44
49
|
input: NormalizedAgentMessage[];
|
|
50
|
+
providerResponse?: ProviderResponseMetadata;
|
|
45
51
|
}
|
|
46
52
|
|
|
47
53
|
interface TrackedToolStart {
|
|
@@ -62,6 +68,7 @@ interface ActiveTurn {
|
|
|
62
68
|
lastAssistantMessage?: AssistantMessageLike;
|
|
63
69
|
lastOutput?: NormalizedAssistantMessage;
|
|
64
70
|
error?: string;
|
|
71
|
+
thinkingLevel?: string;
|
|
65
72
|
}
|
|
66
73
|
|
|
67
74
|
interface ActiveSession {
|
|
@@ -80,6 +87,7 @@ interface ActiveSession {
|
|
|
80
87
|
startedAt?: number;
|
|
81
88
|
totalTurns: number;
|
|
82
89
|
totalToolCalls: number;
|
|
90
|
+
thinkingLevel?: string;
|
|
83
91
|
currentTurn?: ActiveTurn;
|
|
84
92
|
}
|
|
85
93
|
|
|
@@ -127,6 +135,52 @@ function safeModelName(model: unknown): string | undefined {
|
|
|
127
135
|
return undefined;
|
|
128
136
|
}
|
|
129
137
|
|
|
138
|
+
function stringProperty(
|
|
139
|
+
value: Record<string, unknown>,
|
|
140
|
+
keys: readonly string[],
|
|
141
|
+
): string | undefined {
|
|
142
|
+
for (const key of keys) {
|
|
143
|
+
const item = value[key];
|
|
144
|
+
if (typeof item === "string" && item.trim()) return item;
|
|
145
|
+
}
|
|
146
|
+
return undefined;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
function responseModelName(message: AssistantMessageLike): string | undefined {
|
|
150
|
+
return stringProperty(message as unknown as Record<string, unknown>, [
|
|
151
|
+
"responseModel",
|
|
152
|
+
"routedModel",
|
|
153
|
+
"resolvedModel",
|
|
154
|
+
"actualModel",
|
|
155
|
+
"concreteModel",
|
|
156
|
+
"outputModel",
|
|
157
|
+
]);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
function providerResponseMetadata(event: unknown): ProviderResponseMetadata | undefined {
|
|
161
|
+
if (!isPlainObject(event)) return undefined;
|
|
162
|
+
const metadata: ProviderResponseMetadata = {};
|
|
163
|
+
if (typeof event.status === "number") metadata.status = event.status;
|
|
164
|
+
|
|
165
|
+
const headers = event.headers;
|
|
166
|
+
if (isPlainObject(headers)) {
|
|
167
|
+
const allowedHeaders: Record<string, string> = {};
|
|
168
|
+
for (const [key, value] of Object.entries(headers)) {
|
|
169
|
+
const normalizedKey = key.toLowerCase();
|
|
170
|
+
if (!normalizedKey.startsWith("x-ratelimit-") && normalizedKey !== "retry-after") {
|
|
171
|
+
continue;
|
|
172
|
+
}
|
|
173
|
+
if (typeof value === "string") allowedHeaders[normalizedKey] = value;
|
|
174
|
+
else if (typeof value === "number" || typeof value === "boolean") {
|
|
175
|
+
allowedHeaders[normalizedKey] = String(value);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
if (Object.keys(allowedHeaders).length > 0) metadata.headers = allowedHeaders;
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
return metadata.status !== undefined || metadata.headers ? metadata : undefined;
|
|
182
|
+
}
|
|
183
|
+
|
|
130
184
|
function getPreviousSessionFile(event: unknown): string | undefined {
|
|
131
185
|
if (!isPlainObject(event)) return undefined;
|
|
132
186
|
return typeof event.previousSessionFile === "string" ? event.previousSessionFile : undefined;
|
|
@@ -666,6 +720,7 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
|
|
|
666
720
|
metadata: {
|
|
667
721
|
turn_number: session.totalTurns,
|
|
668
722
|
active_model: safeModelName(ctx.model),
|
|
723
|
+
thinking_level: session.thinkingLevel,
|
|
669
724
|
},
|
|
670
725
|
name: `Turn ${session.totalTurns}`,
|
|
671
726
|
type: "task",
|
|
@@ -683,6 +738,7 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
|
|
|
683
738
|
lastAssistantMessage: undefined,
|
|
684
739
|
lastOutput: undefined,
|
|
685
740
|
error: undefined,
|
|
741
|
+
thinkingLevel: session.thinkingLevel,
|
|
686
742
|
};
|
|
687
743
|
|
|
688
744
|
store.patch(session.sessionKey, {
|
|
@@ -699,6 +755,22 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
|
|
|
699
755
|
});
|
|
700
756
|
});
|
|
701
757
|
|
|
758
|
+
pi.on("after_provider_response", async (event) => {
|
|
759
|
+
if (!activeSession?.currentTurn) return;
|
|
760
|
+
const metadata = providerResponseMetadata(event);
|
|
761
|
+
if (!metadata) return;
|
|
762
|
+
const pending = [...activeSession.currentTurn.llmCalls]
|
|
763
|
+
.reverse()
|
|
764
|
+
.find((call) => !call.providerResponse);
|
|
765
|
+
if (pending) pending.providerResponse = metadata;
|
|
766
|
+
});
|
|
767
|
+
|
|
768
|
+
pi.on("thinking_level_select", async (event) => {
|
|
769
|
+
if (!isPlainObject(event) || typeof event.level !== "string") return;
|
|
770
|
+
if (activeSession) activeSession.thinkingLevel = event.level;
|
|
771
|
+
if (activeSession?.currentTurn) activeSession.currentTurn.thinkingLevel = event.level;
|
|
772
|
+
});
|
|
773
|
+
|
|
702
774
|
pi.on("message_end", async (event) => {
|
|
703
775
|
const session = activeSession;
|
|
704
776
|
if (
|
|
@@ -716,7 +788,9 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
|
|
|
716
788
|
input: [{ role: "user", content: session.currentTurn.prompt }],
|
|
717
789
|
};
|
|
718
790
|
|
|
719
|
-
const modelName = safeModelName(message) ?? message.model;
|
|
791
|
+
const requestedModelName = safeModelName(message) ?? message.model;
|
|
792
|
+
const responseModel = responseModelName(message);
|
|
793
|
+
const modelName = responseModel ?? requestedModelName;
|
|
720
794
|
const endedAt = message.timestamp ?? Date.now();
|
|
721
795
|
const normalizedOutput = normalizeAssistantMessage(message);
|
|
722
796
|
const error =
|
|
@@ -740,7 +814,12 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
|
|
|
740
814
|
api: message.api,
|
|
741
815
|
provider: message.provider,
|
|
742
816
|
model: modelName,
|
|
817
|
+
requested_model: requestedModelName,
|
|
818
|
+
response_model: responseModel,
|
|
743
819
|
stop_reason: message.stopReason,
|
|
820
|
+
thinking_level: session.currentTurn.thinkingLevel ?? session.thinkingLevel,
|
|
821
|
+
provider_response_status: pending.providerResponse?.status,
|
|
822
|
+
provider_response_headers: pending.providerResponse?.headers,
|
|
744
823
|
cache_read_tokens: message.usage?.cacheRead,
|
|
745
824
|
cache_write_tokens: message.usage?.cacheWrite,
|
|
746
825
|
},
|