@braintrust/pi-extension 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -47,9 +47,9 @@ pi -e .
47
47
 
48
48
  ## Compatibility
49
49
 
50
- This package supports the **last three stable pi versions**.
50
+ This package supports the **latest patch release from each of the last six stable pi minor versions**, currently excluding pi versions before `0.65.0`.
51
51
 
52
- Our GitHub Actions compatibility job automatically resolves and tests the latest patch release from each of the last three stable pi minor versions, so new pi releases are picked up without manually updating the matrix.
52
+ Our GitHub Actions compatibility job automatically resolves and tests that compatibility window, so new pi releases are picked up without manually updating the matrix.
53
53
 
54
54
  ## Quick start
55
55
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@braintrust/pi-extension",
3
- "version": "0.4.0",
3
+ "version": "0.5.0",
4
4
  "description": "Braintrust extension for pi. Includes automatic tracing for pi sessions, turns, LLM calls, and tool executions to Braintrust.",
5
5
  "keywords": [
6
6
  "braintrust",
@@ -28,8 +28,8 @@
28
28
  "valibot": "^1.3.1"
29
29
  },
30
30
  "devDependencies": {
31
- "@mariozechner/pi-ai": "^0.68.0",
32
- "@mariozechner/pi-coding-agent": "^0.68.0",
31
+ "@mariozechner/pi-ai": "^0.71.1",
32
+ "@mariozechner/pi-coding-agent": "^0.71.1",
33
33
  "@types/node": "^25.6.0",
34
34
  "typescript": "^6.0.2",
35
35
  "vite-plus": "^0.1.16",
package/src/(test file — `… CHANGED` header missing from this rendered diff; the hunks below are TypeScript test code, likely the integration test, not package.json) CHANGED
@@ -149,6 +149,28 @@ function makeTempDir(prefix: string): string {
149
149
  return dir;
150
150
  }
151
151
 
152
+ // Injected by CI (see .github/workflows/ci.yml). When unset (e.g. local dev) we
153
+ // assume the currently installed pi is at least as new as any version we branch
154
+ // on below.
155
+ const PI_COMPAT_VERSION = process.env.PI_COMPAT_VERSION;
156
+
157
+ function piCompatAtLeast(target: string): boolean {
158
+ if (!PI_COMPAT_VERSION) return true;
159
+ const parse = (v: string) =>
160
+ v
161
+ .split("-")[0]
162
+ .split(".")
163
+ .map((part) => Number.parseInt(part, 10) || 0);
164
+ const actual = parse(PI_COMPAT_VERSION);
165
+ const wanted = parse(target);
166
+ for (let i = 0; i < Math.max(actual.length, wanted.length); i += 1) {
167
+ const a = actual[i] ?? 0;
168
+ const w = wanted[i] ?? 0;
169
+ if (a !== w) return a > w;
170
+ }
171
+ return true;
172
+ }
173
+
152
174
  function buildAssistantMessage(model: Model<Api>): AssistantMessage {
153
175
  return {
154
176
  role: "assistant",
@@ -585,9 +607,18 @@ describe("braintrustPiExtension integration", () => {
585
607
  const firstLlmSpanId = llmSpans[0]?.spanId;
586
608
 
587
609
  expect(toolSpans).toHaveLength(2);
610
+ // pi < 0.68.1 emits `tool_execution_end` in assistant source order, so the
611
+ // extension logs tool spans as [tool-1, tool-2]. Starting with pi 0.68.1 the
612
+ // agent emits parallel tool completions eagerly (completion order), so the
613
+ // fast `tool-2` finishes before the slow `tool-1` and spans are logged as
614
+ // [tool-2, tool-1]. See pi-coding-agent changelog 0.68.1 / issue #3503.
615
+ // TODO: drop the pi < 0.68.1 branch once we stop testing against it.
616
+ const expectedToolCallIdOrder = piCompatAtLeast("0.68.1")
617
+ ? ["tool-2", "tool-1"]
618
+ : ["tool-1", "tool-2"];
588
619
  expect(
589
620
  toolSpans.map((span) => (span.metadata as Record<string, unknown> | undefined)?.tool_call_id),
590
- ).toEqual(["tool-1", "tool-2"]);
621
+ ).toEqual(expectedToolCallIdOrder);
591
622
  expect(toolSpans.map((span) => span.parentSpanId)).toEqual([firstLlmSpanId, firstLlmSpanId]);
592
623
  });
593
624
 
package/src/index.test.ts CHANGED
@@ -239,6 +239,59 @@ describe("braintrustPiExtension", () => {
239
239
  expect(mockState.updateSpans).toEqual([]);
240
240
  });
241
241
 
242
+ it("records resolved model, thinking level, and provider response metadata on llm spans", async () => {
243
+ const { emit } = await createHarness();
244
+
245
+ await emit("session_start");
246
+ await emit("thinking_level_select", { level: "high", previousLevel: "off" });
247
+ await emit("before_agent_start", {
248
+ prompt: "Use a routed model",
249
+ images: [],
250
+ });
251
+ await emit("context", { messages: [{ role: "user", content: "Use a routed model" }] });
252
+ await emit("after_provider_response", {
253
+ status: 200,
254
+ headers: {
255
+ "x-ratelimit-remaining-requests": "42",
256
+ "retry-after": "5",
257
+ authorization: "secret",
258
+ },
259
+ });
260
+ await emit("message_end", {
261
+ message: {
262
+ role: "assistant",
263
+ provider: "openrouter",
264
+ model: "auto",
265
+ responseModel: "anthropic/claude-sonnet-4-5",
266
+ timestamp: 1_700_000_000_000,
267
+ content: [{ type: "text", text: "Done." }],
268
+ },
269
+ });
270
+
271
+ const turnSpan = mockState.startSpans.find(
272
+ (span) => span.type === "task" && span.name === "Turn 1",
273
+ );
274
+ const llmSpan = mockState.startSpans.find((span) => span.type === "llm");
275
+
276
+ expect(turnSpan?.metadata).toMatchObject({ thinking_level: "high" });
277
+ expect(llmSpan).toMatchObject({ name: "anthropic/claude-sonnet-4-5" });
278
+ expect(llmSpan?.metadata).toMatchObject({
279
+ model: "anthropic/claude-sonnet-4-5",
280
+ requested_model: "auto",
281
+ response_model: "anthropic/claude-sonnet-4-5",
282
+ thinking_level: "high",
283
+ provider_response_status: 200,
284
+ provider_response_headers: {
285
+ "x-ratelimit-remaining-requests": "42",
286
+ "retry-after": "5",
287
+ },
288
+ });
289
+ const llmMetadata = llmSpan?.metadata as
290
+ | { provider_response_headers?: Record<string, unknown> }
291
+ | undefined;
292
+ expect(llmMetadata?.provider_response_headers?.authorization).toBeUndefined();
293
+ });
294
+
242
295
  it("parents tool spans under the llm span that emitted the matching tool call", async () => {
243
296
  const { emit } = await createHarness();
244
297
 
package/src/index.ts CHANGED
@@ -39,9 +39,15 @@ interface SessionDescriptor {
39
39
  sessionKey: string;
40
40
  }
41
41
 
42
+ interface ProviderResponseMetadata {
43
+ status?: number;
44
+ headers?: Record<string, string>;
45
+ }
46
+
42
47
  interface PendingLlmCall {
43
48
  startedAt: number;
44
49
  input: NormalizedAgentMessage[];
50
+ providerResponse?: ProviderResponseMetadata;
45
51
  }
46
52
 
47
53
  interface TrackedToolStart {
@@ -62,6 +68,7 @@ interface ActiveTurn {
62
68
  lastAssistantMessage?: AssistantMessageLike;
63
69
  lastOutput?: NormalizedAssistantMessage;
64
70
  error?: string;
71
+ thinkingLevel?: string;
65
72
  }
66
73
 
67
74
  interface ActiveSession {
@@ -80,6 +87,7 @@ interface ActiveSession {
80
87
  startedAt?: number;
81
88
  totalTurns: number;
82
89
  totalToolCalls: number;
90
+ thinkingLevel?: string;
83
91
  currentTurn?: ActiveTurn;
84
92
  }
85
93
 
@@ -127,6 +135,52 @@ function safeModelName(model: unknown): string | undefined {
127
135
  return undefined;
128
136
  }
129
137
 
138
+ function stringProperty(
139
+ value: Record<string, unknown>,
140
+ keys: readonly string[],
141
+ ): string | undefined {
142
+ for (const key of keys) {
143
+ const item = value[key];
144
+ if (typeof item === "string" && item.trim()) return item;
145
+ }
146
+ return undefined;
147
+ }
148
+
149
+ function responseModelName(message: AssistantMessageLike): string | undefined {
150
+ return stringProperty(message as unknown as Record<string, unknown>, [
151
+ "responseModel",
152
+ "routedModel",
153
+ "resolvedModel",
154
+ "actualModel",
155
+ "concreteModel",
156
+ "outputModel",
157
+ ]);
158
+ }
159
+
160
+ function providerResponseMetadata(event: unknown): ProviderResponseMetadata | undefined {
161
+ if (!isPlainObject(event)) return undefined;
162
+ const metadata: ProviderResponseMetadata = {};
163
+ if (typeof event.status === "number") metadata.status = event.status;
164
+
165
+ const headers = event.headers;
166
+ if (isPlainObject(headers)) {
167
+ const allowedHeaders: Record<string, string> = {};
168
+ for (const [key, value] of Object.entries(headers)) {
169
+ const normalizedKey = key.toLowerCase();
170
+ if (!normalizedKey.startsWith("x-ratelimit-") && normalizedKey !== "retry-after") {
171
+ continue;
172
+ }
173
+ if (typeof value === "string") allowedHeaders[normalizedKey] = value;
174
+ else if (typeof value === "number" || typeof value === "boolean") {
175
+ allowedHeaders[normalizedKey] = String(value);
176
+ }
177
+ }
178
+ if (Object.keys(allowedHeaders).length > 0) metadata.headers = allowedHeaders;
179
+ }
180
+
181
+ return metadata.status !== undefined || metadata.headers ? metadata : undefined;
182
+ }
183
+
130
184
  function getPreviousSessionFile(event: unknown): string | undefined {
131
185
  if (!isPlainObject(event)) return undefined;
132
186
  return typeof event.previousSessionFile === "string" ? event.previousSessionFile : undefined;
@@ -666,6 +720,7 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
666
720
  metadata: {
667
721
  turn_number: session.totalTurns,
668
722
  active_model: safeModelName(ctx.model),
723
+ thinking_level: session.thinkingLevel,
669
724
  },
670
725
  name: `Turn ${session.totalTurns}`,
671
726
  type: "task",
@@ -683,6 +738,7 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
683
738
  lastAssistantMessage: undefined,
684
739
  lastOutput: undefined,
685
740
  error: undefined,
741
+ thinkingLevel: session.thinkingLevel,
686
742
  };
687
743
 
688
744
  store.patch(session.sessionKey, {
@@ -699,6 +755,22 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
699
755
  });
700
756
  });
701
757
 
758
+ pi.on("after_provider_response", async (event) => {
759
+ if (!activeSession?.currentTurn) return;
760
+ const metadata = providerResponseMetadata(event);
761
+ if (!metadata) return;
762
+ const pending = [...activeSession.currentTurn.llmCalls]
763
+ .reverse()
764
+ .find((call) => !call.providerResponse);
765
+ if (pending) pending.providerResponse = metadata;
766
+ });
767
+
768
+ pi.on("thinking_level_select", async (event) => {
769
+ if (!isPlainObject(event) || typeof event.level !== "string") return;
770
+ if (activeSession) activeSession.thinkingLevel = event.level;
771
+ if (activeSession?.currentTurn) activeSession.currentTurn.thinkingLevel = event.level;
772
+ });
773
+
702
774
  pi.on("message_end", async (event) => {
703
775
  const session = activeSession;
704
776
  if (
@@ -716,7 +788,9 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
716
788
  input: [{ role: "user", content: session.currentTurn.prompt }],
717
789
  };
718
790
 
719
- const modelName = safeModelName(message) ?? message.model;
791
+ const requestedModelName = safeModelName(message) ?? message.model;
792
+ const responseModel = responseModelName(message);
793
+ const modelName = responseModel ?? requestedModelName;
720
794
  const endedAt = message.timestamp ?? Date.now();
721
795
  const normalizedOutput = normalizeAssistantMessage(message);
722
796
  const error =
@@ -740,7 +814,12 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
740
814
  api: message.api,
741
815
  provider: message.provider,
742
816
  model: modelName,
817
+ requested_model: requestedModelName,
818
+ response_model: responseModel,
743
819
  stop_reason: message.stopReason,
820
+ thinking_level: session.currentTurn.thinkingLevel ?? session.thinkingLevel,
821
+ provider_response_status: pending.providerResponse?.status,
822
+ provider_response_headers: pending.providerResponse?.headers,
744
823
  cache_read_tokens: message.usage?.cacheRead,
745
824
  cache_write_tokens: message.usage?.cacheWrite,
746
825
  },
package/src/types.ts CHANGED
@@ -121,6 +121,7 @@ export interface AssistantMessageLike {
121
121
  api?: string;
122
122
  provider?: string;
123
123
  model?: string;
124
+ responseModel?: string;
124
125
  usage?: UsageLike;
125
126
  stopReason?: string;
126
127
  errorMessage?: string;