@braintrust/pi-extension 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # @braintrust/pi-extension
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/%40braintrust%2Fpi-extension)](https://www.npmjs.com/package/@braintrust/pi-extension)
4
+
3
5
  Braintrust extension for [pi](https://github.com/mariozechner/pi-coding-agent).
4
6
 
5
7
  Today this extension automatically traces pi sessions, turns, model calls, and tool executions to Braintrust.
@@ -45,9 +47,9 @@ pi -e .
45
47
 
46
48
  ## Compatibility
47
49
 
48
- This package supports the **last three stable pi versions**.
50
+ This package supports the **latest patch release from each of the last six stable pi minor versions**, currently excluding pi versions before `0.65.0`.
49
51
 
50
- Our GitHub Actions compatibility job automatically resolves and tests the latest patch release from each of the last three stable pi minor versions, so new pi releases are picked up without manually updating the matrix.
52
+ Our GitHub Actions compatibility job automatically resolves and tests that compatibility window, so new pi releases are picked up without manually updating the matrix.
51
53
 
52
54
  ## Quick start
53
55
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@braintrust/pi-extension",
3
- "version": "0.3.1",
3
+ "version": "0.5.0",
4
4
  "description": "Braintrust extension for pi. Includes automatic tracing for pi sessions, turns, LLM calls, and tool executions to Braintrust.",
5
5
  "keywords": [
6
6
  "braintrust",
@@ -24,12 +24,12 @@
24
24
  "access": "public"
25
25
  },
26
26
  "dependencies": {
27
- "braintrust": "^3.8.0",
27
+ "braintrust": "^3.9.0",
28
28
  "valibot": "^1.3.1"
29
29
  },
30
30
  "devDependencies": {
31
- "@mariozechner/pi-ai": "^0.67.2",
32
- "@mariozechner/pi-coding-agent": "^0.67.2",
31
+ "@mariozechner/pi-ai": "^0.71.1",
32
+ "@mariozechner/pi-coding-agent": "^0.71.1",
33
33
  "@types/node": "^25.6.0",
34
34
  "typescript": "^6.0.2",
35
35
  "vite-plus": "^0.1.16",
@@ -149,6 +149,28 @@ function makeTempDir(prefix: string): string {
149
149
  return dir;
150
150
  }
151
151
 
152
// Injected by CI (see .github/workflows/ci.yml). When unset (e.g. local dev) we
// assume the currently installed pi is at least as new as any version we branch
// on below.
const PI_COMPAT_VERSION = process.env.PI_COMPAT_VERSION;

/**
 * Splits a version string into numeric components for comparison.
 *
 * A leading `v`, a `-prerelease` suffix and `+build` metadata are ignored, and
 * any component that does not parse as a number counts as 0.
 */
function parsePiVersion(version: string): number[] {
  // Keep only the `major.minor.patch` core: "v0.68.1-beta.2+sha" -> "0.68.1".
  const core = version.trim().replace(/^v/i, "").split(/[-+]/, 1)[0] ?? "";
  return core.split(".").map((part) => {
    const value = Number.parseInt(part, 10);
    return Number.isNaN(value) ? 0 : value;
  });
}

/**
 * Reports whether the pi version under test is `target` or newer.
 *
 * @param target - Minimum version to compare against, e.g. `"0.68.1"`.
 * @param actual - Version to check. Defaults to `PI_COMPAT_VERSION`; callers
 *   normally omit it.
 * @returns `true` when `actual` is unset or empty (the installed pi is assumed
 *   to be new enough), or when `actual` compares greater than or equal to
 *   `target` component by component. Missing components count as 0, so `0.68`
 *   equals `0.68.0`. Prerelease suffixes are ignored, so `0.68.1-beta` is
 *   treated as `0.68.1`.
 */
function piCompatAtLeast(
  target: string,
  actual: string | undefined = PI_COMPAT_VERSION,
): boolean {
  if (!actual) return true;
  const have = parsePiVersion(actual);
  const want = parsePiVersion(target);
  const length = Math.max(have.length, want.length);
  for (let i = 0; i < length; i += 1) {
    const a = have[i] ?? 0;
    const w = want[i] ?? 0;
    // The first differing component decides the comparison.
    if (a !== w) return a > w;
  }
  return true;
}
173
+
152
174
  function buildAssistantMessage(model: Model<Api>): AssistantMessage {
153
175
  return {
154
176
  role: "assistant",
@@ -585,9 +607,18 @@ describe("braintrustPiExtension integration", () => {
585
607
  const firstLlmSpanId = llmSpans[0]?.spanId;
586
608
 
587
609
  expect(toolSpans).toHaveLength(2);
610
+ // pi < 0.68.1 emits `tool_execution_end` in assistant source order, so the
611
+ // extension logs tool spans as [tool-1, tool-2]. Starting with pi 0.68.1 the
612
+ // agent emits parallel tool completions eagerly (completion order), so the
613
+ // fast `tool-2` finishes before the slow `tool-1` and spans are logged as
614
+ // [tool-2, tool-1]. See pi-coding-agent changelog 0.68.1 / issue #3503.
615
+ // TODO: drop the pi < 0.68.1 branch once we stop testing against it.
616
+ const expectedToolCallIdOrder = piCompatAtLeast("0.68.1")
617
+ ? ["tool-2", "tool-1"]
618
+ : ["tool-1", "tool-2"];
588
619
  expect(
589
620
  toolSpans.map((span) => (span.metadata as Record<string, unknown> | undefined)?.tool_call_id),
590
- ).toEqual(["tool-1", "tool-2"]);
621
+ ).toEqual(expectedToolCallIdOrder);
591
622
  expect(toolSpans.map((span) => span.parentSpanId)).toEqual([firstLlmSpanId, firstLlmSpanId]);
592
623
  });
593
624
 
package/src/index.test.ts CHANGED
@@ -239,6 +239,59 @@ describe("braintrustPiExtension", () => {
239
239
  expect(mockState.updateSpans).toEqual([]);
240
240
  });
241
241
 
242
+ it("records resolved model, thinking level, and provider response metadata on llm spans", async () => {
243
+ const { emit } = await createHarness();
244
+
245
+ await emit("session_start");
246
+ await emit("thinking_level_select", { level: "high", previousLevel: "off" });
247
+ await emit("before_agent_start", {
248
+ prompt: "Use a routed model",
249
+ images: [],
250
+ });
251
+ await emit("context", { messages: [{ role: "user", content: "Use a routed model" }] });
252
+ await emit("after_provider_response", {
253
+ status: 200,
254
+ headers: {
255
+ "x-ratelimit-remaining-requests": "42",
256
+ "retry-after": "5",
257
+ authorization: "secret",
258
+ },
259
+ });
260
+ await emit("message_end", {
261
+ message: {
262
+ role: "assistant",
263
+ provider: "openrouter",
264
+ model: "auto",
265
+ responseModel: "anthropic/claude-sonnet-4-5",
266
+ timestamp: 1_700_000_000_000,
267
+ content: [{ type: "text", text: "Done." }],
268
+ },
269
+ });
270
+
271
+ const turnSpan = mockState.startSpans.find(
272
+ (span) => span.type === "task" && span.name === "Turn 1",
273
+ );
274
+ const llmSpan = mockState.startSpans.find((span) => span.type === "llm");
275
+
276
+ expect(turnSpan?.metadata).toMatchObject({ thinking_level: "high" });
277
+ expect(llmSpan).toMatchObject({ name: "anthropic/claude-sonnet-4-5" });
278
+ expect(llmSpan?.metadata).toMatchObject({
279
+ model: "anthropic/claude-sonnet-4-5",
280
+ requested_model: "auto",
281
+ response_model: "anthropic/claude-sonnet-4-5",
282
+ thinking_level: "high",
283
+ provider_response_status: 200,
284
+ provider_response_headers: {
285
+ "x-ratelimit-remaining-requests": "42",
286
+ "retry-after": "5",
287
+ },
288
+ });
289
+ const llmMetadata = llmSpan?.metadata as
290
+ | { provider_response_headers?: Record<string, unknown> }
291
+ | undefined;
292
+ expect(llmMetadata?.provider_response_headers?.authorization).toBeUndefined();
293
+ });
294
+
242
295
  it("parents tool spans under the llm span that emitted the matching tool call", async () => {
243
296
  const { emit } = await createHarness();
244
297
 
@@ -411,6 +464,55 @@ describe("braintrustPiExtension", () => {
411
464
  );
412
465
  });
413
466
 
467
+ it("records the structured shutdown reason on the finalized root span", async () => {
468
+ const { emit } = await createHarness();
469
+
470
+ await emit("session_start");
471
+ await emit("before_agent_start", {
472
+ prompt: "Inspect the package",
473
+ images: [],
474
+ });
475
+ await emit("session_shutdown", { reason: "quit" });
476
+
477
+ const rootFinalizeLog = mockState.logSpans
478
+ .map((entry) => entry.event as Record<string, unknown>)
479
+ .find(
480
+ (event) =>
481
+ (event.metadata as Record<string, unknown> | undefined)?.last_close_reason === "quit",
482
+ );
483
+ expect(rootFinalizeLog).toBeDefined();
484
+ expect(mockState.endSpans.length).toBeGreaterThan(0);
485
+ expect(mockState.flushCalls).toBeGreaterThan(0);
486
+ });
487
+
488
+ it("does not finalize the root span on reload shutdowns", async () => {
489
+ const { emit } = await createHarness();
490
+
491
+ await emit("session_start");
492
+ await emit("before_agent_start", {
493
+ prompt: "Inspect the package",
494
+ images: [],
495
+ });
496
+
497
+ const startsBefore = mockState.startSpans.length;
498
+ const endsBefore = mockState.endSpans.length;
499
+ const flushesBefore = mockState.flushCalls;
500
+
501
+ await emit("session_shutdown", { reason: "reload" });
502
+
503
+ // No additional span endings during reload, but pending writes are still flushed.
504
+ expect(mockState.startSpans.length).toBe(startsBefore);
505
+ expect(mockState.endSpans.length).toBe(endsBefore);
506
+ expect(mockState.flushCalls).toBeGreaterThan(flushesBefore);
507
+ const reloadClose = mockState.logSpans
508
+ .map((entry) => entry.event as Record<string, unknown>)
509
+ .some(
510
+ (event) =>
511
+ (event.metadata as Record<string, unknown> | undefined)?.last_close_reason === "reload",
512
+ );
513
+ expect(reloadClose).toBe(false);
514
+ });
515
+
414
516
  it("hides all UI when showUi is false", async () => {
415
517
  mockState.config.showUi = false;
416
518
 
package/src/index.ts CHANGED
@@ -39,9 +39,15 @@ interface SessionDescriptor {
39
39
  sessionKey: string;
40
40
  }
41
41
 
42
+ interface ProviderResponseMetadata { // Provider response details kept on a pending LLM call for span metadata.
43
+ status?: number; // Set only when the event carried a numeric `status`.
44
+ headers?: Record<string, string>; // Allowlisted response headers, keyed by lowercased header name.
45
+ }
46
+
42
47
  interface PendingLlmCall {
43
48
  startedAt: number;
44
49
  input: NormalizedAgentMessage[];
50
+ providerResponse?: ProviderResponseMetadata;
45
51
  }
46
52
 
47
53
  interface TrackedToolStart {
@@ -62,6 +68,7 @@ interface ActiveTurn {
62
68
  lastAssistantMessage?: AssistantMessageLike;
63
69
  lastOutput?: NormalizedAssistantMessage;
64
70
  error?: string;
71
+ thinkingLevel?: string;
65
72
  }
66
73
 
67
74
  interface ActiveSession {
@@ -80,6 +87,7 @@ interface ActiveSession {
80
87
  startedAt?: number;
81
88
  totalTurns: number;
82
89
  totalToolCalls: number;
90
+ thinkingLevel?: string;
83
91
  currentTurn?: ActiveTurn;
84
92
  }
85
93
 
@@ -127,12 +135,58 @@ function safeModelName(model: unknown): string | undefined {
127
135
  return undefined;
128
136
  }
129
137
 
138
/**
 * Returns the first non-blank string found on `value` under any of `keys`.
 *
 * Keys are tried in the order given. A property counts only when it is a
 * string containing at least one non-whitespace character; the string is
 * returned as stored, without trimming.
 *
 * @param value - Object to read properties from.
 * @param keys - Property names to try, in priority order.
 * @returns The first matching string, or `undefined` when none qualifies.
 */
function stringProperty(
  value: Record<string, unknown>,
  keys: readonly string[],
): string | undefined {
  for (const key of keys) {
    const candidate = value[key];
    if (typeof candidate !== "string") continue;
    if (candidate.trim().length === 0) continue;
    return candidate;
  }
  return undefined;
}
148
+
149
/**
 * Picks the concrete model name reported on an assistant message, if any.
 *
 * The message is probed for several alternative property names, in priority
 * order, and the first non-blank string wins.
 *
 * @param message - Assistant message to inspect.
 * @returns The reported model name, or `undefined` when no property holds one.
 */
function responseModelName(message: AssistantMessageLike): string | undefined {
  const candidateKeys = [
    "responseModel",
    "routedModel",
    "resolvedModel",
    "actualModel",
    "concreteModel",
    "outputModel",
  ] as const;
  // Most of these keys are not declared on the message type, so read the
  // message as a plain record.
  const fields = message as unknown as Record<string, unknown>;
  return stringProperty(fields, candidateKeys);
}
159
+
160
/**
 * Extracts the loggable parts of a provider response event.
 *
 * Only a numeric `status` and an allowlist of headers are kept: names starting
 * with `x-ratelimit-` and `retry-after`, matched case-insensitively. Kept
 * header names are lowercased; string values are stored as-is, numbers and
 * booleans are stringified, and any other value type is dropped.
 *
 * @param event - Raw event payload; anything that is not a plain object is
 *   rejected.
 * @returns The collected metadata, or `undefined` when neither a status nor
 *   any allowed header was found.
 */
function providerResponseMetadata(event: unknown): ProviderResponseMetadata | undefined {
  if (!isPlainObject(event)) return undefined;

  const result: ProviderResponseMetadata = {};
  if (typeof event.status === "number") result.status = event.status;

  const rawHeaders = event.headers;
  if (isPlainObject(rawHeaders)) {
    const kept: Record<string, string> = {};
    for (const [name, raw] of Object.entries(rawHeaders)) {
      const lowered = name.toLowerCase();
      const isAllowed = lowered.startsWith("x-ratelimit-") || lowered === "retry-after";
      if (!isAllowed) continue;
      switch (typeof raw) {
        case "string":
          kept[lowered] = raw;
          break;
        case "number":
        case "boolean":
          kept[lowered] = String(raw);
          break;
        default:
          break;
      }
    }
    if (Object.keys(kept).length > 0) result.headers = kept;
  }

  if (result.status === undefined && !result.headers) return undefined;
  return result;
}
183
+
130
184
  function getPreviousSessionFile(event: unknown): string | undefined {
131
185
  if (!isPlainObject(event)) return undefined;
132
186
  return typeof event.previousSessionFile === "string" ? event.previousSessionFile : undefined;
133
187
  }
134
188
 
135
- function getSessionStartReason(event: unknown): string | undefined {
189
/**
 * Reads the `reason` string from an event payload.
 *
 * @param event - Raw event payload of unknown shape.
 * @returns The value when the payload is a plain object carrying a string
 *   `reason`; otherwise `undefined`.
 */
function getEventReason(event: unknown): string | undefined {
  if (isPlainObject(event) && typeof event.reason === "string") {
    return event.reason;
  }
  return undefined;
}
@@ -606,7 +660,7 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
606
660
  pi.on("session_start", async (event, ctx) => {
607
661
  refreshTracingUi(ctx);
608
662
 
609
- const reason = getSessionStartReason(event);
663
+ const reason = getEventReason(event);
610
664
  if (reason === "new" || reason === "resume" || reason === "fork") {
611
665
  await rolloverSession(
612
666
  ctx,
@@ -666,6 +720,7 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
666
720
  metadata: {
667
721
  turn_number: session.totalTurns,
668
722
  active_model: safeModelName(ctx.model),
723
+ thinking_level: session.thinkingLevel,
669
724
  },
670
725
  name: `Turn ${session.totalTurns}`,
671
726
  type: "task",
@@ -683,6 +738,7 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
683
738
  lastAssistantMessage: undefined,
684
739
  lastOutput: undefined,
685
740
  error: undefined,
741
+ thinkingLevel: session.thinkingLevel,
686
742
  };
687
743
 
688
744
  store.patch(session.sessionKey, {
@@ -699,6 +755,22 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
699
755
  });
700
756
  });
701
757
 
758
+ pi.on("after_provider_response", async (event) => {
759
+ if (!activeSession?.currentTurn) return;
760
+ const metadata = providerResponseMetadata(event);
761
+ if (!metadata) return;
762
+ const pending = [...activeSession.currentTurn.llmCalls]
763
+ .reverse()
764
+ .find((call) => !call.providerResponse);
765
+ if (pending) pending.providerResponse = metadata;
766
+ });
767
+
768
+ pi.on("thinking_level_select", async (event) => {
769
+ if (!isPlainObject(event) || typeof event.level !== "string") return;
770
+ if (activeSession) activeSession.thinkingLevel = event.level;
771
+ if (activeSession?.currentTurn) activeSession.currentTurn.thinkingLevel = event.level;
772
+ });
773
+
702
774
  pi.on("message_end", async (event) => {
703
775
  const session = activeSession;
704
776
  if (
@@ -716,7 +788,9 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
716
788
  input: [{ role: "user", content: session.currentTurn.prompt }],
717
789
  };
718
790
 
719
- const modelName = safeModelName(message) ?? message.model;
791
+ const requestedModelName = safeModelName(message) ?? message.model;
792
+ const responseModel = responseModelName(message);
793
+ const modelName = responseModel ?? requestedModelName;
720
794
  const endedAt = message.timestamp ?? Date.now();
721
795
  const normalizedOutput = normalizeAssistantMessage(message);
722
796
  const error =
@@ -740,7 +814,12 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
740
814
  api: message.api,
741
815
  provider: message.provider,
742
816
  model: modelName,
817
+ requested_model: requestedModelName,
818
+ response_model: responseModel,
743
819
  stop_reason: message.stopReason,
820
+ thinking_level: session.currentTurn.thinkingLevel ?? session.thinkingLevel,
821
+ provider_response_status: pending.providerResponse?.status,
822
+ provider_response_headers: pending.providerResponse?.headers,
744
823
  cache_read_tokens: message.usage?.cacheRead,
745
824
  cache_write_tokens: message.usage?.cacheWrite,
746
825
  },
@@ -837,13 +916,27 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
837
916
  await finishTurn("agent_end", Date.now(), finalAssistant);
838
917
  });
839
918
 
840
- pi.on("session_shutdown", async (_event, ctx) => {
919
+ pi.on("session_shutdown", async (event, ctx) => {
841
920
  if (ctx.hasUI) {
842
921
  ctx.ui.setStatus(TRACING_STATUS_KEY, undefined);
843
922
  ctx.ui.setWidget(TRACING_WIDGET_KEY, undefined);
844
923
  }
924
+
925
+ // pi 0.68.0+ exposes a structured reason ("quit" | "reload" | "new" | "resume"
926
+ // | "fork"). Older pi hosts pass no payload, so we fall back to the generic
927
+ // label to stay backwards-compatible and keep the existing metadata shape.
928
+ const reason = getEventReason(event) ?? "session_shutdown";
929
+ logger.debug("session_shutdown", { reason });
930
+
845
931
  if (client && !clientInitializationError) {
846
- await finalizeSession("session_shutdown");
932
+ // On reload the same pi session is about to resume in a freshly imported
933
+ // extension instance, which restores its state from the persisted store and
934
+ // keeps writing to the existing root span. Finalizing here would close that
935
+ // root span out from under the reloaded instance, so we just flush pending
936
+ // writes and let the new instance continue the trace.
937
+ if (reason !== "reload") {
938
+ await finalizeSession(reason);
939
+ }
847
940
  await client.flush();
848
941
  }
849
942
  activeSession = undefined;
package/src/types.ts CHANGED
@@ -121,6 +121,7 @@ export interface AssistantMessageLike {
121
121
  api?: string;
122
122
  provider?: string;
123
123
  model?: string;
124
+ responseModel?: string;
124
125
  usage?: UsageLike;
125
126
  stopReason?: string;
126
127
  errorMessage?: string;