@braintrust/pi-extension 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![npm version](https://img.shields.io/npm/v/%40braintrust%2Fpi-extension)](https://www.npmjs.com/package/@braintrust/pi-extension)
4
4
 
5
- Braintrust extension for [pi](https://github.com/mariozechner/pi-coding-agent).
5
+ Braintrust extension for [pi](https://github.com/earendil-works/pi-coding-agent).
6
6
 
7
7
  Today this extension automatically traces pi sessions, turns, model calls, and tool executions to Braintrust.
8
8
 
@@ -47,9 +47,9 @@ pi -e .
47
47
 
48
48
  ## Compatibility
49
49
 
50
- This package supports the **last three stable pi versions**.
50
+ This package supports the **latest patch release from each of the last five stable pi minor versions**, currently excluding pi versions before `0.65.0`.
51
51
 
52
- Our GitHub Actions compatibility job automatically resolves and tests the latest patch release from each of the last three stable pi minor versions, so new pi releases are picked up without manually updating the matrix.
52
+ Our GitHub Actions compatibility job automatically resolves and tests that compatibility window, so new pi releases are picked up without manually updating the matrix.
53
53
 
54
54
  ## Quick start
55
55
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@braintrust/pi-extension",
3
- "version": "0.4.0",
3
+ "version": "0.5.1",
4
4
  "description": "Braintrust extension for pi. Includes automatic tracing for pi sessions, turns, LLM calls, and tool executions to Braintrust.",
5
5
  "keywords": [
6
6
  "braintrust",
@@ -28,15 +28,15 @@
28
28
  "valibot": "^1.3.1"
29
29
  },
30
30
  "devDependencies": {
31
- "@mariozechner/pi-ai": "^0.68.0",
32
- "@mariozechner/pi-coding-agent": "^0.68.0",
31
+ "@earendil-works/pi-ai": "^0.74.0",
32
+ "@earendil-works/pi-coding-agent": "^0.74.0",
33
33
  "@types/node": "^25.6.0",
34
34
  "typescript": "^6.0.2",
35
35
  "vite-plus": "^0.1.16",
36
36
  "vitest": "^4.1.4"
37
37
  },
38
38
  "peerDependencies": {
39
- "@mariozechner/pi-coding-agent": "*"
39
+ "@earendil-works/pi-coding-agent": "*"
40
40
  },
41
41
  "devEngines": {
42
42
  "packageManager": {
@@ -57,10 +57,11 @@
57
57
  },
58
58
  "scripts": {
59
59
  "preinstall": "node -e \"const userAgent = process.env.npm_config_user_agent || ''; if (process.env.INIT_CWD === process.cwd() && !userAgent.includes('pnpm/')) { console.error('Use pnpm in this repo.'); process.exit(1); }\"",
60
- "check": "vp check",
60
+ "check": "node scripts/sync-version.mjs --check && vp check",
61
61
  "fmt": "vp fmt",
62
62
  "lint": "vp lint",
63
63
  "pack": "vp pack",
64
+ "sync:version": "node scripts/sync-version.mjs",
64
65
  "test": "vitest run",
65
66
  "test:integration": "vitest run src/index.integration.test.ts",
66
67
  "test:watch": "vitest",
package/src/config.test.ts CHANGED
@@ -324,9 +324,8 @@ describe("loadConfig", () => {
324
324
  });
325
325
 
326
326
  describe("createLogger", () => {
327
- it("writes json log lines to the default log file when debug is enabled", async () => {
328
- const stateDir = makeTempDir("pi-extension-state-");
329
- const config: TraceConfig = {
327
+ function makeLoggerConfig(overrides: Partial<TraceConfig> = {}): TraceConfig {
328
+ return {
330
329
  enabled: true,
331
330
  apiKey: "key",
332
331
  apiUrl: undefined,
@@ -335,14 +334,19 @@ describe("createLogger", () => {
335
334
  projectName: "pi",
336
335
  debug: true,
337
336
  logFile: undefined,
338
- stateDir,
337
+ stateDir: makeTempDir("pi-extension-state-"),
339
338
  additionalMetadata: {},
340
339
  parentSpanId: undefined,
341
340
  rootSpanId: undefined,
342
341
  showUi: true,
343
342
  showTraceLink: true,
344
343
  configIssues: [],
344
+ ...overrides,
345
345
  };
346
+ }
347
+
348
+ it("writes json log lines to the default log file when debug is enabled", async () => {
349
+ const config = makeLoggerConfig();
346
350
 
347
351
  const logger = createLogger(config);
348
352
  logger.debug("debug message", { nested: { value: 1 } });
@@ -361,4 +365,27 @@ describe("createLogger", () => {
361
365
  message: "warn message",
362
366
  });
363
367
  });
368
+
369
+ it("writes warnings and errors to the default log file when debug is disabled", async () => {
370
+ const config = makeLoggerConfig({ debug: false, logFile: undefined });
371
+ const logger = createLogger(config);
372
+
373
+ logger.debug("debug message");
374
+ logger.info("info message");
375
+ logger.warn("warn message", { reason: "test" });
376
+ logger.error("error message");
377
+ await logger.flush();
378
+
379
+ const lines = readFileSync(logger.filePath, "utf8").trim().split("\n");
380
+ expect(lines).toHaveLength(2);
381
+ expect(JSON.parse(lines[0])).toMatchObject({
382
+ level: "warn",
383
+ message: "warn message",
384
+ data: { reason: "test" },
385
+ });
386
+ expect(JSON.parse(lines[1])).toMatchObject({
387
+ level: "error",
388
+ message: "error message",
389
+ });
390
+ });
364
391
  });
package/src/config.ts CHANGED
@@ -450,14 +450,25 @@ export function createLogger(config: TraceConfig): Logger {
450
450
  config.logFile && config.logFile !== "true" && config.logFile !== "auto"
451
451
  ? config.logFile
452
452
  : join(config.stateDir, "braintrust-pi-extension.log");
453
- const loggingEnabled = config.debug || Boolean(config.logFile);
453
+ const infoLoggingEnabled = config.debug || Boolean(config.logFile);
454
454
 
455
- if (loggingEnabled) ensureDir(dirname(explicitLogFile));
455
+ let logDirEnsured = false;
456
+
457
+ function shouldLog(level: LogLevel): boolean {
458
+ return level === "warn" || level === "error" || infoLoggingEnabled;
459
+ }
460
+
461
+ function ensureLogDir(): void {
462
+ if (logDirEnsured) return;
463
+ ensureDir(dirname(explicitLogFile));
464
+ logDirEnsured = true;
465
+ }
456
466
 
457
467
  let pendingWrite = Promise.resolve();
458
468
 
459
469
  function emit(level: LogLevel, message: string, data?: unknown): void {
460
- if (!loggingEnabled) return;
470
+ if (!shouldLog(level)) return;
471
+ ensureLogDir();
461
472
  pendingWrite = pendingWrite
462
473
  .catch(() => {})
463
474
  .then(async () => {
package/src/index.integration.test.ts CHANGED
@@ -11,14 +11,14 @@ import {
11
11
  type Model,
12
12
  type SimpleStreamOptions,
13
13
  type ToolCall,
14
- } from "@mariozechner/pi-ai";
15
- import * as piCodingAgent from "@mariozechner/pi-coding-agent";
14
+ } from "@earendil-works/pi-ai";
15
+ import * as piCodingAgent from "@earendil-works/pi-coding-agent";
16
16
  import {
17
17
  createAgentSession,
18
18
  DefaultResourceLoader,
19
19
  SessionManager,
20
20
  type ExtensionAPI,
21
- } from "@mariozechner/pi-coding-agent";
21
+ } from "@earendil-works/pi-coding-agent";
22
22
  import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
23
23
  import braintrustPiExtension from "./index.ts";
24
24
 
@@ -149,6 +149,28 @@ function makeTempDir(prefix: string): string {
149
149
  return dir;
150
150
  }
151
151
 
152
+ // Injected by CI (see .github/workflows/ci.yml). When unset (e.g. local dev) we
153
+ // assume the currently installed pi is at least as new as any version we branch
154
+ // on below.
155
+ const PI_COMPAT_VERSION = process.env.PI_COMPAT_VERSION;
156
+
157
+ function piCompatAtLeast(target: string): boolean {
158
+ if (!PI_COMPAT_VERSION) return true;
159
+ const parse = (v: string) =>
160
+ v
161
+ .split("-")[0]
162
+ .split(".")
163
+ .map((part) => Number.parseInt(part, 10) || 0);
164
+ const actual = parse(PI_COMPAT_VERSION);
165
+ const wanted = parse(target);
166
+ for (let i = 0; i < Math.max(actual.length, wanted.length); i += 1) {
167
+ const a = actual[i] ?? 0;
168
+ const w = wanted[i] ?? 0;
169
+ if (a !== w) return a > w;
170
+ }
171
+ return true;
172
+ }
173
+
152
174
  function buildAssistantMessage(model: Model<Api>): AssistantMessage {
153
175
  return {
154
176
  role: "assistant",
@@ -585,9 +607,18 @@ describe("braintrustPiExtension integration", () => {
585
607
  const firstLlmSpanId = llmSpans[0]?.spanId;
586
608
 
587
609
  expect(toolSpans).toHaveLength(2);
610
+ // pi < 0.68.1 emits `tool_execution_end` in assistant source order, so the
611
+ // extension logs tool spans as [tool-1, tool-2]. Starting with pi 0.68.1 the
612
+ // agent emits parallel tool completions eagerly (completion order), so the
613
+ // fast `tool-2` finishes before the slow `tool-1` and spans are logged as
614
+ // [tool-2, tool-1]. See pi-coding-agent changelog 0.68.1 / issue #3503.
615
+ // TODO: drop the pi < 0.68.1 branch once we stop testing against it.
616
+ const expectedToolCallIdOrder = piCompatAtLeast("0.68.1")
617
+ ? ["tool-2", "tool-1"]
618
+ : ["tool-1", "tool-2"];
588
619
  expect(
589
620
  toolSpans.map((span) => (span.metadata as Record<string, unknown> | undefined)?.tool_call_id),
590
- ).toEqual(["tool-1", "tool-2"]);
621
+ ).toEqual(expectedToolCallIdOrder);
591
622
  expect(toolSpans.map((span) => span.parentSpanId)).toEqual([firstLlmSpanId, firstLlmSpanId]);
592
623
  });
593
624
 
package/src/index.test.ts CHANGED
@@ -1,5 +1,9 @@
1
+ import { readFileSync } from "node:fs";
1
2
  import { beforeEach, describe, expect, it, vi } from "vitest";
2
3
 
4
+ const packageVersion = (JSON.parse(readFileSync("package.json", "utf8")) as { version: string })
5
+ .version;
6
+
3
7
  const mockState = vi.hoisted(() => ({
4
8
  startSpans: [] as Array<Record<string, unknown>>,
5
9
  logSpans: [] as Array<Record<string, unknown>>,
@@ -193,6 +197,7 @@ describe("braintrustPiExtension", () => {
193
197
  expect(mockState.widgets.at(-1)?.content?.[1]).toBe(
194
198
  "braintrust.dev/app/test-org/p/pi/logs?oid=trace-row-1",
195
199
  );
200
+ expect(mockState.startSpans[0]?.metadata).toMatchObject({ extension_version: packageVersion });
196
201
 
197
202
  await emit("session_shutdown");
198
203
 
@@ -239,6 +244,59 @@ describe("braintrustPiExtension", () => {
239
244
  expect(mockState.updateSpans).toEqual([]);
240
245
  });
241
246
 
247
+ it("records resolved model, thinking level, and provider response metadata on llm spans", async () => {
248
+ const { emit } = await createHarness();
249
+
250
+ await emit("session_start");
251
+ await emit("thinking_level_select", { level: "high", previousLevel: "off" });
252
+ await emit("before_agent_start", {
253
+ prompt: "Use a routed model",
254
+ images: [],
255
+ });
256
+ await emit("context", { messages: [{ role: "user", content: "Use a routed model" }] });
257
+ await emit("after_provider_response", {
258
+ status: 200,
259
+ headers: {
260
+ "x-ratelimit-remaining-requests": "42",
261
+ "retry-after": "5",
262
+ authorization: "secret",
263
+ },
264
+ });
265
+ await emit("message_end", {
266
+ message: {
267
+ role: "assistant",
268
+ provider: "openrouter",
269
+ model: "auto",
270
+ responseModel: "anthropic/claude-sonnet-4-5",
271
+ timestamp: 1_700_000_000_000,
272
+ content: [{ type: "text", text: "Done." }],
273
+ },
274
+ });
275
+
276
+ const turnSpan = mockState.startSpans.find(
277
+ (span) => span.type === "task" && span.name === "Turn 1",
278
+ );
279
+ const llmSpan = mockState.startSpans.find((span) => span.type === "llm");
280
+
281
+ expect(turnSpan?.metadata).toMatchObject({ thinking_level: "high" });
282
+ expect(llmSpan).toMatchObject({ name: "anthropic/claude-sonnet-4-5" });
283
+ expect(llmSpan?.metadata).toMatchObject({
284
+ model: "anthropic/claude-sonnet-4-5",
285
+ requested_model: "auto",
286
+ response_model: "anthropic/claude-sonnet-4-5",
287
+ thinking_level: "high",
288
+ provider_response_status: 200,
289
+ provider_response_headers: {
290
+ "x-ratelimit-remaining-requests": "42",
291
+ "retry-after": "5",
292
+ },
293
+ });
294
+ const llmMetadata = llmSpan?.metadata as
295
+ | { provider_response_headers?: Record<string, unknown> }
296
+ | undefined;
297
+ expect(llmMetadata?.provider_response_headers?.authorization).toBeUndefined();
298
+ });
299
+
242
300
  it("parents tool spans under the llm span that emitted the matching tool call", async () => {
243
301
  const { emit } = await createHarness();
244
302
 
package/src/index.ts CHANGED
@@ -1,6 +1,10 @@
1
- import { basename, resolve } from "node:path";
2
1
  import { hostname, userInfo } from "node:os";
3
- import type { AgentEndEvent, ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
2
+ import { basename, resolve } from "node:path";
3
+ import type {
4
+ AgentEndEvent,
5
+ ExtensionAPI,
6
+ ExtensionContext,
7
+ } from "@earendil-works/pi-coding-agent";
4
8
  import { BraintrustClient, type BraintrustSpanHandle } from "./client.ts";
5
9
  import { createLogger, loadConfig } from "./config.ts";
6
10
  import { createStateStore } from "./state.ts";
@@ -13,6 +17,7 @@ import type {
13
17
  NormalizedAssistantMessage,
14
18
  TraceConfig,
15
19
  } from "./types.ts";
20
+ import { EXTENSION_VERSION } from "./version.ts";
16
21
  import {
17
22
  buildTurnInput,
18
23
  extractErrorText,
@@ -29,7 +34,6 @@ import {
29
34
  toUnixSeconds,
30
35
  } from "./utils.ts";
31
36
 
32
- const EXTENSION_VERSION = "0.1.0";
33
37
  const TRACING_STATUS_KEY = "braintrust-tracing";
34
38
  const TRACING_WIDGET_KEY = "braintrust-trace-link";
35
39
 
@@ -39,9 +43,15 @@ interface SessionDescriptor {
39
43
  sessionKey: string;
40
44
  }
41
45
 
46
+ interface ProviderResponseMetadata {
47
+ status?: number;
48
+ headers?: Record<string, string>;
49
+ }
50
+
42
51
  interface PendingLlmCall {
43
52
  startedAt: number;
44
53
  input: NormalizedAgentMessage[];
54
+ providerResponse?: ProviderResponseMetadata;
45
55
  }
46
56
 
47
57
  interface TrackedToolStart {
@@ -62,6 +72,7 @@ interface ActiveTurn {
62
72
  lastAssistantMessage?: AssistantMessageLike;
63
73
  lastOutput?: NormalizedAssistantMessage;
64
74
  error?: string;
75
+ thinkingLevel?: string;
65
76
  }
66
77
 
67
78
  interface ActiveSession {
@@ -80,6 +91,7 @@ interface ActiveSession {
80
91
  startedAt?: number;
81
92
  totalTurns: number;
82
93
  totalToolCalls: number;
94
+ thinkingLevel?: string;
83
95
  currentTurn?: ActiveTurn;
84
96
  }
85
97
 
@@ -127,6 +139,52 @@ function safeModelName(model: unknown): string | undefined {
127
139
  return undefined;
128
140
  }
129
141
 
142
+ function stringProperty(
143
+ value: Record<string, unknown>,
144
+ keys: readonly string[],
145
+ ): string | undefined {
146
+ for (const key of keys) {
147
+ const item = value[key];
148
+ if (typeof item === "string" && item.trim()) return item;
149
+ }
150
+ return undefined;
151
+ }
152
+
153
+ function responseModelName(message: AssistantMessageLike): string | undefined {
154
+ return stringProperty(message as unknown as Record<string, unknown>, [
155
+ "responseModel",
156
+ "routedModel",
157
+ "resolvedModel",
158
+ "actualModel",
159
+ "concreteModel",
160
+ "outputModel",
161
+ ]);
162
+ }
163
+
164
+ function providerResponseMetadata(event: unknown): ProviderResponseMetadata | undefined {
165
+ if (!isPlainObject(event)) return undefined;
166
+ const metadata: ProviderResponseMetadata = {};
167
+ if (typeof event.status === "number") metadata.status = event.status;
168
+
169
+ const headers = event.headers;
170
+ if (isPlainObject(headers)) {
171
+ const allowedHeaders: Record<string, string> = {};
172
+ for (const [key, value] of Object.entries(headers)) {
173
+ const normalizedKey = key.toLowerCase();
174
+ if (!normalizedKey.startsWith("x-ratelimit-") && normalizedKey !== "retry-after") {
175
+ continue;
176
+ }
177
+ if (typeof value === "string") allowedHeaders[normalizedKey] = value;
178
+ else if (typeof value === "number" || typeof value === "boolean") {
179
+ allowedHeaders[normalizedKey] = String(value);
180
+ }
181
+ }
182
+ if (Object.keys(allowedHeaders).length > 0) metadata.headers = allowedHeaders;
183
+ }
184
+
185
+ return metadata.status !== undefined || metadata.headers ? metadata : undefined;
186
+ }
187
+
130
188
  function getPreviousSessionFile(event: unknown): string | undefined {
131
189
  if (!isPlainObject(event)) return undefined;
132
190
  return typeof event.previousSessionFile === "string" ? event.previousSessionFile : undefined;
@@ -666,6 +724,7 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
666
724
  metadata: {
667
725
  turn_number: session.totalTurns,
668
726
  active_model: safeModelName(ctx.model),
727
+ thinking_level: session.thinkingLevel,
669
728
  },
670
729
  name: `Turn ${session.totalTurns}`,
671
730
  type: "task",
@@ -683,6 +742,7 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
683
742
  lastAssistantMessage: undefined,
684
743
  lastOutput: undefined,
685
744
  error: undefined,
745
+ thinkingLevel: session.thinkingLevel,
686
746
  };
687
747
 
688
748
  store.patch(session.sessionKey, {
@@ -699,6 +759,22 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
699
759
  });
700
760
  });
701
761
 
762
+ pi.on("after_provider_response", async (event) => {
763
+ if (!activeSession?.currentTurn) return;
764
+ const metadata = providerResponseMetadata(event);
765
+ if (!metadata) return;
766
+ const pending = [...activeSession.currentTurn.llmCalls]
767
+ .reverse()
768
+ .find((call) => !call.providerResponse);
769
+ if (pending) pending.providerResponse = metadata;
770
+ });
771
+
772
+ pi.on("thinking_level_select", async (event) => {
773
+ if (!isPlainObject(event) || typeof event.level !== "string") return;
774
+ if (activeSession) activeSession.thinkingLevel = event.level;
775
+ if (activeSession?.currentTurn) activeSession.currentTurn.thinkingLevel = event.level;
776
+ });
777
+
702
778
  pi.on("message_end", async (event) => {
703
779
  const session = activeSession;
704
780
  if (
@@ -716,7 +792,9 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
716
792
  input: [{ role: "user", content: session.currentTurn.prompt }],
717
793
  };
718
794
 
719
- const modelName = safeModelName(message) ?? message.model;
795
+ const requestedModelName = safeModelName(message) ?? message.model;
796
+ const responseModel = responseModelName(message);
797
+ const modelName = responseModel ?? requestedModelName;
720
798
  const endedAt = message.timestamp ?? Date.now();
721
799
  const normalizedOutput = normalizeAssistantMessage(message);
722
800
  const error =
@@ -740,7 +818,12 @@ export default function braintrustPiExtension(pi: ExtensionAPI): void {
740
818
  api: message.api,
741
819
  provider: message.provider,
742
820
  model: modelName,
821
+ requested_model: requestedModelName,
822
+ response_model: responseModel,
743
823
  stop_reason: message.stopReason,
824
+ thinking_level: session.currentTurn.thinkingLevel ?? session.thinkingLevel,
825
+ provider_response_status: pending.providerResponse?.status,
826
+ provider_response_headers: pending.providerResponse?.headers,
744
827
  cache_read_tokens: message.usage?.cacheRead,
745
828
  cache_write_tokens: message.usage?.cacheWrite,
746
829
  },
package/src/types.ts CHANGED
@@ -121,6 +121,7 @@ export interface AssistantMessageLike {
121
121
  api?: string;
122
122
  provider?: string;
123
123
  model?: string;
124
+ responseModel?: string;
124
125
  usage?: UsageLike;
125
126
  stopReason?: string;
126
127
  errorMessage?: string;
package/src/version.ts ADDED
@@ -0,0 +1,2 @@
1
+ // This file is generated by scripts/sync-version.mjs. Do not edit by hand.
2
+ export const EXTENSION_VERSION = "0.5.1";