braintrust 2.2.1-rc.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.mjs CHANGED
@@ -1928,7 +1928,8 @@ var SpanType = z6.union([
1928
1928
  "automation",
1929
1929
  "facet",
1930
1930
  "preprocessor",
1931
- "classifier"
1931
+ "classifier",
1932
+ "review"
1932
1933
  ]),
1933
1934
  z6.null()
1934
1935
  ]);
@@ -5187,12 +5188,19 @@ function updateSpanImpl({
5187
5188
  parentObjectType,
5188
5189
  parentObjectId,
5189
5190
  id,
5191
+ root_span_id,
5192
+ span_id,
5190
5193
  event
5191
5194
  }) {
5195
+ if (isEmpty2(root_span_id) !== isEmpty2(span_id)) {
5196
+ throw new Error("both root_span_id and span_id must be set, or neither");
5197
+ }
5198
+ const hasExplicitSpanIds = root_span_id !== void 0 && span_id !== void 0;
5192
5199
  const updateEvent = deepCopyEvent(
5193
5200
  validateAndSanitizeExperimentLogPartialArgs({
5201
+ ...event,
5194
5202
  id,
5195
- ...event
5203
+ ...hasExplicitSpanIds ? { root_span_id, span_id } : {}
5196
5204
  })
5197
5205
  );
5198
5206
  const parentIds = async () => new SpanComponentsV3({
@@ -5224,6 +5232,8 @@ function updateSpan({
5224
5232
  spanComponentsToObjectIdLambda(resolvedState, components)
5225
5233
  ),
5226
5234
  id: components.data.row_id,
5235
+ root_span_id: components.data.root_span_id,
5236
+ span_id: components.data.span_id,
5227
5237
  event
5228
5238
  });
5229
5239
  }
@@ -5535,7 +5545,7 @@ var Logger = class {
5535
5545
  * @param event The event data to update the span with. Must include `id`. See {@link Experiment.log} for a full list of valid fields.
5536
5546
  */
5537
5547
  updateSpan(event) {
5538
- const { id, ...eventRest } = event;
5548
+ const { id, root_span_id, span_id, ...eventRest } = event;
5539
5549
  if (!id) {
5540
5550
  throw new Error("Span id is required to update a span");
5541
5551
  }
@@ -5544,6 +5554,8 @@ var Logger = class {
5544
5554
  parentObjectType: this.parentObjectType(),
5545
5555
  parentObjectId: this.lazyId,
5546
5556
  id,
5557
+ root_span_id,
5558
+ span_id,
5547
5559
  event: eventRest
5548
5560
  });
5549
5561
  }
@@ -7826,7 +7838,7 @@ View complete results in Braintrust or run experiment.summarize() again.`
7826
7838
  * @param event The event data to update the span with. Must include `id`. See {@link Experiment.log} for a full list of valid fields.
7827
7839
  */
7828
7840
  updateSpan(event) {
7829
- const { id, ...eventRest } = event;
7841
+ const { id, root_span_id, span_id, ...eventRest } = event;
7830
7842
  if (!id) {
7831
7843
  throw new Error("Span id is required to update a span");
7832
7844
  }
@@ -7835,6 +7847,8 @@ View complete results in Braintrust or run experiment.summarize() again.`
7835
7847
  parentObjectType: this.parentObjectType(),
7836
7848
  parentObjectId: this.lazyId,
7837
7849
  id,
7850
+ root_span_id,
7851
+ span_id,
7838
7852
  event: eventRest
7839
7853
  });
7840
7854
  }
@@ -12695,14 +12709,17 @@ function parseToolName(rawToolName) {
12695
12709
  rawToolName
12696
12710
  };
12697
12711
  }
12698
- function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpServers) {
12712
+ function createToolTracingHooks(resolveParentSpan, activeToolSpans, mcpServers, subAgentSpans, endedSubAgentSpans) {
12699
12713
  const preToolUse = async (input, toolUseID) => {
12700
12714
  if (input.hook_event_name !== "PreToolUse" || !toolUseID) {
12701
12715
  return {};
12702
12716
  }
12717
+ if (input.tool_name === "Task") {
12718
+ return {};
12719
+ }
12703
12720
  const parsed = parseToolName(input.tool_name);
12704
12721
  const mcpMetadata = getMcpServerMetadata(parsed.mcpServer, mcpServers);
12705
- const parentExport = await parentSpanExportPromise;
12722
+ const parentExport = await resolveParentSpan(toolUseID);
12706
12723
  const toolSpan = startSpan({
12707
12724
  name: parsed.displayName,
12708
12725
  spanAttributes: { type: "tool" /* TOOL */ },
@@ -12730,6 +12747,30 @@ function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpSer
12730
12747
  if (input.hook_event_name !== "PostToolUse" || !toolUseID) {
12731
12748
  return {};
12732
12749
  }
12750
+ const subAgentSpan = subAgentSpans.get(toolUseID);
12751
+ if (subAgentSpan) {
12752
+ try {
12753
+ const response = input.tool_response;
12754
+ const metadata = {};
12755
+ if (response?.status) {
12756
+ metadata["claude_agent_sdk.status"] = response.status;
12757
+ }
12758
+ if (response?.totalDurationMs) {
12759
+ metadata["claude_agent_sdk.duration_ms"] = response.totalDurationMs;
12760
+ }
12761
+ if (response?.totalToolUseCount !== void 0) {
12762
+ metadata["claude_agent_sdk.tool_use_count"] = response.totalToolUseCount;
12763
+ }
12764
+ subAgentSpan.log({
12765
+ output: response?.content,
12766
+ metadata
12767
+ });
12768
+ } finally {
12769
+ subAgentSpan.end();
12770
+ endedSubAgentSpans.add(toolUseID);
12771
+ }
12772
+ return {};
12773
+ }
12733
12774
  const toolSpan = activeToolSpans.get(toolUseID);
12734
12775
  if (!toolSpan) {
12735
12776
  return {};
@@ -12746,6 +12787,16 @@ function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpSer
12746
12787
  if (input.hook_event_name !== "PostToolUseFailure" || !toolUseID) {
12747
12788
  return {};
12748
12789
  }
12790
+ const subAgentSpan = subAgentSpans.get(toolUseID);
12791
+ if (subAgentSpan) {
12792
+ try {
12793
+ subAgentSpan.log({ error: input.error });
12794
+ } finally {
12795
+ subAgentSpan.end();
12796
+ endedSubAgentSpans.add(toolUseID);
12797
+ }
12798
+ return {};
12799
+ }
12749
12800
  const toolSpan = activeToolSpans.get(toolUseID);
12750
12801
  if (!toolSpan) {
12751
12802
  return {};
@@ -12770,12 +12821,14 @@ function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpSer
12770
12821
  };
12771
12822
  return { preToolUse, postToolUse, postToolUseFailure };
12772
12823
  }
12773
- function injectTracingHooks(options, parentSpanExportPromise, activeToolSpans) {
12824
+ function injectTracingHooks(options, resolveParentSpan, activeToolSpans, subAgentSpans, endedSubAgentSpans) {
12774
12825
  const mcpServers = options.mcpServers;
12775
12826
  const { preToolUse, postToolUse, postToolUseFailure } = createToolTracingHooks(
12776
- parentSpanExportPromise,
12827
+ resolveParentSpan,
12777
12828
  activeToolSpans,
12778
- mcpServers
12829
+ mcpServers,
12830
+ subAgentSpans,
12831
+ endedSubAgentSpans
12779
12832
  );
12780
12833
  const existingHooks = options.hooks ?? {};
12781
12834
  return {
@@ -12821,18 +12874,45 @@ function filterSerializableOptions(options) {
12821
12874
  }
12822
12875
  return filtered;
12823
12876
  }
12877
+ function isAsyncIterable(value) {
12878
+ return value !== null && value !== void 0 && typeof value[Symbol.asyncIterator] === "function";
12879
+ }
12824
12880
  function wrapClaudeAgentQuery(queryFn, defaultThis) {
12825
12881
  const proxy = new Proxy(queryFn, {
12826
12882
  apply(target, thisArg, argArray) {
12827
12883
  const params = argArray[0] ?? {};
12828
12884
  const { prompt, options = {} } = params;
12885
+ const promptIsAsyncIterable = isAsyncIterable(prompt);
12886
+ let capturedPromptMessages;
12887
+ let promptForQuery = prompt;
12888
+ let promptStarted = false;
12889
+ let resolvePromptDone;
12890
+ const promptDone = new Promise((resolve) => {
12891
+ resolvePromptDone = resolve;
12892
+ });
12893
+ if (promptIsAsyncIterable) {
12894
+ capturedPromptMessages = [];
12895
+ const originalPrompt = prompt;
12896
+ const capturingPrompt = (async function* () {
12897
+ promptStarted = true;
12898
+ try {
12899
+ for await (const msg of originalPrompt) {
12900
+ capturedPromptMessages.push(msg);
12901
+ yield msg;
12902
+ }
12903
+ } finally {
12904
+ resolvePromptDone?.();
12905
+ }
12906
+ })();
12907
+ promptForQuery = capturingPrompt;
12908
+ }
12829
12909
  const span = startSpan({
12830
12910
  name: "Claude Agent",
12831
12911
  spanAttributes: {
12832
12912
  type: "task" /* TASK */
12833
12913
  },
12834
12914
  event: {
12835
- input: typeof prompt === "string" ? prompt : { type: "streaming", description: "AsyncIterable<SDKMessage>" },
12915
+ input: typeof prompt === "string" ? prompt : promptIsAsyncIterable ? void 0 : prompt !== void 0 ? String(prompt) : void 0,
12836
12916
  metadata: filterSerializableOptions(options)
12837
12917
  }
12838
12918
  });
@@ -12843,13 +12923,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12843
12923
  let currentMessageStartTime = getCurrentUnixTimestamp();
12844
12924
  const currentMessages = [];
12845
12925
  const createLLMSpan = async () => {
12926
+ const parentToolUseId = currentMessages[0]?.parent_tool_use_id ?? null;
12927
+ let parentSpanExport;
12928
+ if (parentToolUseId) {
12929
+ const subAgentSpan = subAgentSpans.get(parentToolUseId);
12930
+ parentSpanExport = subAgentSpan ? await subAgentSpan.export() : await span.export();
12931
+ } else {
12932
+ parentSpanExport = await span.export();
12933
+ }
12846
12934
  const finalMessageContent = await _createLLMSpanForMessages(
12847
12935
  currentMessages,
12848
12936
  prompt,
12849
12937
  finalResults,
12850
12938
  options,
12851
12939
  currentMessageStartTime,
12852
- await span.export()
12940
+ capturedPromptMessages,
12941
+ parentSpanExport
12853
12942
  );
12854
12943
  if (finalMessageContent) {
12855
12944
  finalResults.push(finalMessageContent);
@@ -12863,12 +12952,34 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12863
12952
  };
12864
12953
  const invocationTarget = thisArg === proxy || thisArg === void 0 ? defaultThis ?? thisArg : thisArg;
12865
12954
  const activeToolSpans = /* @__PURE__ */ new Map();
12955
+ const subAgentSpans = /* @__PURE__ */ new Map();
12956
+ const endedSubAgentSpans = /* @__PURE__ */ new Set();
12957
+ const toolUseToParent = /* @__PURE__ */ new Map();
12958
+ const pendingSubAgentNames = /* @__PURE__ */ new Map();
12959
+ const resolveParentSpan = async (toolUseID) => {
12960
+ const parentToolUseId = toolUseToParent.get(toolUseID);
12961
+ if (parentToolUseId) {
12962
+ const subAgentSpan = subAgentSpans.get(parentToolUseId);
12963
+ if (subAgentSpan) {
12964
+ return subAgentSpan.export();
12965
+ }
12966
+ }
12967
+ return span.export();
12968
+ };
12866
12969
  const optionsWithHooks = injectTracingHooks(
12867
12970
  options,
12868
- span.export(),
12869
- activeToolSpans
12971
+ resolveParentSpan,
12972
+ activeToolSpans,
12973
+ subAgentSpans,
12974
+ endedSubAgentSpans
12870
12975
  );
12871
- const modifiedArgArray = [{ ...params, options: optionsWithHooks }];
12976
+ const modifiedArgArray = [
12977
+ {
12978
+ ...params,
12979
+ ...promptForQuery !== void 0 ? { prompt: promptForQuery } : {},
12980
+ options: optionsWithHooks
12981
+ }
12982
+ ];
12872
12983
  const originalGenerator = withCurrent(
12873
12984
  span,
12874
12985
  () => Reflect.apply(target, invocationTarget, modifiedArgArray)
@@ -12877,6 +12988,41 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12877
12988
  try {
12878
12989
  for await (const message of originalGenerator) {
12879
12990
  const currentTime = getCurrentUnixTimestamp();
12991
+ if (message.type === "assistant" && Array.isArray(message.message?.content)) {
12992
+ const parentToolUseId = message.parent_tool_use_id ?? null;
12993
+ for (const block of message.message.content) {
12994
+ if (block.type === "tool_use" && block.id) {
12995
+ toolUseToParent.set(block.id, parentToolUseId);
12996
+ if (block.name === "Task" && block.input?.subagent_type) {
12997
+ pendingSubAgentNames.set(
12998
+ block.id,
12999
+ block.input.subagent_type
13000
+ );
13001
+ }
13002
+ }
13003
+ }
13004
+ }
13005
+ if ("parent_tool_use_id" in message) {
13006
+ const parentToolUseId = message.parent_tool_use_id;
13007
+ if (parentToolUseId && !subAgentSpans.has(parentToolUseId)) {
13008
+ const agentName = pendingSubAgentNames.get(parentToolUseId);
13009
+ const spanName = agentName ? `Agent: ${agentName}` : "Agent: sub-agent";
13010
+ const parentExport = await span.export();
13011
+ const subAgentSpan = startSpan({
13012
+ name: spanName,
13013
+ spanAttributes: { type: "task" /* TASK */ },
13014
+ event: {
13015
+ metadata: {
13016
+ ...agentName && {
13017
+ "claude_agent_sdk.agent_type": agentName
13018
+ }
13019
+ }
13020
+ },
13021
+ parent: parentExport
13022
+ });
13023
+ subAgentSpans.set(parentToolUseId, subAgentSpan);
13024
+ }
13025
+ }
12880
13026
  const messageId = message.message?.id;
12881
13027
  if (messageId && messageId !== currentMessageId) {
12882
13028
  await createLLMSpan();
@@ -12922,6 +13068,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12922
13068
  });
12923
13069
  throw error;
12924
13070
  } finally {
13071
+ for (const [id, subSpan] of subAgentSpans) {
13072
+ if (!endedSubAgentSpans.has(id)) {
13073
+ subSpan.end();
13074
+ }
13075
+ }
13076
+ subAgentSpans.clear();
13077
+ if (capturedPromptMessages) {
13078
+ if (promptStarted) {
13079
+ await promptDone;
13080
+ }
13081
+ if (capturedPromptMessages.length > 0) {
13082
+ span.log({
13083
+ input: _formatCapturedMessages(capturedPromptMessages)
13084
+ });
13085
+ }
13086
+ }
12925
13087
  span.end();
12926
13088
  }
12927
13089
  })();
@@ -12949,14 +13111,25 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12949
13111
  });
12950
13112
  return proxy;
12951
13113
  }
12952
- function _buildLLMInput(prompt, conversationHistory) {
12953
- const promptMessage = typeof prompt === "string" ? { content: prompt, role: "user" } : void 0;
12954
- const inputParts = [
12955
- ...promptMessage ? [promptMessage] : [],
12956
- ...conversationHistory
12957
- ];
13114
+ function _buildLLMInput(prompt, conversationHistory, capturedPromptMessages) {
13115
+ const promptMessages = [];
13116
+ if (typeof prompt === "string") {
13117
+ promptMessages.push({ content: prompt, role: "user" });
13118
+ } else if (capturedPromptMessages && capturedPromptMessages.length > 0) {
13119
+ for (const msg of capturedPromptMessages) {
13120
+ const role = msg.message?.role;
13121
+ const content = msg.message?.content;
13122
+ if (role && content !== void 0) {
13123
+ promptMessages.push({ content, role });
13124
+ }
13125
+ }
13126
+ }
13127
+ const inputParts = [...promptMessages, ...conversationHistory];
12958
13128
  return inputParts.length > 0 ? inputParts : void 0;
12959
13129
  }
13130
+ function _formatCapturedMessages(messages) {
13131
+ return messages.length > 0 ? messages : [];
13132
+ }
12960
13133
  function _extractUsageFromMessage(message) {
12961
13134
  const metrics = {};
12962
13135
  let usage;
@@ -12990,7 +13163,7 @@ function _extractUsageFromMessage(message) {
12990
13163
  }
12991
13164
  return metrics;
12992
13165
  }
12993
- async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, parentSpan) {
13166
+ async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, capturedPromptMessages, parentSpan) {
12994
13167
  if (messages.length === 0) return void 0;
12995
13168
  const lastMessage = messages[messages.length - 1];
12996
13169
  if (lastMessage.type !== "assistant" || !lastMessage.message?.usage) {
@@ -12998,7 +13171,11 @@ async function _createLLMSpanForMessages(messages, prompt, conversationHistory,
12998
13171
  }
12999
13172
  const model = lastMessage.message.model || options.model;
13000
13173
  const usage = _extractUsageFromMessage(lastMessage);
13001
- const input = _buildLLMInput(prompt, conversationHistory);
13174
+ const input = _buildLLMInput(
13175
+ prompt,
13176
+ conversationHistory,
13177
+ capturedPromptMessages
13178
+ );
13002
13179
  const outputs = messages.map(
13003
13180
  (m) => m.message?.content && m.message?.role ? { content: m.message.content, role: m.message.role } : void 0
13004
13181
  ).filter((c) => c !== void 0);
@@ -13755,7 +13932,7 @@ function unescapePath(path) {
13755
13932
  }
13756
13933
  var graph_framework_default = { createGraph };
13757
13934
 
13758
- // ../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
13935
+ // ../node_modules/async/dist/async.mjs
13759
13936
  function initialParams(fn) {
13760
13937
  return function(...args) {
13761
13938
  var callback = args.pop();
@@ -13826,7 +14003,7 @@ function isAsync(fn) {
13826
14003
  function isAsyncGenerator2(fn) {
13827
14004
  return fn[Symbol.toStringTag] === "AsyncGenerator";
13828
14005
  }
13829
- function isAsyncIterable(obj) {
14006
+ function isAsyncIterable2(obj) {
13830
14007
  return typeof obj[Symbol.asyncIterator] === "function";
13831
14008
  }
13832
14009
  function wrapAsync(asyncFn) {
@@ -13880,7 +14057,6 @@ function isArrayLike(value) {
13880
14057
  return value && typeof value.length === "number" && value.length >= 0 && value.length % 1 === 0;
13881
14058
  }
13882
14059
  var breakLoop = {};
13883
- var breakLoop$1 = breakLoop;
13884
14060
  function once(fn) {
13885
14061
  function wrapper(...args) {
13886
14062
  if (fn === null) return;
@@ -13972,7 +14148,7 @@ function asyncEachOfLimit(generator, limit, iteratee, callback) {
13972
14148
  canceled = true;
13973
14149
  return;
13974
14150
  }
13975
- if (result === breakLoop$1 || done && running <= 0) {
14151
+ if (result === breakLoop || done && running <= 0) {
13976
14152
  done = true;
13977
14153
  return callback(null);
13978
14154
  }
@@ -13998,7 +14174,7 @@ var eachOfLimit$2 = (limit) => {
13998
14174
  if (isAsyncGenerator2(obj)) {
13999
14175
  return asyncEachOfLimit(obj, limit, iteratee, callback);
14000
14176
  }
14001
- if (isAsyncIterable(obj)) {
14177
+ if (isAsyncIterable2(obj)) {
14002
14178
  return asyncEachOfLimit(obj[Symbol.asyncIterator](), limit, iteratee, callback);
14003
14179
  }
14004
14180
  var nextElem = createIterator(obj);
@@ -14015,7 +14191,7 @@ var eachOfLimit$2 = (limit) => {
14015
14191
  } else if (err === false) {
14016
14192
  done = true;
14017
14193
  canceled = true;
14018
- } else if (value === breakLoop$1 || done && running <= 0) {
14194
+ } else if (value === breakLoop || done && running <= 0) {
14019
14195
  done = true;
14020
14196
  return callback(null);
14021
14197
  } else if (!looping) {
@@ -14058,7 +14234,7 @@ function eachOfArrayLike(coll, iteratee, callback) {
14058
14234
  if (canceled === true) return;
14059
14235
  if (err) {
14060
14236
  callback(err);
14061
- } else if (++completed === length || value === breakLoop$1) {
14237
+ } else if (++completed === length || value === breakLoop) {
14062
14238
  callback(null);
14063
14239
  }
14064
14240
  }
@@ -14454,7 +14630,7 @@ function _createTester(check, getResult) {
14454
14630
  if (check(result) && !testResult) {
14455
14631
  testPassed = true;
14456
14632
  testResult = getResult(true, value);
14457
- return callback(null, breakLoop$1);
14633
+ return callback(null, breakLoop);
14458
14634
  }
14459
14635
  callback();
14460
14636
  });
@@ -15232,7 +15408,7 @@ function callEvaluatorData(data) {
15232
15408
  baseExperiment
15233
15409
  };
15234
15410
  }
15235
- function isAsyncIterable2(value) {
15411
+ function isAsyncIterable3(value) {
15236
15412
  return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
15237
15413
  }
15238
15414
  function isIterable(value) {
@@ -15453,7 +15629,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
15453
15629
  }
15454
15630
  const resolvedDataResult = dataResult instanceof Promise ? await dataResult : dataResult;
15455
15631
  const dataIterable = (() => {
15456
- if (isAsyncIterable2(resolvedDataResult)) {
15632
+ if (isAsyncIterable3(resolvedDataResult)) {
15457
15633
  return resolvedDataResult;
15458
15634
  }
15459
15635
  if (Array.isArray(resolvedDataResult) || isIterable(resolvedDataResult)) {
@@ -16305,6 +16481,7 @@ var CodeParameters = class {
16305
16481
  this.metadata = opts.metadata;
16306
16482
  }
16307
16483
  async toFunctionDefinition(projectNameToId) {
16484
+ const schema = serializeEvalParameterstoParametersSchema(this.schema);
16308
16485
  return {
16309
16486
  project_id: await projectNameToId.resolve(this.project),
16310
16487
  name: this.name,
@@ -16313,8 +16490,8 @@ var CodeParameters = class {
16313
16490
  function_type: "parameters",
16314
16491
  function_data: {
16315
16492
  type: "parameters",
16316
- data: {},
16317
- __schema: serializeEvalParameterstoParametersSchema(this.schema)
16493
+ data: getDefaultDataFromParametersSchema(schema),
16494
+ __schema: schema
16318
16495
  },
16319
16496
  if_exists: this.ifExists,
16320
16497
  metadata: this.metadata
@@ -16369,6 +16546,16 @@ function serializeEvalParameterstoParametersSchema(parameters) {
16369
16546
  additionalProperties: true
16370
16547
  };
16371
16548
  }
16549
+ function getDefaultDataFromParametersSchema(schema) {
16550
+ return Object.fromEntries(
16551
+ Object.entries(schema.properties).flatMap(([name, value]) => {
16552
+ if (!("default" in value)) {
16553
+ return [];
16554
+ }
16555
+ return [[name, value.default]];
16556
+ })
16557
+ );
16558
+ }
16372
16559
  var ProjectNameIdMap = class {
16373
16560
  nameToId = {};
16374
16561
  idToName = {};