braintrust 2.2.1-rc.0 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/dist/index.d.mts +1 -1
- package/dev/dist/index.d.ts +1 -1
- package/dev/dist/index.js +34 -12
- package/dev/dist/index.mjs +40 -18
- package/dist/browser.d.mts +240 -240
- package/dist/browser.d.ts +240 -240
- package/dist/browser.js +249 -62
- package/dist/browser.mjs +221 -34
- package/dist/cli.js +1241 -723
- package/dist/index.d.mts +240 -240
- package/dist/index.d.ts +240 -240
- package/dist/index.js +249 -62
- package/dist/index.mjs +227 -40
- package/package.json +23 -23
- package/util/dist/index.d.mts +3 -2
- package/util/dist/index.d.ts +3 -2
- package/util/dist/index.js +3 -1
- package/util/dist/index.mjs +3 -1
- package/LICENSE +0 -201
- package/dist/eval-runner.js +0 -12680
package/dist/index.js
CHANGED
|
@@ -2150,7 +2150,8 @@ var SpanType = _v3.z.union([
|
|
|
2150
2150
|
"automation",
|
|
2151
2151
|
"facet",
|
|
2152
2152
|
"preprocessor",
|
|
2153
|
-
"classifier"
|
|
2153
|
+
"classifier",
|
|
2154
|
+
"review"
|
|
2154
2155
|
]),
|
|
2155
2156
|
_v3.z.null()
|
|
2156
2157
|
]);
|
|
@@ -5409,12 +5410,19 @@ function updateSpanImpl({
|
|
|
5409
5410
|
parentObjectType,
|
|
5410
5411
|
parentObjectId,
|
|
5411
5412
|
id,
|
|
5413
|
+
root_span_id,
|
|
5414
|
+
span_id,
|
|
5412
5415
|
event
|
|
5413
5416
|
}) {
|
|
5417
|
+
if (isEmpty2(root_span_id) !== isEmpty2(span_id)) {
|
|
5418
|
+
throw new Error("both root_span_id and span_id must be set, or neither");
|
|
5419
|
+
}
|
|
5420
|
+
const hasExplicitSpanIds = root_span_id !== void 0 && span_id !== void 0;
|
|
5414
5421
|
const updateEvent = deepCopyEvent(
|
|
5415
5422
|
validateAndSanitizeExperimentLogPartialArgs({
|
|
5423
|
+
...event,
|
|
5416
5424
|
id,
|
|
5417
|
-
...
|
|
5425
|
+
...hasExplicitSpanIds ? { root_span_id, span_id } : {}
|
|
5418
5426
|
})
|
|
5419
5427
|
);
|
|
5420
5428
|
const parentIds = async () => new SpanComponentsV3({
|
|
@@ -5446,6 +5454,8 @@ function updateSpan({
|
|
|
5446
5454
|
spanComponentsToObjectIdLambda(resolvedState, components)
|
|
5447
5455
|
),
|
|
5448
5456
|
id: components.data.row_id,
|
|
5457
|
+
root_span_id: components.data.root_span_id,
|
|
5458
|
+
span_id: components.data.span_id,
|
|
5449
5459
|
event
|
|
5450
5460
|
});
|
|
5451
5461
|
}
|
|
@@ -5757,7 +5767,7 @@ var Logger = (_class7 = class {
|
|
|
5757
5767
|
* @param event The event data to update the span with. Must include `id`. See {@link Experiment.log} for a full list of valid fields.
|
|
5758
5768
|
*/
|
|
5759
5769
|
updateSpan(event) {
|
|
5760
|
-
const { id, ...eventRest } = event;
|
|
5770
|
+
const { id, root_span_id, span_id, ...eventRest } = event;
|
|
5761
5771
|
if (!id) {
|
|
5762
5772
|
throw new Error("Span id is required to update a span");
|
|
5763
5773
|
}
|
|
@@ -5766,6 +5776,8 @@ var Logger = (_class7 = class {
|
|
|
5766
5776
|
parentObjectType: this.parentObjectType(),
|
|
5767
5777
|
parentObjectId: this.lazyId,
|
|
5768
5778
|
id,
|
|
5779
|
+
root_span_id,
|
|
5780
|
+
span_id,
|
|
5769
5781
|
event: eventRest
|
|
5770
5782
|
});
|
|
5771
5783
|
}
|
|
@@ -8048,7 +8060,7 @@ View complete results in Braintrust or run experiment.summarize() again.`
|
|
|
8048
8060
|
* @param event The event data to update the span with. Must include `id`. See {@link Experiment.log} for a full list of valid fields.
|
|
8049
8061
|
*/
|
|
8050
8062
|
updateSpan(event) {
|
|
8051
|
-
const { id, ...eventRest } = event;
|
|
8063
|
+
const { id, root_span_id, span_id, ...eventRest } = event;
|
|
8052
8064
|
if (!id) {
|
|
8053
8065
|
throw new Error("Span id is required to update a span");
|
|
8054
8066
|
}
|
|
@@ -8057,6 +8069,8 @@ View complete results in Braintrust or run experiment.summarize() again.`
|
|
|
8057
8069
|
parentObjectType: this.parentObjectType(),
|
|
8058
8070
|
parentObjectId: this.lazyId,
|
|
8059
8071
|
id,
|
|
8072
|
+
root_span_id,
|
|
8073
|
+
span_id,
|
|
8060
8074
|
event: eventRest
|
|
8061
8075
|
});
|
|
8062
8076
|
}
|
|
@@ -12922,14 +12936,17 @@ function parseToolName(rawToolName) {
|
|
|
12922
12936
|
rawToolName
|
|
12923
12937
|
};
|
|
12924
12938
|
}
|
|
12925
|
-
function createToolTracingHooks(
|
|
12939
|
+
function createToolTracingHooks(resolveParentSpan, activeToolSpans, mcpServers, subAgentSpans, endedSubAgentSpans) {
|
|
12926
12940
|
const preToolUse = async (input, toolUseID) => {
|
|
12927
12941
|
if (input.hook_event_name !== "PreToolUse" || !toolUseID) {
|
|
12928
12942
|
return {};
|
|
12929
12943
|
}
|
|
12944
|
+
if (input.tool_name === "Task") {
|
|
12945
|
+
return {};
|
|
12946
|
+
}
|
|
12930
12947
|
const parsed = parseToolName(input.tool_name);
|
|
12931
12948
|
const mcpMetadata = getMcpServerMetadata(parsed.mcpServer, mcpServers);
|
|
12932
|
-
const parentExport = await
|
|
12949
|
+
const parentExport = await resolveParentSpan(toolUseID);
|
|
12933
12950
|
const toolSpan = startSpan({
|
|
12934
12951
|
name: parsed.displayName,
|
|
12935
12952
|
spanAttributes: { type: "tool" /* TOOL */ },
|
|
@@ -12957,6 +12974,30 @@ function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpSer
|
|
|
12957
12974
|
if (input.hook_event_name !== "PostToolUse" || !toolUseID) {
|
|
12958
12975
|
return {};
|
|
12959
12976
|
}
|
|
12977
|
+
const subAgentSpan = subAgentSpans.get(toolUseID);
|
|
12978
|
+
if (subAgentSpan) {
|
|
12979
|
+
try {
|
|
12980
|
+
const response = input.tool_response;
|
|
12981
|
+
const metadata = {};
|
|
12982
|
+
if (_optionalChain([response, 'optionalAccess', _276 => _276.status])) {
|
|
12983
|
+
metadata["claude_agent_sdk.status"] = response.status;
|
|
12984
|
+
}
|
|
12985
|
+
if (_optionalChain([response, 'optionalAccess', _277 => _277.totalDurationMs])) {
|
|
12986
|
+
metadata["claude_agent_sdk.duration_ms"] = response.totalDurationMs;
|
|
12987
|
+
}
|
|
12988
|
+
if (_optionalChain([response, 'optionalAccess', _278 => _278.totalToolUseCount]) !== void 0) {
|
|
12989
|
+
metadata["claude_agent_sdk.tool_use_count"] = response.totalToolUseCount;
|
|
12990
|
+
}
|
|
12991
|
+
subAgentSpan.log({
|
|
12992
|
+
output: _optionalChain([response, 'optionalAccess', _279 => _279.content]),
|
|
12993
|
+
metadata
|
|
12994
|
+
});
|
|
12995
|
+
} finally {
|
|
12996
|
+
subAgentSpan.end();
|
|
12997
|
+
endedSubAgentSpans.add(toolUseID);
|
|
12998
|
+
}
|
|
12999
|
+
return {};
|
|
13000
|
+
}
|
|
12960
13001
|
const toolSpan = activeToolSpans.get(toolUseID);
|
|
12961
13002
|
if (!toolSpan) {
|
|
12962
13003
|
return {};
|
|
@@ -12973,6 +13014,16 @@ function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpSer
|
|
|
12973
13014
|
if (input.hook_event_name !== "PostToolUseFailure" || !toolUseID) {
|
|
12974
13015
|
return {};
|
|
12975
13016
|
}
|
|
13017
|
+
const subAgentSpan = subAgentSpans.get(toolUseID);
|
|
13018
|
+
if (subAgentSpan) {
|
|
13019
|
+
try {
|
|
13020
|
+
subAgentSpan.log({ error: input.error });
|
|
13021
|
+
} finally {
|
|
13022
|
+
subAgentSpan.end();
|
|
13023
|
+
endedSubAgentSpans.add(toolUseID);
|
|
13024
|
+
}
|
|
13025
|
+
return {};
|
|
13026
|
+
}
|
|
12976
13027
|
const toolSpan = activeToolSpans.get(toolUseID);
|
|
12977
13028
|
if (!toolSpan) {
|
|
12978
13029
|
return {};
|
|
@@ -12997,12 +13048,14 @@ function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpSer
|
|
|
12997
13048
|
};
|
|
12998
13049
|
return { preToolUse, postToolUse, postToolUseFailure };
|
|
12999
13050
|
}
|
|
13000
|
-
function injectTracingHooks(options,
|
|
13051
|
+
function injectTracingHooks(options, resolveParentSpan, activeToolSpans, subAgentSpans, endedSubAgentSpans) {
|
|
13001
13052
|
const mcpServers = options.mcpServers;
|
|
13002
13053
|
const { preToolUse, postToolUse, postToolUseFailure } = createToolTracingHooks(
|
|
13003
|
-
|
|
13054
|
+
resolveParentSpan,
|
|
13004
13055
|
activeToolSpans,
|
|
13005
|
-
mcpServers
|
|
13056
|
+
mcpServers,
|
|
13057
|
+
subAgentSpans,
|
|
13058
|
+
endedSubAgentSpans
|
|
13006
13059
|
);
|
|
13007
13060
|
const existingHooks = _nullishCoalesce(options.hooks, () => ( {}));
|
|
13008
13061
|
return {
|
|
@@ -13048,18 +13101,45 @@ function filterSerializableOptions(options) {
|
|
|
13048
13101
|
}
|
|
13049
13102
|
return filtered;
|
|
13050
13103
|
}
|
|
13104
|
+
function isAsyncIterable(value) {
|
|
13105
|
+
return value !== null && value !== void 0 && typeof value[Symbol.asyncIterator] === "function";
|
|
13106
|
+
}
|
|
13051
13107
|
function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
13052
13108
|
const proxy = new Proxy(queryFn, {
|
|
13053
13109
|
apply(target, thisArg, argArray) {
|
|
13054
13110
|
const params = _nullishCoalesce(argArray[0], () => ( {}));
|
|
13055
13111
|
const { prompt, options = {} } = params;
|
|
13112
|
+
const promptIsAsyncIterable = isAsyncIterable(prompt);
|
|
13113
|
+
let capturedPromptMessages;
|
|
13114
|
+
let promptForQuery = prompt;
|
|
13115
|
+
let promptStarted = false;
|
|
13116
|
+
let resolvePromptDone;
|
|
13117
|
+
const promptDone = new Promise((resolve) => {
|
|
13118
|
+
resolvePromptDone = resolve;
|
|
13119
|
+
});
|
|
13120
|
+
if (promptIsAsyncIterable) {
|
|
13121
|
+
capturedPromptMessages = [];
|
|
13122
|
+
const originalPrompt = prompt;
|
|
13123
|
+
const capturingPrompt = (async function* () {
|
|
13124
|
+
promptStarted = true;
|
|
13125
|
+
try {
|
|
13126
|
+
for await (const msg of originalPrompt) {
|
|
13127
|
+
capturedPromptMessages.push(msg);
|
|
13128
|
+
yield msg;
|
|
13129
|
+
}
|
|
13130
|
+
} finally {
|
|
13131
|
+
_optionalChain([resolvePromptDone, 'optionalCall', _280 => _280()]);
|
|
13132
|
+
}
|
|
13133
|
+
})();
|
|
13134
|
+
promptForQuery = capturingPrompt;
|
|
13135
|
+
}
|
|
13056
13136
|
const span = startSpan({
|
|
13057
13137
|
name: "Claude Agent",
|
|
13058
13138
|
spanAttributes: {
|
|
13059
13139
|
type: "task" /* TASK */
|
|
13060
13140
|
},
|
|
13061
13141
|
event: {
|
|
13062
|
-
input: typeof prompt === "string" ? prompt :
|
|
13142
|
+
input: typeof prompt === "string" ? prompt : promptIsAsyncIterable ? void 0 : prompt !== void 0 ? String(prompt) : void 0,
|
|
13063
13143
|
metadata: filterSerializableOptions(options)
|
|
13064
13144
|
}
|
|
13065
13145
|
});
|
|
@@ -13070,19 +13150,28 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
13070
13150
|
let currentMessageStartTime = getCurrentUnixTimestamp();
|
|
13071
13151
|
const currentMessages = [];
|
|
13072
13152
|
const createLLMSpan = async () => {
|
|
13153
|
+
const parentToolUseId = _nullishCoalesce(_optionalChain([currentMessages, 'access', _281 => _281[0], 'optionalAccess', _282 => _282.parent_tool_use_id]), () => ( null));
|
|
13154
|
+
let parentSpanExport;
|
|
13155
|
+
if (parentToolUseId) {
|
|
13156
|
+
const subAgentSpan = subAgentSpans.get(parentToolUseId);
|
|
13157
|
+
parentSpanExport = subAgentSpan ? await subAgentSpan.export() : await span.export();
|
|
13158
|
+
} else {
|
|
13159
|
+
parentSpanExport = await span.export();
|
|
13160
|
+
}
|
|
13073
13161
|
const finalMessageContent = await _createLLMSpanForMessages(
|
|
13074
13162
|
currentMessages,
|
|
13075
13163
|
prompt,
|
|
13076
13164
|
finalResults,
|
|
13077
13165
|
options,
|
|
13078
13166
|
currentMessageStartTime,
|
|
13079
|
-
|
|
13167
|
+
capturedPromptMessages,
|
|
13168
|
+
parentSpanExport
|
|
13080
13169
|
);
|
|
13081
13170
|
if (finalMessageContent) {
|
|
13082
13171
|
finalResults.push(finalMessageContent);
|
|
13083
13172
|
}
|
|
13084
13173
|
const lastMessage = currentMessages[currentMessages.length - 1];
|
|
13085
|
-
if (_optionalChain([lastMessage, 'optionalAccess',
|
|
13174
|
+
if (_optionalChain([lastMessage, 'optionalAccess', _283 => _283.message, 'optionalAccess', _284 => _284.usage])) {
|
|
13086
13175
|
const outputTokens = getNumberProperty2(lastMessage.message.usage, "output_tokens") || 0;
|
|
13087
13176
|
accumulatedOutputTokens += outputTokens;
|
|
13088
13177
|
}
|
|
@@ -13090,12 +13179,34 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
13090
13179
|
};
|
|
13091
13180
|
const invocationTarget = thisArg === proxy || thisArg === void 0 ? _nullishCoalesce(defaultThis, () => ( thisArg)) : thisArg;
|
|
13092
13181
|
const activeToolSpans = /* @__PURE__ */ new Map();
|
|
13182
|
+
const subAgentSpans = /* @__PURE__ */ new Map();
|
|
13183
|
+
const endedSubAgentSpans = /* @__PURE__ */ new Set();
|
|
13184
|
+
const toolUseToParent = /* @__PURE__ */ new Map();
|
|
13185
|
+
const pendingSubAgentNames = /* @__PURE__ */ new Map();
|
|
13186
|
+
const resolveParentSpan = async (toolUseID) => {
|
|
13187
|
+
const parentToolUseId = toolUseToParent.get(toolUseID);
|
|
13188
|
+
if (parentToolUseId) {
|
|
13189
|
+
const subAgentSpan = subAgentSpans.get(parentToolUseId);
|
|
13190
|
+
if (subAgentSpan) {
|
|
13191
|
+
return subAgentSpan.export();
|
|
13192
|
+
}
|
|
13193
|
+
}
|
|
13194
|
+
return span.export();
|
|
13195
|
+
};
|
|
13093
13196
|
const optionsWithHooks = injectTracingHooks(
|
|
13094
13197
|
options,
|
|
13095
|
-
|
|
13096
|
-
activeToolSpans
|
|
13198
|
+
resolveParentSpan,
|
|
13199
|
+
activeToolSpans,
|
|
13200
|
+
subAgentSpans,
|
|
13201
|
+
endedSubAgentSpans
|
|
13097
13202
|
);
|
|
13098
|
-
const modifiedArgArray = [
|
|
13203
|
+
const modifiedArgArray = [
|
|
13204
|
+
{
|
|
13205
|
+
...params,
|
|
13206
|
+
...promptForQuery !== void 0 ? { prompt: promptForQuery } : {},
|
|
13207
|
+
options: optionsWithHooks
|
|
13208
|
+
}
|
|
13209
|
+
];
|
|
13099
13210
|
const originalGenerator = withCurrent(
|
|
13100
13211
|
span,
|
|
13101
13212
|
() => Reflect.apply(target, invocationTarget, modifiedArgArray)
|
|
@@ -13104,20 +13215,55 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
13104
13215
|
try {
|
|
13105
13216
|
for await (const message of originalGenerator) {
|
|
13106
13217
|
const currentTime = getCurrentUnixTimestamp();
|
|
13107
|
-
|
|
13218
|
+
if (message.type === "assistant" && Array.isArray(_optionalChain([message, 'access', _285 => _285.message, 'optionalAccess', _286 => _286.content]))) {
|
|
13219
|
+
const parentToolUseId = _nullishCoalesce(message.parent_tool_use_id, () => ( null));
|
|
13220
|
+
for (const block of message.message.content) {
|
|
13221
|
+
if (block.type === "tool_use" && block.id) {
|
|
13222
|
+
toolUseToParent.set(block.id, parentToolUseId);
|
|
13223
|
+
if (block.name === "Task" && _optionalChain([block, 'access', _287 => _287.input, 'optionalAccess', _288 => _288.subagent_type])) {
|
|
13224
|
+
pendingSubAgentNames.set(
|
|
13225
|
+
block.id,
|
|
13226
|
+
block.input.subagent_type
|
|
13227
|
+
);
|
|
13228
|
+
}
|
|
13229
|
+
}
|
|
13230
|
+
}
|
|
13231
|
+
}
|
|
13232
|
+
if ("parent_tool_use_id" in message) {
|
|
13233
|
+
const parentToolUseId = message.parent_tool_use_id;
|
|
13234
|
+
if (parentToolUseId && !subAgentSpans.has(parentToolUseId)) {
|
|
13235
|
+
const agentName = pendingSubAgentNames.get(parentToolUseId);
|
|
13236
|
+
const spanName = agentName ? `Agent: ${agentName}` : "Agent: sub-agent";
|
|
13237
|
+
const parentExport = await span.export();
|
|
13238
|
+
const subAgentSpan = startSpan({
|
|
13239
|
+
name: spanName,
|
|
13240
|
+
spanAttributes: { type: "task" /* TASK */ },
|
|
13241
|
+
event: {
|
|
13242
|
+
metadata: {
|
|
13243
|
+
...agentName && {
|
|
13244
|
+
"claude_agent_sdk.agent_type": agentName
|
|
13245
|
+
}
|
|
13246
|
+
}
|
|
13247
|
+
},
|
|
13248
|
+
parent: parentExport
|
|
13249
|
+
});
|
|
13250
|
+
subAgentSpans.set(parentToolUseId, subAgentSpan);
|
|
13251
|
+
}
|
|
13252
|
+
}
|
|
13253
|
+
const messageId = _optionalChain([message, 'access', _289 => _289.message, 'optionalAccess', _290 => _290.id]);
|
|
13108
13254
|
if (messageId && messageId !== currentMessageId) {
|
|
13109
13255
|
await createLLMSpan();
|
|
13110
13256
|
currentMessageId = messageId;
|
|
13111
13257
|
currentMessageStartTime = currentTime;
|
|
13112
13258
|
}
|
|
13113
|
-
if (message.type === "assistant" && _optionalChain([message, 'access',
|
|
13259
|
+
if (message.type === "assistant" && _optionalChain([message, 'access', _291 => _291.message, 'optionalAccess', _292 => _292.usage])) {
|
|
13114
13260
|
currentMessages.push(message);
|
|
13115
13261
|
}
|
|
13116
13262
|
if (message.type === "result" && message.usage) {
|
|
13117
13263
|
finalUsageMetrics = _extractUsageFromMessage(message);
|
|
13118
13264
|
if (currentMessages.length > 0 && finalUsageMetrics.completion_tokens !== void 0) {
|
|
13119
13265
|
const lastMessage = currentMessages[currentMessages.length - 1];
|
|
13120
|
-
if (_optionalChain([lastMessage, 'optionalAccess',
|
|
13266
|
+
if (_optionalChain([lastMessage, 'optionalAccess', _293 => _293.message, 'optionalAccess', _294 => _294.usage])) {
|
|
13121
13267
|
const adjustedTokens = finalUsageMetrics.completion_tokens - accumulatedOutputTokens;
|
|
13122
13268
|
if (adjustedTokens >= 0) {
|
|
13123
13269
|
lastMessage.message.usage.output_tokens = adjustedTokens;
|
|
@@ -13149,6 +13295,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
13149
13295
|
});
|
|
13150
13296
|
throw error;
|
|
13151
13297
|
} finally {
|
|
13298
|
+
for (const [id, subSpan] of subAgentSpans) {
|
|
13299
|
+
if (!endedSubAgentSpans.has(id)) {
|
|
13300
|
+
subSpan.end();
|
|
13301
|
+
}
|
|
13302
|
+
}
|
|
13303
|
+
subAgentSpans.clear();
|
|
13304
|
+
if (capturedPromptMessages) {
|
|
13305
|
+
if (promptStarted) {
|
|
13306
|
+
await promptDone;
|
|
13307
|
+
}
|
|
13308
|
+
if (capturedPromptMessages.length > 0) {
|
|
13309
|
+
span.log({
|
|
13310
|
+
input: _formatCapturedMessages(capturedPromptMessages)
|
|
13311
|
+
});
|
|
13312
|
+
}
|
|
13313
|
+
}
|
|
13152
13314
|
span.end();
|
|
13153
13315
|
}
|
|
13154
13316
|
})();
|
|
@@ -13176,19 +13338,30 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
13176
13338
|
});
|
|
13177
13339
|
return proxy;
|
|
13178
13340
|
}
|
|
13179
|
-
function _buildLLMInput(prompt, conversationHistory) {
|
|
13180
|
-
const
|
|
13181
|
-
|
|
13182
|
-
|
|
13183
|
-
|
|
13184
|
-
|
|
13341
|
+
function _buildLLMInput(prompt, conversationHistory, capturedPromptMessages) {
|
|
13342
|
+
const promptMessages = [];
|
|
13343
|
+
if (typeof prompt === "string") {
|
|
13344
|
+
promptMessages.push({ content: prompt, role: "user" });
|
|
13345
|
+
} else if (capturedPromptMessages && capturedPromptMessages.length > 0) {
|
|
13346
|
+
for (const msg of capturedPromptMessages) {
|
|
13347
|
+
const role = _optionalChain([msg, 'access', _295 => _295.message, 'optionalAccess', _296 => _296.role]);
|
|
13348
|
+
const content = _optionalChain([msg, 'access', _297 => _297.message, 'optionalAccess', _298 => _298.content]);
|
|
13349
|
+
if (role && content !== void 0) {
|
|
13350
|
+
promptMessages.push({ content, role });
|
|
13351
|
+
}
|
|
13352
|
+
}
|
|
13353
|
+
}
|
|
13354
|
+
const inputParts = [...promptMessages, ...conversationHistory];
|
|
13185
13355
|
return inputParts.length > 0 ? inputParts : void 0;
|
|
13186
13356
|
}
|
|
13357
|
+
function _formatCapturedMessages(messages) {
|
|
13358
|
+
return messages.length > 0 ? messages : [];
|
|
13359
|
+
}
|
|
13187
13360
|
function _extractUsageFromMessage(message) {
|
|
13188
13361
|
const metrics = {};
|
|
13189
13362
|
let usage;
|
|
13190
13363
|
if (message.type === "assistant") {
|
|
13191
|
-
usage = _optionalChain([message, 'access',
|
|
13364
|
+
usage = _optionalChain([message, 'access', _299 => _299.message, 'optionalAccess', _300 => _300.usage]);
|
|
13192
13365
|
} else if (message.type === "result") {
|
|
13193
13366
|
usage = message.usage;
|
|
13194
13367
|
}
|
|
@@ -13217,17 +13390,21 @@ function _extractUsageFromMessage(message) {
|
|
|
13217
13390
|
}
|
|
13218
13391
|
return metrics;
|
|
13219
13392
|
}
|
|
13220
|
-
async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, parentSpan) {
|
|
13393
|
+
async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, capturedPromptMessages, parentSpan) {
|
|
13221
13394
|
if (messages.length === 0) return void 0;
|
|
13222
13395
|
const lastMessage = messages[messages.length - 1];
|
|
13223
|
-
if (lastMessage.type !== "assistant" || !_optionalChain([lastMessage, 'access',
|
|
13396
|
+
if (lastMessage.type !== "assistant" || !_optionalChain([lastMessage, 'access', _301 => _301.message, 'optionalAccess', _302 => _302.usage])) {
|
|
13224
13397
|
return void 0;
|
|
13225
13398
|
}
|
|
13226
13399
|
const model = lastMessage.message.model || options.model;
|
|
13227
13400
|
const usage = _extractUsageFromMessage(lastMessage);
|
|
13228
|
-
const input = _buildLLMInput(
|
|
13401
|
+
const input = _buildLLMInput(
|
|
13402
|
+
prompt,
|
|
13403
|
+
conversationHistory,
|
|
13404
|
+
capturedPromptMessages
|
|
13405
|
+
);
|
|
13229
13406
|
const outputs = messages.map(
|
|
13230
|
-
(m) => _optionalChain([m, 'access',
|
|
13407
|
+
(m) => _optionalChain([m, 'access', _303 => _303.message, 'optionalAccess', _304 => _304.content]) && _optionalChain([m, 'access', _305 => _305.message, 'optionalAccess', _306 => _306.role]) ? { content: m.message.content, role: m.message.role } : void 0
|
|
13231
13408
|
).filter((c) => c !== void 0);
|
|
13232
13409
|
await traced(
|
|
13233
13410
|
(llmSpan) => {
|
|
@@ -13247,7 +13424,7 @@ async function _createLLMSpanForMessages(messages, prompt, conversationHistory,
|
|
|
13247
13424
|
parent: parentSpan
|
|
13248
13425
|
}
|
|
13249
13426
|
);
|
|
13250
|
-
return _optionalChain([lastMessage, 'access',
|
|
13427
|
+
return _optionalChain([lastMessage, 'access', _307 => _307.message, 'optionalAccess', _308 => _308.content]) && _optionalChain([lastMessage, 'access', _309 => _309.message, 'optionalAccess', _310 => _310.role]) ? { content: lastMessage.message.content, role: lastMessage.message.role } : void 0;
|
|
13251
13428
|
}
|
|
13252
13429
|
function wrapClaudeAgentSDK(sdk) {
|
|
13253
13430
|
const cache = /* @__PURE__ */ new Map();
|
|
@@ -13521,7 +13698,7 @@ function serializePart(part) {
|
|
|
13521
13698
|
return part;
|
|
13522
13699
|
}
|
|
13523
13700
|
function serializeTools(params) {
|
|
13524
|
-
if (!_optionalChain([params, 'access',
|
|
13701
|
+
if (!_optionalChain([params, 'access', _311 => _311.config, 'optionalAccess', _312 => _312.tools])) {
|
|
13525
13702
|
return null;
|
|
13526
13703
|
}
|
|
13527
13704
|
try {
|
|
@@ -13604,7 +13781,7 @@ function aggregateGenerateContentChunks(chunks, start, firstTokenTime) {
|
|
|
13604
13781
|
}
|
|
13605
13782
|
if (chunk.candidates && Array.isArray(chunk.candidates)) {
|
|
13606
13783
|
for (const candidate of chunk.candidates) {
|
|
13607
|
-
if (_optionalChain([candidate, 'access',
|
|
13784
|
+
if (_optionalChain([candidate, 'access', _313 => _313.content, 'optionalAccess', _314 => _314.parts])) {
|
|
13608
13785
|
for (const part of candidate.content.parts) {
|
|
13609
13786
|
if (part.text !== void 0) {
|
|
13610
13787
|
if (part.thought) {
|
|
@@ -13635,7 +13812,7 @@ function aggregateGenerateContentChunks(chunks, start, firstTokenTime) {
|
|
|
13635
13812
|
parts.push({ text });
|
|
13636
13813
|
}
|
|
13637
13814
|
parts.push(...otherParts);
|
|
13638
|
-
if (parts.length > 0 && _optionalChain([lastResponse, 'optionalAccess',
|
|
13815
|
+
if (parts.length > 0 && _optionalChain([lastResponse, 'optionalAccess', _315 => _315.candidates])) {
|
|
13639
13816
|
const candidates = [];
|
|
13640
13817
|
for (const candidate of lastResponse.candidates) {
|
|
13641
13818
|
const candidateDict = {
|
|
@@ -13982,7 +14159,7 @@ function unescapePath(path2) {
|
|
|
13982
14159
|
}
|
|
13983
14160
|
var graph_framework_default = { createGraph };
|
|
13984
14161
|
|
|
13985
|
-
// ../node_modules
|
|
14162
|
+
// ../node_modules/async/dist/async.mjs
|
|
13986
14163
|
function initialParams(fn) {
|
|
13987
14164
|
return function(...args) {
|
|
13988
14165
|
var callback = args.pop();
|
|
@@ -14053,7 +14230,7 @@ function isAsync(fn) {
|
|
|
14053
14230
|
function isAsyncGenerator2(fn) {
|
|
14054
14231
|
return fn[Symbol.toStringTag] === "AsyncGenerator";
|
|
14055
14232
|
}
|
|
14056
|
-
function
|
|
14233
|
+
function isAsyncIterable2(obj) {
|
|
14057
14234
|
return typeof obj[Symbol.asyncIterator] === "function";
|
|
14058
14235
|
}
|
|
14059
14236
|
function wrapAsync(asyncFn) {
|
|
@@ -14107,7 +14284,6 @@ function isArrayLike(value) {
|
|
|
14107
14284
|
return value && typeof value.length === "number" && value.length >= 0 && value.length % 1 === 0;
|
|
14108
14285
|
}
|
|
14109
14286
|
var breakLoop = {};
|
|
14110
|
-
var breakLoop$1 = breakLoop;
|
|
14111
14287
|
function once(fn) {
|
|
14112
14288
|
function wrapper(...args) {
|
|
14113
14289
|
if (fn === null) return;
|
|
@@ -14199,7 +14375,7 @@ function asyncEachOfLimit(generator, limit, iteratee, callback) {
|
|
|
14199
14375
|
canceled = true;
|
|
14200
14376
|
return;
|
|
14201
14377
|
}
|
|
14202
|
-
if (result === breakLoop
|
|
14378
|
+
if (result === breakLoop || done && running <= 0) {
|
|
14203
14379
|
done = true;
|
|
14204
14380
|
return callback(null);
|
|
14205
14381
|
}
|
|
@@ -14225,7 +14401,7 @@ var eachOfLimit$2 = (limit) => {
|
|
|
14225
14401
|
if (isAsyncGenerator2(obj)) {
|
|
14226
14402
|
return asyncEachOfLimit(obj, limit, iteratee, callback);
|
|
14227
14403
|
}
|
|
14228
|
-
if (
|
|
14404
|
+
if (isAsyncIterable2(obj)) {
|
|
14229
14405
|
return asyncEachOfLimit(obj[Symbol.asyncIterator](), limit, iteratee, callback);
|
|
14230
14406
|
}
|
|
14231
14407
|
var nextElem = createIterator(obj);
|
|
@@ -14242,7 +14418,7 @@ var eachOfLimit$2 = (limit) => {
|
|
|
14242
14418
|
} else if (err === false) {
|
|
14243
14419
|
done = true;
|
|
14244
14420
|
canceled = true;
|
|
14245
|
-
} else if (value === breakLoop
|
|
14421
|
+
} else if (value === breakLoop || done && running <= 0) {
|
|
14246
14422
|
done = true;
|
|
14247
14423
|
return callback(null);
|
|
14248
14424
|
} else if (!looping) {
|
|
@@ -14285,7 +14461,7 @@ function eachOfArrayLike(coll, iteratee, callback) {
|
|
|
14285
14461
|
if (canceled === true) return;
|
|
14286
14462
|
if (err) {
|
|
14287
14463
|
callback(err);
|
|
14288
|
-
} else if (++completed === length || value === breakLoop
|
|
14464
|
+
} else if (++completed === length || value === breakLoop) {
|
|
14289
14465
|
callback(null);
|
|
14290
14466
|
}
|
|
14291
14467
|
}
|
|
@@ -14681,7 +14857,7 @@ function _createTester(check, getResult) {
|
|
|
14681
14857
|
if (check(result) && !testResult) {
|
|
14682
14858
|
testPassed = true;
|
|
14683
14859
|
testResult = getResult(true, value);
|
|
14684
|
-
return callback(null, breakLoop
|
|
14860
|
+
return callback(null, breakLoop);
|
|
14685
14861
|
}
|
|
14686
14862
|
callback();
|
|
14687
14863
|
});
|
|
@@ -15093,7 +15269,7 @@ var CachedSpanFetcher = (_class18 = class {
|
|
|
15093
15269
|
spanType
|
|
15094
15270
|
);
|
|
15095
15271
|
const rows = await fetcher.fetchedData();
|
|
15096
|
-
return rows.filter((row) => _optionalChain([row, 'access',
|
|
15272
|
+
return rows.filter((row) => _optionalChain([row, 'access', _316 => _316.span_attributes, 'optionalAccess', _317 => _317.purpose]) !== "scorer").map((row) => ({
|
|
15097
15273
|
input: row.input,
|
|
15098
15274
|
output: row.output,
|
|
15099
15275
|
metadata: row.metadata,
|
|
@@ -15127,7 +15303,7 @@ var CachedSpanFetcher = (_class18 = class {
|
|
|
15127
15303
|
async fetchSpans(spanType) {
|
|
15128
15304
|
const spans = await this.fetchFn(spanType);
|
|
15129
15305
|
for (const span of spans) {
|
|
15130
|
-
const type = _nullishCoalesce(_optionalChain([span, 'access',
|
|
15306
|
+
const type = _nullishCoalesce(_optionalChain([span, 'access', _318 => _318.span_attributes, 'optionalAccess', _319 => _319.type]), () => ( ""));
|
|
15131
15307
|
const existing = _nullishCoalesce(this.spanCache.get(type), () => ( []));
|
|
15132
15308
|
existing.push(span);
|
|
15133
15309
|
this.spanCache.set(type, existing);
|
|
@@ -15207,11 +15383,11 @@ var LocalTrace = (_class19 = class {
|
|
|
15207
15383
|
const cachedSpans = this.state.spanCache.getByRootSpanId(this.rootSpanId);
|
|
15208
15384
|
if (cachedSpans && cachedSpans.length > 0) {
|
|
15209
15385
|
let spans = cachedSpans.filter(
|
|
15210
|
-
(span) => _optionalChain([span, 'access',
|
|
15386
|
+
(span) => _optionalChain([span, 'access', _320 => _320.span_attributes, 'optionalAccess', _321 => _321.purpose]) !== "scorer"
|
|
15211
15387
|
);
|
|
15212
15388
|
if (spanType && spanType.length > 0) {
|
|
15213
15389
|
spans = spans.filter(
|
|
15214
|
-
(span) => spanType.includes(_nullishCoalesce(_optionalChain([span, 'access',
|
|
15390
|
+
(span) => spanType.includes(_nullishCoalesce(_optionalChain([span, 'access', _322 => _322.span_attributes, 'optionalAccess', _323 => _323.type]), () => ( "")))
|
|
15215
15391
|
);
|
|
15216
15392
|
}
|
|
15217
15393
|
return spans.map((span) => ({
|
|
@@ -15230,7 +15406,7 @@ var LocalTrace = (_class19 = class {
|
|
|
15230
15406
|
* Calls the API with the project_default preprocessor (which falls back to "thread").
|
|
15231
15407
|
*/
|
|
15232
15408
|
async getThread(options) {
|
|
15233
|
-
const cacheKey = _nullishCoalesce(_optionalChain([options, 'optionalAccess',
|
|
15409
|
+
const cacheKey = _nullishCoalesce(_optionalChain([options, 'optionalAccess', _324 => _324.preprocessor]), () => ( "project_default"));
|
|
15234
15410
|
if (!this.threadCache.has(cacheKey)) {
|
|
15235
15411
|
const promise = this.fetchThread(options);
|
|
15236
15412
|
this.threadCache.set(cacheKey, promise);
|
|
@@ -15241,7 +15417,7 @@ var LocalTrace = (_class19 = class {
|
|
|
15241
15417
|
await this.ensureSpansReady();
|
|
15242
15418
|
await this.state.login({});
|
|
15243
15419
|
const result = await invoke({
|
|
15244
|
-
globalFunction: _nullishCoalesce(_optionalChain([options, 'optionalAccess',
|
|
15420
|
+
globalFunction: _nullishCoalesce(_optionalChain([options, 'optionalAccess', _325 => _325.preprocessor]), () => ( "project_default")),
|
|
15245
15421
|
functionType: "preprocessor",
|
|
15246
15422
|
input: {
|
|
15247
15423
|
trace_ref: {
|
|
@@ -15400,10 +15576,10 @@ function validateParametersWithJsonSchema(parameters, schema) {
|
|
|
15400
15576
|
const ajv = new (0, _ajv2.default)({ coerceTypes: true, useDefaults: true, strict: false });
|
|
15401
15577
|
const validate = ajv.compile(schema);
|
|
15402
15578
|
if (!validate(parameters)) {
|
|
15403
|
-
const errorMessages = _optionalChain([validate, 'access',
|
|
15579
|
+
const errorMessages = _optionalChain([validate, 'access', _326 => _326.errors, 'optionalAccess', _327 => _327.map, 'call', _328 => _328((err) => {
|
|
15404
15580
|
const path2 = err.instancePath || "root";
|
|
15405
15581
|
return `${path2}: ${err.message}`;
|
|
15406
|
-
}), 'access',
|
|
15582
|
+
}), 'access', _329 => _329.join, 'call', _330 => _330(", ")]);
|
|
15407
15583
|
throw Error(`Invalid parameters: ${errorMessages}`);
|
|
15408
15584
|
}
|
|
15409
15585
|
return parameters;
|
|
@@ -15459,7 +15635,7 @@ function callEvaluatorData(data) {
|
|
|
15459
15635
|
baseExperiment
|
|
15460
15636
|
};
|
|
15461
15637
|
}
|
|
15462
|
-
function
|
|
15638
|
+
function isAsyncIterable3(value) {
|
|
15463
15639
|
return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
|
|
15464
15640
|
}
|
|
15465
15641
|
function isIterable(value) {
|
|
@@ -15644,7 +15820,7 @@ var defaultErrorScoreHandler = ({
|
|
|
15644
15820
|
};
|
|
15645
15821
|
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
15646
15822
|
if (enableCache) {
|
|
15647
|
-
_optionalChain([(_nullishCoalesce(evaluator.state, () => ( _internalGetGlobalState()))), 'optionalAccess',
|
|
15823
|
+
_optionalChain([(_nullishCoalesce(evaluator.state, () => ( _internalGetGlobalState()))), 'optionalAccess', _331 => _331.spanCache, 'optionalAccess', _332 => _332.start, 'call', _333 => _333()]);
|
|
15648
15824
|
}
|
|
15649
15825
|
try {
|
|
15650
15826
|
if (typeof evaluator.data === "string") {
|
|
@@ -15680,7 +15856,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15680
15856
|
}
|
|
15681
15857
|
const resolvedDataResult = dataResult instanceof Promise ? await dataResult : dataResult;
|
|
15682
15858
|
const dataIterable = (() => {
|
|
15683
|
-
if (
|
|
15859
|
+
if (isAsyncIterable3(resolvedDataResult)) {
|
|
15684
15860
|
return resolvedDataResult;
|
|
15685
15861
|
}
|
|
15686
15862
|
if (Array.isArray(resolvedDataResult) || isIterable(resolvedDataResult)) {
|
|
@@ -15755,7 +15931,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15755
15931
|
objectType: parentComponents ? spanObjectTypeV3ToTypedString(
|
|
15756
15932
|
parentComponents.data.object_type
|
|
15757
15933
|
) : "experiment",
|
|
15758
|
-
objectId: await _asyncNullishCoalesce(await _asyncOptionalChain([parentComponents, 'optionalAccess', async
|
|
15934
|
+
objectId: await _asyncNullishCoalesce(await _asyncOptionalChain([parentComponents, 'optionalAccess', async _334 => _334.data, 'access', async _335 => _335.object_id]), async () => ( (experimentIdPromise ? await _asyncNullishCoalesce(await experimentIdPromise, async () => ( "")) : ""))),
|
|
15759
15935
|
rootSpanId: rootSpan.rootSpanId,
|
|
15760
15936
|
ensureSpansFlushed,
|
|
15761
15937
|
state
|
|
@@ -15781,10 +15957,10 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15781
15957
|
span,
|
|
15782
15958
|
parameters: _nullishCoalesce(parameters, () => ( {})),
|
|
15783
15959
|
reportProgress: (event) => {
|
|
15784
|
-
_optionalChain([stream, 'optionalCall',
|
|
15960
|
+
_optionalChain([stream, 'optionalCall', _336 => _336({
|
|
15785
15961
|
...event,
|
|
15786
15962
|
id: rootSpan.id,
|
|
15787
|
-
origin: _optionalChain([baseEvent, 'access',
|
|
15963
|
+
origin: _optionalChain([baseEvent, 'access', _337 => _337.event, 'optionalAccess', _338 => _338.origin]),
|
|
15788
15964
|
name: evaluator.evalName,
|
|
15789
15965
|
object_type: "task"
|
|
15790
15966
|
})]);
|
|
@@ -15948,7 +16124,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15948
16124
|
metadata,
|
|
15949
16125
|
scores: mergedScores,
|
|
15950
16126
|
error,
|
|
15951
|
-
origin: _optionalChain([baseEvent, 'access',
|
|
16127
|
+
origin: _optionalChain([baseEvent, 'access', _339 => _339.event, 'optionalAccess', _340 => _340.origin])
|
|
15952
16128
|
});
|
|
15953
16129
|
}
|
|
15954
16130
|
};
|
|
@@ -15981,7 +16157,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15981
16157
|
break;
|
|
15982
16158
|
}
|
|
15983
16159
|
scheduledTrials++;
|
|
15984
|
-
_optionalChain([progressReporter, 'access',
|
|
16160
|
+
_optionalChain([progressReporter, 'access', _341 => _341.setTotal, 'optionalCall', _342 => _342(evaluator.evalName, scheduledTrials)]);
|
|
15985
16161
|
q.push({ datum, trialIndex });
|
|
15986
16162
|
}
|
|
15987
16163
|
}
|
|
@@ -16056,9 +16232,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
16056
16232
|
);
|
|
16057
16233
|
} finally {
|
|
16058
16234
|
if (enableCache) {
|
|
16059
|
-
const spanCache = _optionalChain([(_nullishCoalesce(evaluator.state, () => ( _internalGetGlobalState()))), 'optionalAccess',
|
|
16060
|
-
_optionalChain([spanCache, 'optionalAccess',
|
|
16061
|
-
_optionalChain([spanCache, 'optionalAccess',
|
|
16235
|
+
const spanCache = _optionalChain([(_nullishCoalesce(evaluator.state, () => ( _internalGetGlobalState()))), 'optionalAccess', _343 => _343.spanCache]);
|
|
16236
|
+
_optionalChain([spanCache, 'optionalAccess', _344 => _344.dispose, 'call', _345 => _345()]);
|
|
16237
|
+
_optionalChain([spanCache, 'optionalAccess', _346 => _346.stop, 'call', _347 => _347()]);
|
|
16062
16238
|
}
|
|
16063
16239
|
}
|
|
16064
16240
|
}
|
|
@@ -16532,6 +16708,7 @@ var CodeParameters = class {
|
|
|
16532
16708
|
this.metadata = opts.metadata;
|
|
16533
16709
|
}
|
|
16534
16710
|
async toFunctionDefinition(projectNameToId) {
|
|
16711
|
+
const schema = serializeEvalParameterstoParametersSchema(this.schema);
|
|
16535
16712
|
return {
|
|
16536
16713
|
project_id: await projectNameToId.resolve(this.project),
|
|
16537
16714
|
name: this.name,
|
|
@@ -16540,8 +16717,8 @@ var CodeParameters = class {
|
|
|
16540
16717
|
function_type: "parameters",
|
|
16541
16718
|
function_data: {
|
|
16542
16719
|
type: "parameters",
|
|
16543
|
-
data:
|
|
16544
|
-
__schema:
|
|
16720
|
+
data: getDefaultDataFromParametersSchema(schema),
|
|
16721
|
+
__schema: schema
|
|
16545
16722
|
},
|
|
16546
16723
|
if_exists: this.ifExists,
|
|
16547
16724
|
metadata: this.metadata
|
|
@@ -16596,6 +16773,16 @@ function serializeEvalParameterstoParametersSchema(parameters) {
|
|
|
16596
16773
|
additionalProperties: true
|
|
16597
16774
|
};
|
|
16598
16775
|
}
|
|
16776
|
+
function getDefaultDataFromParametersSchema(schema) {
|
|
16777
|
+
return Object.fromEntries(
|
|
16778
|
+
Object.entries(schema.properties).flatMap(([name, value]) => {
|
|
16779
|
+
if (!("default" in value)) {
|
|
16780
|
+
return [];
|
|
16781
|
+
}
|
|
16782
|
+
return [[name, value.default]];
|
|
16783
|
+
})
|
|
16784
|
+
);
|
|
16785
|
+
}
|
|
16599
16786
|
var ProjectNameIdMap = (_class23 = class {constructor() { _class23.prototype.__init75.call(this);_class23.prototype.__init76.call(this); }
|
|
16600
16787
|
__init75() {this.nameToId = {}}
|
|
16601
16788
|
__init76() {this.idToName = {}}
|