braintrust 2.2.1-rc.0 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/dist/index.d.mts +1 -1
- package/dev/dist/index.d.ts +1 -1
- package/dev/dist/index.js +34 -12
- package/dev/dist/index.mjs +40 -18
- package/dist/browser.d.mts +240 -240
- package/dist/browser.d.ts +240 -240
- package/dist/browser.js +249 -62
- package/dist/browser.mjs +221 -34
- package/dist/cli.js +1241 -723
- package/dist/index.d.mts +240 -240
- package/dist/index.d.ts +240 -240
- package/dist/index.js +249 -62
- package/dist/index.mjs +227 -40
- package/package.json +23 -23
- package/util/dist/index.d.mts +3 -2
- package/util/dist/index.d.ts +3 -2
- package/util/dist/index.js +3 -1
- package/util/dist/index.mjs +3 -1
- package/LICENSE +0 -201
- package/dist/eval-runner.js +0 -12680
package/dist/browser.js
CHANGED
|
@@ -1928,7 +1928,8 @@ var SpanType = _v3.z.union([
|
|
|
1928
1928
|
"automation",
|
|
1929
1929
|
"facet",
|
|
1930
1930
|
"preprocessor",
|
|
1931
|
-
"classifier"
|
|
1931
|
+
"classifier",
|
|
1932
|
+
"review"
|
|
1932
1933
|
]),
|
|
1933
1934
|
_v3.z.null()
|
|
1934
1935
|
]);
|
|
@@ -5187,12 +5188,19 @@ function updateSpanImpl({
|
|
|
5187
5188
|
parentObjectType,
|
|
5188
5189
|
parentObjectId,
|
|
5189
5190
|
id,
|
|
5191
|
+
root_span_id,
|
|
5192
|
+
span_id,
|
|
5190
5193
|
event
|
|
5191
5194
|
}) {
|
|
5195
|
+
if (isEmpty2(root_span_id) !== isEmpty2(span_id)) {
|
|
5196
|
+
throw new Error("both root_span_id and span_id must be set, or neither");
|
|
5197
|
+
}
|
|
5198
|
+
const hasExplicitSpanIds = root_span_id !== void 0 && span_id !== void 0;
|
|
5192
5199
|
const updateEvent = deepCopyEvent(
|
|
5193
5200
|
validateAndSanitizeExperimentLogPartialArgs({
|
|
5201
|
+
...event,
|
|
5194
5202
|
id,
|
|
5195
|
-
...
|
|
5203
|
+
...hasExplicitSpanIds ? { root_span_id, span_id } : {}
|
|
5196
5204
|
})
|
|
5197
5205
|
);
|
|
5198
5206
|
const parentIds = async () => new SpanComponentsV3({
|
|
@@ -5224,6 +5232,8 @@ function updateSpan({
|
|
|
5224
5232
|
spanComponentsToObjectIdLambda(resolvedState, components)
|
|
5225
5233
|
),
|
|
5226
5234
|
id: components.data.row_id,
|
|
5235
|
+
root_span_id: components.data.root_span_id,
|
|
5236
|
+
span_id: components.data.span_id,
|
|
5227
5237
|
event
|
|
5228
5238
|
});
|
|
5229
5239
|
}
|
|
@@ -5535,7 +5545,7 @@ var Logger = (_class7 = class {
|
|
|
5535
5545
|
* @param event The event data to update the span with. Must include `id`. See {@link Experiment.log} for a full list of valid fields.
|
|
5536
5546
|
*/
|
|
5537
5547
|
updateSpan(event) {
|
|
5538
|
-
const { id, ...eventRest } = event;
|
|
5548
|
+
const { id, root_span_id, span_id, ...eventRest } = event;
|
|
5539
5549
|
if (!id) {
|
|
5540
5550
|
throw new Error("Span id is required to update a span");
|
|
5541
5551
|
}
|
|
@@ -5544,6 +5554,8 @@ var Logger = (_class7 = class {
|
|
|
5544
5554
|
parentObjectType: this.parentObjectType(),
|
|
5545
5555
|
parentObjectId: this.lazyId,
|
|
5546
5556
|
id,
|
|
5557
|
+
root_span_id,
|
|
5558
|
+
span_id,
|
|
5547
5559
|
event: eventRest
|
|
5548
5560
|
});
|
|
5549
5561
|
}
|
|
@@ -7826,7 +7838,7 @@ View complete results in Braintrust or run experiment.summarize() again.`
|
|
|
7826
7838
|
* @param event The event data to update the span with. Must include `id`. See {@link Experiment.log} for a full list of valid fields.
|
|
7827
7839
|
*/
|
|
7828
7840
|
updateSpan(event) {
|
|
7829
|
-
const { id, ...eventRest } = event;
|
|
7841
|
+
const { id, root_span_id, span_id, ...eventRest } = event;
|
|
7830
7842
|
if (!id) {
|
|
7831
7843
|
throw new Error("Span id is required to update a span");
|
|
7832
7844
|
}
|
|
@@ -7835,6 +7847,8 @@ View complete results in Braintrust or run experiment.summarize() again.`
|
|
|
7835
7847
|
parentObjectType: this.parentObjectType(),
|
|
7836
7848
|
parentObjectId: this.lazyId,
|
|
7837
7849
|
id,
|
|
7850
|
+
root_span_id,
|
|
7851
|
+
span_id,
|
|
7838
7852
|
event: eventRest
|
|
7839
7853
|
});
|
|
7840
7854
|
}
|
|
@@ -12695,14 +12709,17 @@ function parseToolName(rawToolName) {
|
|
|
12695
12709
|
rawToolName
|
|
12696
12710
|
};
|
|
12697
12711
|
}
|
|
12698
|
-
function createToolTracingHooks(
|
|
12712
|
+
function createToolTracingHooks(resolveParentSpan, activeToolSpans, mcpServers, subAgentSpans, endedSubAgentSpans) {
|
|
12699
12713
|
const preToolUse = async (input, toolUseID) => {
|
|
12700
12714
|
if (input.hook_event_name !== "PreToolUse" || !toolUseID) {
|
|
12701
12715
|
return {};
|
|
12702
12716
|
}
|
|
12717
|
+
if (input.tool_name === "Task") {
|
|
12718
|
+
return {};
|
|
12719
|
+
}
|
|
12703
12720
|
const parsed = parseToolName(input.tool_name);
|
|
12704
12721
|
const mcpMetadata = getMcpServerMetadata(parsed.mcpServer, mcpServers);
|
|
12705
|
-
const parentExport = await
|
|
12722
|
+
const parentExport = await resolveParentSpan(toolUseID);
|
|
12706
12723
|
const toolSpan = startSpan({
|
|
12707
12724
|
name: parsed.displayName,
|
|
12708
12725
|
spanAttributes: { type: "tool" /* TOOL */ },
|
|
@@ -12730,6 +12747,30 @@ function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpSer
|
|
|
12730
12747
|
if (input.hook_event_name !== "PostToolUse" || !toolUseID) {
|
|
12731
12748
|
return {};
|
|
12732
12749
|
}
|
|
12750
|
+
const subAgentSpan = subAgentSpans.get(toolUseID);
|
|
12751
|
+
if (subAgentSpan) {
|
|
12752
|
+
try {
|
|
12753
|
+
const response = input.tool_response;
|
|
12754
|
+
const metadata = {};
|
|
12755
|
+
if (_optionalChain([response, 'optionalAccess', _267 => _267.status])) {
|
|
12756
|
+
metadata["claude_agent_sdk.status"] = response.status;
|
|
12757
|
+
}
|
|
12758
|
+
if (_optionalChain([response, 'optionalAccess', _268 => _268.totalDurationMs])) {
|
|
12759
|
+
metadata["claude_agent_sdk.duration_ms"] = response.totalDurationMs;
|
|
12760
|
+
}
|
|
12761
|
+
if (_optionalChain([response, 'optionalAccess', _269 => _269.totalToolUseCount]) !== void 0) {
|
|
12762
|
+
metadata["claude_agent_sdk.tool_use_count"] = response.totalToolUseCount;
|
|
12763
|
+
}
|
|
12764
|
+
subAgentSpan.log({
|
|
12765
|
+
output: _optionalChain([response, 'optionalAccess', _270 => _270.content]),
|
|
12766
|
+
metadata
|
|
12767
|
+
});
|
|
12768
|
+
} finally {
|
|
12769
|
+
subAgentSpan.end();
|
|
12770
|
+
endedSubAgentSpans.add(toolUseID);
|
|
12771
|
+
}
|
|
12772
|
+
return {};
|
|
12773
|
+
}
|
|
12733
12774
|
const toolSpan = activeToolSpans.get(toolUseID);
|
|
12734
12775
|
if (!toolSpan) {
|
|
12735
12776
|
return {};
|
|
@@ -12746,6 +12787,16 @@ function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpSer
|
|
|
12746
12787
|
if (input.hook_event_name !== "PostToolUseFailure" || !toolUseID) {
|
|
12747
12788
|
return {};
|
|
12748
12789
|
}
|
|
12790
|
+
const subAgentSpan = subAgentSpans.get(toolUseID);
|
|
12791
|
+
if (subAgentSpan) {
|
|
12792
|
+
try {
|
|
12793
|
+
subAgentSpan.log({ error: input.error });
|
|
12794
|
+
} finally {
|
|
12795
|
+
subAgentSpan.end();
|
|
12796
|
+
endedSubAgentSpans.add(toolUseID);
|
|
12797
|
+
}
|
|
12798
|
+
return {};
|
|
12799
|
+
}
|
|
12749
12800
|
const toolSpan = activeToolSpans.get(toolUseID);
|
|
12750
12801
|
if (!toolSpan) {
|
|
12751
12802
|
return {};
|
|
@@ -12770,12 +12821,14 @@ function createToolTracingHooks(parentSpanExportPromise, activeToolSpans, mcpSer
|
|
|
12770
12821
|
};
|
|
12771
12822
|
return { preToolUse, postToolUse, postToolUseFailure };
|
|
12772
12823
|
}
|
|
12773
|
-
function injectTracingHooks(options,
|
|
12824
|
+
function injectTracingHooks(options, resolveParentSpan, activeToolSpans, subAgentSpans, endedSubAgentSpans) {
|
|
12774
12825
|
const mcpServers = options.mcpServers;
|
|
12775
12826
|
const { preToolUse, postToolUse, postToolUseFailure } = createToolTracingHooks(
|
|
12776
|
-
|
|
12827
|
+
resolveParentSpan,
|
|
12777
12828
|
activeToolSpans,
|
|
12778
|
-
mcpServers
|
|
12829
|
+
mcpServers,
|
|
12830
|
+
subAgentSpans,
|
|
12831
|
+
endedSubAgentSpans
|
|
12779
12832
|
);
|
|
12780
12833
|
const existingHooks = _nullishCoalesce(options.hooks, () => ( {}));
|
|
12781
12834
|
return {
|
|
@@ -12821,18 +12874,45 @@ function filterSerializableOptions(options) {
|
|
|
12821
12874
|
}
|
|
12822
12875
|
return filtered;
|
|
12823
12876
|
}
|
|
12877
|
+
function isAsyncIterable(value) {
|
|
12878
|
+
return value !== null && value !== void 0 && typeof value[Symbol.asyncIterator] === "function";
|
|
12879
|
+
}
|
|
12824
12880
|
function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
12825
12881
|
const proxy = new Proxy(queryFn, {
|
|
12826
12882
|
apply(target, thisArg, argArray) {
|
|
12827
12883
|
const params = _nullishCoalesce(argArray[0], () => ( {}));
|
|
12828
12884
|
const { prompt, options = {} } = params;
|
|
12885
|
+
const promptIsAsyncIterable = isAsyncIterable(prompt);
|
|
12886
|
+
let capturedPromptMessages;
|
|
12887
|
+
let promptForQuery = prompt;
|
|
12888
|
+
let promptStarted = false;
|
|
12889
|
+
let resolvePromptDone;
|
|
12890
|
+
const promptDone = new Promise((resolve) => {
|
|
12891
|
+
resolvePromptDone = resolve;
|
|
12892
|
+
});
|
|
12893
|
+
if (promptIsAsyncIterable) {
|
|
12894
|
+
capturedPromptMessages = [];
|
|
12895
|
+
const originalPrompt = prompt;
|
|
12896
|
+
const capturingPrompt = (async function* () {
|
|
12897
|
+
promptStarted = true;
|
|
12898
|
+
try {
|
|
12899
|
+
for await (const msg of originalPrompt) {
|
|
12900
|
+
capturedPromptMessages.push(msg);
|
|
12901
|
+
yield msg;
|
|
12902
|
+
}
|
|
12903
|
+
} finally {
|
|
12904
|
+
_optionalChain([resolvePromptDone, 'optionalCall', _271 => _271()]);
|
|
12905
|
+
}
|
|
12906
|
+
})();
|
|
12907
|
+
promptForQuery = capturingPrompt;
|
|
12908
|
+
}
|
|
12829
12909
|
const span = startSpan({
|
|
12830
12910
|
name: "Claude Agent",
|
|
12831
12911
|
spanAttributes: {
|
|
12832
12912
|
type: "task" /* TASK */
|
|
12833
12913
|
},
|
|
12834
12914
|
event: {
|
|
12835
|
-
input: typeof prompt === "string" ? prompt :
|
|
12915
|
+
input: typeof prompt === "string" ? prompt : promptIsAsyncIterable ? void 0 : prompt !== void 0 ? String(prompt) : void 0,
|
|
12836
12916
|
metadata: filterSerializableOptions(options)
|
|
12837
12917
|
}
|
|
12838
12918
|
});
|
|
@@ -12843,19 +12923,28 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12843
12923
|
let currentMessageStartTime = getCurrentUnixTimestamp();
|
|
12844
12924
|
const currentMessages = [];
|
|
12845
12925
|
const createLLMSpan = async () => {
|
|
12926
|
+
const parentToolUseId = _nullishCoalesce(_optionalChain([currentMessages, 'access', _272 => _272[0], 'optionalAccess', _273 => _273.parent_tool_use_id]), () => ( null));
|
|
12927
|
+
let parentSpanExport;
|
|
12928
|
+
if (parentToolUseId) {
|
|
12929
|
+
const subAgentSpan = subAgentSpans.get(parentToolUseId);
|
|
12930
|
+
parentSpanExport = subAgentSpan ? await subAgentSpan.export() : await span.export();
|
|
12931
|
+
} else {
|
|
12932
|
+
parentSpanExport = await span.export();
|
|
12933
|
+
}
|
|
12846
12934
|
const finalMessageContent = await _createLLMSpanForMessages(
|
|
12847
12935
|
currentMessages,
|
|
12848
12936
|
prompt,
|
|
12849
12937
|
finalResults,
|
|
12850
12938
|
options,
|
|
12851
12939
|
currentMessageStartTime,
|
|
12852
|
-
|
|
12940
|
+
capturedPromptMessages,
|
|
12941
|
+
parentSpanExport
|
|
12853
12942
|
);
|
|
12854
12943
|
if (finalMessageContent) {
|
|
12855
12944
|
finalResults.push(finalMessageContent);
|
|
12856
12945
|
}
|
|
12857
12946
|
const lastMessage = currentMessages[currentMessages.length - 1];
|
|
12858
|
-
if (_optionalChain([lastMessage, 'optionalAccess',
|
|
12947
|
+
if (_optionalChain([lastMessage, 'optionalAccess', _274 => _274.message, 'optionalAccess', _275 => _275.usage])) {
|
|
12859
12948
|
const outputTokens = getNumberProperty2(lastMessage.message.usage, "output_tokens") || 0;
|
|
12860
12949
|
accumulatedOutputTokens += outputTokens;
|
|
12861
12950
|
}
|
|
@@ -12863,12 +12952,34 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12863
12952
|
};
|
|
12864
12953
|
const invocationTarget = thisArg === proxy || thisArg === void 0 ? _nullishCoalesce(defaultThis, () => ( thisArg)) : thisArg;
|
|
12865
12954
|
const activeToolSpans = /* @__PURE__ */ new Map();
|
|
12955
|
+
const subAgentSpans = /* @__PURE__ */ new Map();
|
|
12956
|
+
const endedSubAgentSpans = /* @__PURE__ */ new Set();
|
|
12957
|
+
const toolUseToParent = /* @__PURE__ */ new Map();
|
|
12958
|
+
const pendingSubAgentNames = /* @__PURE__ */ new Map();
|
|
12959
|
+
const resolveParentSpan = async (toolUseID) => {
|
|
12960
|
+
const parentToolUseId = toolUseToParent.get(toolUseID);
|
|
12961
|
+
if (parentToolUseId) {
|
|
12962
|
+
const subAgentSpan = subAgentSpans.get(parentToolUseId);
|
|
12963
|
+
if (subAgentSpan) {
|
|
12964
|
+
return subAgentSpan.export();
|
|
12965
|
+
}
|
|
12966
|
+
}
|
|
12967
|
+
return span.export();
|
|
12968
|
+
};
|
|
12866
12969
|
const optionsWithHooks = injectTracingHooks(
|
|
12867
12970
|
options,
|
|
12868
|
-
|
|
12869
|
-
activeToolSpans
|
|
12971
|
+
resolveParentSpan,
|
|
12972
|
+
activeToolSpans,
|
|
12973
|
+
subAgentSpans,
|
|
12974
|
+
endedSubAgentSpans
|
|
12870
12975
|
);
|
|
12871
|
-
const modifiedArgArray = [
|
|
12976
|
+
const modifiedArgArray = [
|
|
12977
|
+
{
|
|
12978
|
+
...params,
|
|
12979
|
+
...promptForQuery !== void 0 ? { prompt: promptForQuery } : {},
|
|
12980
|
+
options: optionsWithHooks
|
|
12981
|
+
}
|
|
12982
|
+
];
|
|
12872
12983
|
const originalGenerator = withCurrent(
|
|
12873
12984
|
span,
|
|
12874
12985
|
() => Reflect.apply(target, invocationTarget, modifiedArgArray)
|
|
@@ -12877,20 +12988,55 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12877
12988
|
try {
|
|
12878
12989
|
for await (const message of originalGenerator) {
|
|
12879
12990
|
const currentTime = getCurrentUnixTimestamp();
|
|
12880
|
-
|
|
12991
|
+
if (message.type === "assistant" && Array.isArray(_optionalChain([message, 'access', _276 => _276.message, 'optionalAccess', _277 => _277.content]))) {
|
|
12992
|
+
const parentToolUseId = _nullishCoalesce(message.parent_tool_use_id, () => ( null));
|
|
12993
|
+
for (const block of message.message.content) {
|
|
12994
|
+
if (block.type === "tool_use" && block.id) {
|
|
12995
|
+
toolUseToParent.set(block.id, parentToolUseId);
|
|
12996
|
+
if (block.name === "Task" && _optionalChain([block, 'access', _278 => _278.input, 'optionalAccess', _279 => _279.subagent_type])) {
|
|
12997
|
+
pendingSubAgentNames.set(
|
|
12998
|
+
block.id,
|
|
12999
|
+
block.input.subagent_type
|
|
13000
|
+
);
|
|
13001
|
+
}
|
|
13002
|
+
}
|
|
13003
|
+
}
|
|
13004
|
+
}
|
|
13005
|
+
if ("parent_tool_use_id" in message) {
|
|
13006
|
+
const parentToolUseId = message.parent_tool_use_id;
|
|
13007
|
+
if (parentToolUseId && !subAgentSpans.has(parentToolUseId)) {
|
|
13008
|
+
const agentName = pendingSubAgentNames.get(parentToolUseId);
|
|
13009
|
+
const spanName = agentName ? `Agent: ${agentName}` : "Agent: sub-agent";
|
|
13010
|
+
const parentExport = await span.export();
|
|
13011
|
+
const subAgentSpan = startSpan({
|
|
13012
|
+
name: spanName,
|
|
13013
|
+
spanAttributes: { type: "task" /* TASK */ },
|
|
13014
|
+
event: {
|
|
13015
|
+
metadata: {
|
|
13016
|
+
...agentName && {
|
|
13017
|
+
"claude_agent_sdk.agent_type": agentName
|
|
13018
|
+
}
|
|
13019
|
+
}
|
|
13020
|
+
},
|
|
13021
|
+
parent: parentExport
|
|
13022
|
+
});
|
|
13023
|
+
subAgentSpans.set(parentToolUseId, subAgentSpan);
|
|
13024
|
+
}
|
|
13025
|
+
}
|
|
13026
|
+
const messageId = _optionalChain([message, 'access', _280 => _280.message, 'optionalAccess', _281 => _281.id]);
|
|
12881
13027
|
if (messageId && messageId !== currentMessageId) {
|
|
12882
13028
|
await createLLMSpan();
|
|
12883
13029
|
currentMessageId = messageId;
|
|
12884
13030
|
currentMessageStartTime = currentTime;
|
|
12885
13031
|
}
|
|
12886
|
-
if (message.type === "assistant" && _optionalChain([message, 'access',
|
|
13032
|
+
if (message.type === "assistant" && _optionalChain([message, 'access', _282 => _282.message, 'optionalAccess', _283 => _283.usage])) {
|
|
12887
13033
|
currentMessages.push(message);
|
|
12888
13034
|
}
|
|
12889
13035
|
if (message.type === "result" && message.usage) {
|
|
12890
13036
|
finalUsageMetrics = _extractUsageFromMessage(message);
|
|
12891
13037
|
if (currentMessages.length > 0 && finalUsageMetrics.completion_tokens !== void 0) {
|
|
12892
13038
|
const lastMessage = currentMessages[currentMessages.length - 1];
|
|
12893
|
-
if (_optionalChain([lastMessage, 'optionalAccess',
|
|
13039
|
+
if (_optionalChain([lastMessage, 'optionalAccess', _284 => _284.message, 'optionalAccess', _285 => _285.usage])) {
|
|
12894
13040
|
const adjustedTokens = finalUsageMetrics.completion_tokens - accumulatedOutputTokens;
|
|
12895
13041
|
if (adjustedTokens >= 0) {
|
|
12896
13042
|
lastMessage.message.usage.output_tokens = adjustedTokens;
|
|
@@ -12922,6 +13068,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12922
13068
|
});
|
|
12923
13069
|
throw error;
|
|
12924
13070
|
} finally {
|
|
13071
|
+
for (const [id, subSpan] of subAgentSpans) {
|
|
13072
|
+
if (!endedSubAgentSpans.has(id)) {
|
|
13073
|
+
subSpan.end();
|
|
13074
|
+
}
|
|
13075
|
+
}
|
|
13076
|
+
subAgentSpans.clear();
|
|
13077
|
+
if (capturedPromptMessages) {
|
|
13078
|
+
if (promptStarted) {
|
|
13079
|
+
await promptDone;
|
|
13080
|
+
}
|
|
13081
|
+
if (capturedPromptMessages.length > 0) {
|
|
13082
|
+
span.log({
|
|
13083
|
+
input: _formatCapturedMessages(capturedPromptMessages)
|
|
13084
|
+
});
|
|
13085
|
+
}
|
|
13086
|
+
}
|
|
12925
13087
|
span.end();
|
|
12926
13088
|
}
|
|
12927
13089
|
})();
|
|
@@ -12949,19 +13111,30 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12949
13111
|
});
|
|
12950
13112
|
return proxy;
|
|
12951
13113
|
}
|
|
12952
|
-
function _buildLLMInput(prompt, conversationHistory) {
|
|
12953
|
-
const
|
|
12954
|
-
|
|
12955
|
-
|
|
12956
|
-
|
|
12957
|
-
|
|
13114
|
+
function _buildLLMInput(prompt, conversationHistory, capturedPromptMessages) {
|
|
13115
|
+
const promptMessages = [];
|
|
13116
|
+
if (typeof prompt === "string") {
|
|
13117
|
+
promptMessages.push({ content: prompt, role: "user" });
|
|
13118
|
+
} else if (capturedPromptMessages && capturedPromptMessages.length > 0) {
|
|
13119
|
+
for (const msg of capturedPromptMessages) {
|
|
13120
|
+
const role = _optionalChain([msg, 'access', _286 => _286.message, 'optionalAccess', _287 => _287.role]);
|
|
13121
|
+
const content = _optionalChain([msg, 'access', _288 => _288.message, 'optionalAccess', _289 => _289.content]);
|
|
13122
|
+
if (role && content !== void 0) {
|
|
13123
|
+
promptMessages.push({ content, role });
|
|
13124
|
+
}
|
|
13125
|
+
}
|
|
13126
|
+
}
|
|
13127
|
+
const inputParts = [...promptMessages, ...conversationHistory];
|
|
12958
13128
|
return inputParts.length > 0 ? inputParts : void 0;
|
|
12959
13129
|
}
|
|
13130
|
+
function _formatCapturedMessages(messages) {
|
|
13131
|
+
return messages.length > 0 ? messages : [];
|
|
13132
|
+
}
|
|
12960
13133
|
function _extractUsageFromMessage(message) {
|
|
12961
13134
|
const metrics = {};
|
|
12962
13135
|
let usage;
|
|
12963
13136
|
if (message.type === "assistant") {
|
|
12964
|
-
usage = _optionalChain([message, 'access',
|
|
13137
|
+
usage = _optionalChain([message, 'access', _290 => _290.message, 'optionalAccess', _291 => _291.usage]);
|
|
12965
13138
|
} else if (message.type === "result") {
|
|
12966
13139
|
usage = message.usage;
|
|
12967
13140
|
}
|
|
@@ -12990,17 +13163,21 @@ function _extractUsageFromMessage(message) {
|
|
|
12990
13163
|
}
|
|
12991
13164
|
return metrics;
|
|
12992
13165
|
}
|
|
12993
|
-
async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, parentSpan) {
|
|
13166
|
+
async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, capturedPromptMessages, parentSpan) {
|
|
12994
13167
|
if (messages.length === 0) return void 0;
|
|
12995
13168
|
const lastMessage = messages[messages.length - 1];
|
|
12996
|
-
if (lastMessage.type !== "assistant" || !_optionalChain([lastMessage, 'access',
|
|
13169
|
+
if (lastMessage.type !== "assistant" || !_optionalChain([lastMessage, 'access', _292 => _292.message, 'optionalAccess', _293 => _293.usage])) {
|
|
12997
13170
|
return void 0;
|
|
12998
13171
|
}
|
|
12999
13172
|
const model = lastMessage.message.model || options.model;
|
|
13000
13173
|
const usage = _extractUsageFromMessage(lastMessage);
|
|
13001
|
-
const input = _buildLLMInput(
|
|
13174
|
+
const input = _buildLLMInput(
|
|
13175
|
+
prompt,
|
|
13176
|
+
conversationHistory,
|
|
13177
|
+
capturedPromptMessages
|
|
13178
|
+
);
|
|
13002
13179
|
const outputs = messages.map(
|
|
13003
|
-
(m) => _optionalChain([m, 'access',
|
|
13180
|
+
(m) => _optionalChain([m, 'access', _294 => _294.message, 'optionalAccess', _295 => _295.content]) && _optionalChain([m, 'access', _296 => _296.message, 'optionalAccess', _297 => _297.role]) ? { content: m.message.content, role: m.message.role } : void 0
|
|
13004
13181
|
).filter((c) => c !== void 0);
|
|
13005
13182
|
await traced(
|
|
13006
13183
|
(llmSpan) => {
|
|
@@ -13020,7 +13197,7 @@ async function _createLLMSpanForMessages(messages, prompt, conversationHistory,
|
|
|
13020
13197
|
parent: parentSpan
|
|
13021
13198
|
}
|
|
13022
13199
|
);
|
|
13023
|
-
return _optionalChain([lastMessage, 'access',
|
|
13200
|
+
return _optionalChain([lastMessage, 'access', _298 => _298.message, 'optionalAccess', _299 => _299.content]) && _optionalChain([lastMessage, 'access', _300 => _300.message, 'optionalAccess', _301 => _301.role]) ? { content: lastMessage.message.content, role: lastMessage.message.role } : void 0;
|
|
13024
13201
|
}
|
|
13025
13202
|
function wrapClaudeAgentSDK(sdk) {
|
|
13026
13203
|
const cache = /* @__PURE__ */ new Map();
|
|
@@ -13294,7 +13471,7 @@ function serializePart(part) {
|
|
|
13294
13471
|
return part;
|
|
13295
13472
|
}
|
|
13296
13473
|
function serializeTools(params) {
|
|
13297
|
-
if (!_optionalChain([params, 'access',
|
|
13474
|
+
if (!_optionalChain([params, 'access', _302 => _302.config, 'optionalAccess', _303 => _303.tools])) {
|
|
13298
13475
|
return null;
|
|
13299
13476
|
}
|
|
13300
13477
|
try {
|
|
@@ -13377,7 +13554,7 @@ function aggregateGenerateContentChunks(chunks, start, firstTokenTime) {
|
|
|
13377
13554
|
}
|
|
13378
13555
|
if (chunk.candidates && Array.isArray(chunk.candidates)) {
|
|
13379
13556
|
for (const candidate of chunk.candidates) {
|
|
13380
|
-
if (_optionalChain([candidate, 'access',
|
|
13557
|
+
if (_optionalChain([candidate, 'access', _304 => _304.content, 'optionalAccess', _305 => _305.parts])) {
|
|
13381
13558
|
for (const part of candidate.content.parts) {
|
|
13382
13559
|
if (part.text !== void 0) {
|
|
13383
13560
|
if (part.thought) {
|
|
@@ -13408,7 +13585,7 @@ function aggregateGenerateContentChunks(chunks, start, firstTokenTime) {
|
|
|
13408
13585
|
parts.push({ text });
|
|
13409
13586
|
}
|
|
13410
13587
|
parts.push(...otherParts);
|
|
13411
|
-
if (parts.length > 0 && _optionalChain([lastResponse, 'optionalAccess',
|
|
13588
|
+
if (parts.length > 0 && _optionalChain([lastResponse, 'optionalAccess', _306 => _306.candidates])) {
|
|
13412
13589
|
const candidates = [];
|
|
13413
13590
|
for (const candidate of lastResponse.candidates) {
|
|
13414
13591
|
const candidateDict = {
|
|
@@ -13755,7 +13932,7 @@ function unescapePath(path) {
|
|
|
13755
13932
|
}
|
|
13756
13933
|
var graph_framework_default = { createGraph };
|
|
13757
13934
|
|
|
13758
|
-
// ../node_modules
|
|
13935
|
+
// ../node_modules/async/dist/async.mjs
|
|
13759
13936
|
function initialParams(fn) {
|
|
13760
13937
|
return function(...args) {
|
|
13761
13938
|
var callback = args.pop();
|
|
@@ -13826,7 +14003,7 @@ function isAsync(fn) {
|
|
|
13826
14003
|
function isAsyncGenerator2(fn) {
|
|
13827
14004
|
return fn[Symbol.toStringTag] === "AsyncGenerator";
|
|
13828
14005
|
}
|
|
13829
|
-
function
|
|
14006
|
+
function isAsyncIterable2(obj) {
|
|
13830
14007
|
return typeof obj[Symbol.asyncIterator] === "function";
|
|
13831
14008
|
}
|
|
13832
14009
|
function wrapAsync(asyncFn) {
|
|
@@ -13880,7 +14057,6 @@ function isArrayLike(value) {
|
|
|
13880
14057
|
return value && typeof value.length === "number" && value.length >= 0 && value.length % 1 === 0;
|
|
13881
14058
|
}
|
|
13882
14059
|
var breakLoop = {};
|
|
13883
|
-
var breakLoop$1 = breakLoop;
|
|
13884
14060
|
function once(fn) {
|
|
13885
14061
|
function wrapper(...args) {
|
|
13886
14062
|
if (fn === null) return;
|
|
@@ -13972,7 +14148,7 @@ function asyncEachOfLimit(generator, limit, iteratee, callback) {
|
|
|
13972
14148
|
canceled = true;
|
|
13973
14149
|
return;
|
|
13974
14150
|
}
|
|
13975
|
-
if (result === breakLoop
|
|
14151
|
+
if (result === breakLoop || done && running <= 0) {
|
|
13976
14152
|
done = true;
|
|
13977
14153
|
return callback(null);
|
|
13978
14154
|
}
|
|
@@ -13998,7 +14174,7 @@ var eachOfLimit$2 = (limit) => {
|
|
|
13998
14174
|
if (isAsyncGenerator2(obj)) {
|
|
13999
14175
|
return asyncEachOfLimit(obj, limit, iteratee, callback);
|
|
14000
14176
|
}
|
|
14001
|
-
if (
|
|
14177
|
+
if (isAsyncIterable2(obj)) {
|
|
14002
14178
|
return asyncEachOfLimit(obj[Symbol.asyncIterator](), limit, iteratee, callback);
|
|
14003
14179
|
}
|
|
14004
14180
|
var nextElem = createIterator(obj);
|
|
@@ -14015,7 +14191,7 @@ var eachOfLimit$2 = (limit) => {
|
|
|
14015
14191
|
} else if (err === false) {
|
|
14016
14192
|
done = true;
|
|
14017
14193
|
canceled = true;
|
|
14018
|
-
} else if (value === breakLoop
|
|
14194
|
+
} else if (value === breakLoop || done && running <= 0) {
|
|
14019
14195
|
done = true;
|
|
14020
14196
|
return callback(null);
|
|
14021
14197
|
} else if (!looping) {
|
|
@@ -14058,7 +14234,7 @@ function eachOfArrayLike(coll, iteratee, callback) {
|
|
|
14058
14234
|
if (canceled === true) return;
|
|
14059
14235
|
if (err) {
|
|
14060
14236
|
callback(err);
|
|
14061
|
-
} else if (++completed === length || value === breakLoop
|
|
14237
|
+
} else if (++completed === length || value === breakLoop) {
|
|
14062
14238
|
callback(null);
|
|
14063
14239
|
}
|
|
14064
14240
|
}
|
|
@@ -14454,7 +14630,7 @@ function _createTester(check, getResult) {
|
|
|
14454
14630
|
if (check(result) && !testResult) {
|
|
14455
14631
|
testPassed = true;
|
|
14456
14632
|
testResult = getResult(true, value);
|
|
14457
|
-
return callback(null, breakLoop
|
|
14633
|
+
return callback(null, breakLoop);
|
|
14458
14634
|
}
|
|
14459
14635
|
callback();
|
|
14460
14636
|
});
|
|
@@ -14866,7 +15042,7 @@ var CachedSpanFetcher = (_class18 = class {
|
|
|
14866
15042
|
spanType
|
|
14867
15043
|
);
|
|
14868
15044
|
const rows = await fetcher.fetchedData();
|
|
14869
|
-
return rows.filter((row) => _optionalChain([row, 'access',
|
|
15045
|
+
return rows.filter((row) => _optionalChain([row, 'access', _307 => _307.span_attributes, 'optionalAccess', _308 => _308.purpose]) !== "scorer").map((row) => ({
|
|
14870
15046
|
input: row.input,
|
|
14871
15047
|
output: row.output,
|
|
14872
15048
|
metadata: row.metadata,
|
|
@@ -14900,7 +15076,7 @@ var CachedSpanFetcher = (_class18 = class {
|
|
|
14900
15076
|
async fetchSpans(spanType) {
|
|
14901
15077
|
const spans = await this.fetchFn(spanType);
|
|
14902
15078
|
for (const span of spans) {
|
|
14903
|
-
const type = _nullishCoalesce(_optionalChain([span, 'access',
|
|
15079
|
+
const type = _nullishCoalesce(_optionalChain([span, 'access', _309 => _309.span_attributes, 'optionalAccess', _310 => _310.type]), () => ( ""));
|
|
14904
15080
|
const existing = _nullishCoalesce(this.spanCache.get(type), () => ( []));
|
|
14905
15081
|
existing.push(span);
|
|
14906
15082
|
this.spanCache.set(type, existing);
|
|
@@ -14980,11 +15156,11 @@ var LocalTrace = (_class19 = class {
|
|
|
14980
15156
|
const cachedSpans = this.state.spanCache.getByRootSpanId(this.rootSpanId);
|
|
14981
15157
|
if (cachedSpans && cachedSpans.length > 0) {
|
|
14982
15158
|
let spans = cachedSpans.filter(
|
|
14983
|
-
(span) => _optionalChain([span, 'access',
|
|
15159
|
+
(span) => _optionalChain([span, 'access', _311 => _311.span_attributes, 'optionalAccess', _312 => _312.purpose]) !== "scorer"
|
|
14984
15160
|
);
|
|
14985
15161
|
if (spanType && spanType.length > 0) {
|
|
14986
15162
|
spans = spans.filter(
|
|
14987
|
-
(span) => spanType.includes(_nullishCoalesce(_optionalChain([span, 'access',
|
|
15163
|
+
(span) => spanType.includes(_nullishCoalesce(_optionalChain([span, 'access', _313 => _313.span_attributes, 'optionalAccess', _314 => _314.type]), () => ( "")))
|
|
14988
15164
|
);
|
|
14989
15165
|
}
|
|
14990
15166
|
return spans.map((span) => ({
|
|
@@ -15003,7 +15179,7 @@ var LocalTrace = (_class19 = class {
|
|
|
15003
15179
|
* Calls the API with the project_default preprocessor (which falls back to "thread").
|
|
15004
15180
|
*/
|
|
15005
15181
|
async getThread(options) {
|
|
15006
|
-
const cacheKey = _nullishCoalesce(_optionalChain([options, 'optionalAccess',
|
|
15182
|
+
const cacheKey = _nullishCoalesce(_optionalChain([options, 'optionalAccess', _315 => _315.preprocessor]), () => ( "project_default"));
|
|
15007
15183
|
if (!this.threadCache.has(cacheKey)) {
|
|
15008
15184
|
const promise = this.fetchThread(options);
|
|
15009
15185
|
this.threadCache.set(cacheKey, promise);
|
|
@@ -15014,7 +15190,7 @@ var LocalTrace = (_class19 = class {
|
|
|
15014
15190
|
await this.ensureSpansReady();
|
|
15015
15191
|
await this.state.login({});
|
|
15016
15192
|
const result = await invoke({
|
|
15017
|
-
globalFunction: _nullishCoalesce(_optionalChain([options, 'optionalAccess',
|
|
15193
|
+
globalFunction: _nullishCoalesce(_optionalChain([options, 'optionalAccess', _316 => _316.preprocessor]), () => ( "project_default")),
|
|
15018
15194
|
functionType: "preprocessor",
|
|
15019
15195
|
input: {
|
|
15020
15196
|
trace_ref: {
|
|
@@ -15173,10 +15349,10 @@ function validateParametersWithJsonSchema(parameters, schema) {
|
|
|
15173
15349
|
const ajv = new (0, _ajv2.default)({ coerceTypes: true, useDefaults: true, strict: false });
|
|
15174
15350
|
const validate = ajv.compile(schema);
|
|
15175
15351
|
if (!validate(parameters)) {
|
|
15176
|
-
const errorMessages = _optionalChain([validate, 'access',
|
|
15352
|
+
const errorMessages = _optionalChain([validate, 'access', _317 => _317.errors, 'optionalAccess', _318 => _318.map, 'call', _319 => _319((err) => {
|
|
15177
15353
|
const path = err.instancePath || "root";
|
|
15178
15354
|
return `${path}: ${err.message}`;
|
|
15179
|
-
}), 'access',
|
|
15355
|
+
}), 'access', _320 => _320.join, 'call', _321 => _321(", ")]);
|
|
15180
15356
|
throw Error(`Invalid parameters: ${errorMessages}`);
|
|
15181
15357
|
}
|
|
15182
15358
|
return parameters;
|
|
@@ -15232,7 +15408,7 @@ function callEvaluatorData(data) {
|
|
|
15232
15408
|
baseExperiment
|
|
15233
15409
|
};
|
|
15234
15410
|
}
|
|
15235
|
-
function
|
|
15411
|
+
function isAsyncIterable3(value) {
|
|
15236
15412
|
return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
|
|
15237
15413
|
}
|
|
15238
15414
|
function isIterable(value) {
|
|
@@ -15417,7 +15593,7 @@ var defaultErrorScoreHandler = ({
|
|
|
15417
15593
|
};
|
|
15418
15594
|
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
15419
15595
|
if (enableCache) {
|
|
15420
|
-
_optionalChain([(_nullishCoalesce(evaluator.state, () => ( _internalGetGlobalState()))), 'optionalAccess',
|
|
15596
|
+
_optionalChain([(_nullishCoalesce(evaluator.state, () => ( _internalGetGlobalState()))), 'optionalAccess', _322 => _322.spanCache, 'optionalAccess', _323 => _323.start, 'call', _324 => _324()]);
|
|
15421
15597
|
}
|
|
15422
15598
|
try {
|
|
15423
15599
|
if (typeof evaluator.data === "string") {
|
|
@@ -15453,7 +15629,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15453
15629
|
}
|
|
15454
15630
|
const resolvedDataResult = dataResult instanceof Promise ? await dataResult : dataResult;
|
|
15455
15631
|
const dataIterable = (() => {
|
|
15456
|
-
if (
|
|
15632
|
+
if (isAsyncIterable3(resolvedDataResult)) {
|
|
15457
15633
|
return resolvedDataResult;
|
|
15458
15634
|
}
|
|
15459
15635
|
if (Array.isArray(resolvedDataResult) || isIterable(resolvedDataResult)) {
|
|
@@ -15528,7 +15704,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15528
15704
|
objectType: parentComponents ? spanObjectTypeV3ToTypedString(
|
|
15529
15705
|
parentComponents.data.object_type
|
|
15530
15706
|
) : "experiment",
|
|
15531
|
-
objectId: await _asyncNullishCoalesce(await _asyncOptionalChain([parentComponents, 'optionalAccess', async
|
|
15707
|
+
objectId: await _asyncNullishCoalesce(await _asyncOptionalChain([parentComponents, 'optionalAccess', async _325 => _325.data, 'access', async _326 => _326.object_id]), async () => ( (experimentIdPromise ? await _asyncNullishCoalesce(await experimentIdPromise, async () => ( "")) : ""))),
|
|
15532
15708
|
rootSpanId: rootSpan.rootSpanId,
|
|
15533
15709
|
ensureSpansFlushed,
|
|
15534
15710
|
state
|
|
@@ -15554,10 +15730,10 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15554
15730
|
span,
|
|
15555
15731
|
parameters: _nullishCoalesce(parameters, () => ( {})),
|
|
15556
15732
|
reportProgress: (event) => {
|
|
15557
|
-
_optionalChain([stream, 'optionalCall',
|
|
15733
|
+
_optionalChain([stream, 'optionalCall', _327 => _327({
|
|
15558
15734
|
...event,
|
|
15559
15735
|
id: rootSpan.id,
|
|
15560
|
-
origin: _optionalChain([baseEvent, 'access',
|
|
15736
|
+
origin: _optionalChain([baseEvent, 'access', _328 => _328.event, 'optionalAccess', _329 => _329.origin]),
|
|
15561
15737
|
name: evaluator.evalName,
|
|
15562
15738
|
object_type: "task"
|
|
15563
15739
|
})]);
|
|
@@ -15721,7 +15897,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15721
15897
|
metadata,
|
|
15722
15898
|
scores: mergedScores,
|
|
15723
15899
|
error,
|
|
15724
|
-
origin: _optionalChain([baseEvent, 'access',
|
|
15900
|
+
origin: _optionalChain([baseEvent, 'access', _330 => _330.event, 'optionalAccess', _331 => _331.origin])
|
|
15725
15901
|
});
|
|
15726
15902
|
}
|
|
15727
15903
|
};
|
|
@@ -15754,7 +15930,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15754
15930
|
break;
|
|
15755
15931
|
}
|
|
15756
15932
|
scheduledTrials++;
|
|
15757
|
-
_optionalChain([progressReporter, 'access',
|
|
15933
|
+
_optionalChain([progressReporter, 'access', _332 => _332.setTotal, 'optionalCall', _333 => _333(evaluator.evalName, scheduledTrials)]);
|
|
15758
15934
|
q.push({ datum, trialIndex });
|
|
15759
15935
|
}
|
|
15760
15936
|
}
|
|
@@ -15829,9 +16005,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15829
16005
|
);
|
|
15830
16006
|
} finally {
|
|
15831
16007
|
if (enableCache) {
|
|
15832
|
-
const spanCache = _optionalChain([(_nullishCoalesce(evaluator.state, () => ( _internalGetGlobalState()))), 'optionalAccess',
|
|
15833
|
-
_optionalChain([spanCache, 'optionalAccess',
|
|
15834
|
-
_optionalChain([spanCache, 'optionalAccess',
|
|
16008
|
+
const spanCache = _optionalChain([(_nullishCoalesce(evaluator.state, () => ( _internalGetGlobalState()))), 'optionalAccess', _334 => _334.spanCache]);
|
|
16009
|
+
_optionalChain([spanCache, 'optionalAccess', _335 => _335.dispose, 'call', _336 => _336()]);
|
|
16010
|
+
_optionalChain([spanCache, 'optionalAccess', _337 => _337.stop, 'call', _338 => _338()]);
|
|
15835
16011
|
}
|
|
15836
16012
|
}
|
|
15837
16013
|
}
|
|
@@ -16305,6 +16481,7 @@ var CodeParameters = class {
|
|
|
16305
16481
|
this.metadata = opts.metadata;
|
|
16306
16482
|
}
|
|
16307
16483
|
async toFunctionDefinition(projectNameToId) {
|
|
16484
|
+
const schema = serializeEvalParameterstoParametersSchema(this.schema);
|
|
16308
16485
|
return {
|
|
16309
16486
|
project_id: await projectNameToId.resolve(this.project),
|
|
16310
16487
|
name: this.name,
|
|
@@ -16313,8 +16490,8 @@ var CodeParameters = class {
|
|
|
16313
16490
|
function_type: "parameters",
|
|
16314
16491
|
function_data: {
|
|
16315
16492
|
type: "parameters",
|
|
16316
|
-
data:
|
|
16317
|
-
__schema:
|
|
16493
|
+
data: getDefaultDataFromParametersSchema(schema),
|
|
16494
|
+
__schema: schema
|
|
16318
16495
|
},
|
|
16319
16496
|
if_exists: this.ifExists,
|
|
16320
16497
|
metadata: this.metadata
|
|
@@ -16369,6 +16546,16 @@ function serializeEvalParameterstoParametersSchema(parameters) {
|
|
|
16369
16546
|
additionalProperties: true
|
|
16370
16547
|
};
|
|
16371
16548
|
}
|
|
16549
|
+
function getDefaultDataFromParametersSchema(schema) {
|
|
16550
|
+
return Object.fromEntries(
|
|
16551
|
+
Object.entries(schema.properties).flatMap(([name, value]) => {
|
|
16552
|
+
if (!("default" in value)) {
|
|
16553
|
+
return [];
|
|
16554
|
+
}
|
|
16555
|
+
return [[name, value.default]];
|
|
16556
|
+
})
|
|
16557
|
+
);
|
|
16558
|
+
}
|
|
16372
16559
|
var ProjectNameIdMap = (_class23 = class {constructor() { _class23.prototype.__init75.call(this);_class23.prototype.__init76.call(this); }
|
|
16373
16560
|
__init75() {this.nameToId = {}}
|
|
16374
16561
|
__init76() {this.idToName = {}}
|