@agentv/core 2.18.3 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-V42NUK73.js → chunk-ACTIPQZ3.js} +1 -1
- package/dist/{chunk-V42NUK73.js.map → chunk-ACTIPQZ3.js.map} +1 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +50 -7
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +15 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +51 -8
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -2409,6 +2409,11 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2409
2409
|
}
|
|
2410
2410
|
if (typeValue === "code-judge") {
|
|
2411
2411
|
let command;
|
|
2412
|
+
if (rawEvaluator.script !== void 0 && rawEvaluator.command === void 0) {
|
|
2413
|
+
console.warn(
|
|
2414
|
+
`${ANSI_YELLOW4}Warning: 'script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'command' instead.${ANSI_RESET4}`
|
|
2415
|
+
);
|
|
2416
|
+
}
|
|
2412
2417
|
const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
|
|
2413
2418
|
if (typeof rawCommand === "string") {
|
|
2414
2419
|
const trimmed = rawCommand.trim();
|
|
@@ -3167,6 +3172,11 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3167
3172
|
let resolvedPromptScript;
|
|
3168
3173
|
let promptScriptConfig;
|
|
3169
3174
|
if (isJsonObject2(rawPrompt)) {
|
|
3175
|
+
if (rawPrompt.script !== void 0 && rawPrompt.command === void 0) {
|
|
3176
|
+
console.warn(
|
|
3177
|
+
`${ANSI_YELLOW4}Warning: 'prompt.script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'prompt.command' instead.${ANSI_RESET4}`
|
|
3178
|
+
);
|
|
3179
|
+
}
|
|
3170
3180
|
const commandArray = asStringArray(
|
|
3171
3181
|
rawPrompt.command ?? rawPrompt.script,
|
|
3172
3182
|
`prompt.command for evaluator '${name}' in '${evalId}'`
|
|
@@ -4697,6 +4707,9 @@ var loadEvalCaseById = loadTestById;
|
|
|
4697
4707
|
function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
4698
4708
|
if (!isJsonObject(raw)) return void 0;
|
|
4699
4709
|
const obj = raw;
|
|
4710
|
+
if (obj.script !== void 0 && obj.command === void 0) {
|
|
4711
|
+
logWarning6("'script' is deprecated. Use 'command' instead.");
|
|
4712
|
+
}
|
|
4700
4713
|
const commandSource = obj.command ?? obj.script;
|
|
4701
4714
|
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
4702
4715
|
const commandArr = commandSource.filter((s) => typeof s === "string");
|
|
@@ -5552,7 +5565,7 @@ var ClaudeProvider = class {
|
|
|
5552
5565
|
// a Claude Code session the CLAUDECODE env var is set, which causes the
|
|
5553
5566
|
// subprocess to refuse to start ("cannot be launched inside another Claude
|
|
5554
5567
|
// Code session"). Passing a sanitized env removes that guard.
|
|
5555
|
-
env: sanitizeEnvForClaudeSdk()
|
|
5568
|
+
env: sanitizeEnvForClaudeSdk(request.braintrustSpanIds)
|
|
5556
5569
|
};
|
|
5557
5570
|
if (this.config.model) {
|
|
5558
5571
|
queryOptions.model = this.config.model;
|
|
@@ -5856,10 +5869,14 @@ function summarizeMessage(msg) {
|
|
|
5856
5869
|
return void 0;
|
|
5857
5870
|
}
|
|
5858
5871
|
}
|
|
5859
|
-
function sanitizeEnvForClaudeSdk() {
|
|
5872
|
+
function sanitizeEnvForClaudeSdk(braintrustSpanIds) {
|
|
5860
5873
|
const env = { ...process.env };
|
|
5861
5874
|
env.CLAUDECODE = void 0;
|
|
5862
5875
|
env.CLAUDE_CODE_ENTRYPOINT = void 0;
|
|
5876
|
+
if (braintrustSpanIds) {
|
|
5877
|
+
env.CC_PARENT_SPAN_ID = braintrustSpanIds.parentSpanId;
|
|
5878
|
+
env.CC_ROOT_SPAN_ID = braintrustSpanIds.rootSpanId;
|
|
5879
|
+
}
|
|
5863
5880
|
return env;
|
|
5864
5881
|
}
|
|
5865
5882
|
function isClaudeLogStreamingDisabled() {
|
|
@@ -16267,6 +16284,11 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
|
|
|
16267
16284
|
});
|
|
16268
16285
|
const timeoutMs = config.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
|
|
16269
16286
|
const cwd = config.cwd ?? context2.evalDir;
|
|
16287
|
+
if (config.script !== void 0 && config.command === void 0) {
|
|
16288
|
+
console.warn(
|
|
16289
|
+
"\x1B[33mWarning: 'script' is deprecated in workspace config. Use 'command' instead.\x1B[0m"
|
|
16290
|
+
);
|
|
16291
|
+
}
|
|
16270
16292
|
const rawCommand = config.command ?? config.script ?? [];
|
|
16271
16293
|
const commandArray = interpolateArgs(rawCommand, context2);
|
|
16272
16294
|
const result = await execFileWithStdin(commandArray, stdin, {
|
|
@@ -17937,6 +17959,7 @@ async function invokeProvider(provider, options) {
|
|
|
17937
17959
|
signal.addEventListener("abort", () => controller.abort(), { once: true });
|
|
17938
17960
|
}
|
|
17939
17961
|
try {
|
|
17962
|
+
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
17940
17963
|
return await provider.invoke({
|
|
17941
17964
|
question: promptInputs.question,
|
|
17942
17965
|
guidelines: promptInputs.guidelines,
|
|
@@ -17952,7 +17975,8 @@ async function invokeProvider(provider, options) {
|
|
|
17952
17975
|
cwd,
|
|
17953
17976
|
workspaceFile,
|
|
17954
17977
|
captureFileChanges: captureFileChanges2,
|
|
17955
|
-
streamCallbacks
|
|
17978
|
+
streamCallbacks,
|
|
17979
|
+
braintrustSpanIds: braintrustSpanIds ?? void 0
|
|
17956
17980
|
});
|
|
17957
17981
|
} finally {
|
|
17958
17982
|
if (timeout !== void 0) {
|
|
@@ -18507,9 +18531,16 @@ var OTEL_BACKEND_PRESETS = {
|
|
|
18507
18531
|
braintrust: {
|
|
18508
18532
|
name: "braintrust",
|
|
18509
18533
|
endpoint: "https://api.braintrust.dev/otel/v1/traces",
|
|
18510
|
-
headers: (env) =>
|
|
18511
|
-
|
|
18512
|
-
|
|
18534
|
+
headers: (env) => {
|
|
18535
|
+
const headers = {
|
|
18536
|
+
Authorization: `Bearer ${env.BRAINTRUST_API_KEY ?? ""}`
|
|
18537
|
+
};
|
|
18538
|
+
const parent = env.BRAINTRUST_PARENT ?? (env.BRAINTRUST_PROJECT_ID ? `project_id:${env.BRAINTRUST_PROJECT_ID}` : void 0) ?? (env.BRAINTRUST_PROJECT ? `project_name:${env.BRAINTRUST_PROJECT}` : void 0);
|
|
18539
|
+
if (parent) {
|
|
18540
|
+
headers["x-bt-parent"] = parent;
|
|
18541
|
+
}
|
|
18542
|
+
return headers;
|
|
18543
|
+
}
|
|
18513
18544
|
},
|
|
18514
18545
|
confident: {
|
|
18515
18546
|
name: "confident",
|
|
@@ -18857,11 +18888,23 @@ var OtelStreamingObserver = class {
|
|
|
18857
18888
|
this.rootSpan = null;
|
|
18858
18889
|
this.rootCtx = null;
|
|
18859
18890
|
}
|
|
18891
|
+
/** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
|
|
18892
|
+
getActiveSpanIds() {
|
|
18893
|
+
if (!this.rootSpan) return null;
|
|
18894
|
+
try {
|
|
18895
|
+
const spanCtx = this.rootSpan.spanContext?.() ?? this.rootSpan._spanContext;
|
|
18896
|
+
if (!spanCtx?.traceId || !spanCtx?.spanId) return null;
|
|
18897
|
+
return { parentSpanId: spanCtx.spanId, rootSpanId: spanCtx.traceId };
|
|
18898
|
+
} catch {
|
|
18899
|
+
return null;
|
|
18900
|
+
}
|
|
18901
|
+
}
|
|
18860
18902
|
/** Get ProviderStreamCallbacks for passing to providers */
|
|
18861
18903
|
getStreamCallbacks() {
|
|
18862
18904
|
return {
|
|
18863
18905
|
onToolCallEnd: (name, input, output, durationMs, toolCallId) => this.onToolCall(name, input, output, durationMs, toolCallId),
|
|
18864
|
-
onLlmCallEnd: (model, tokenUsage) => this.onLlmCall(model, tokenUsage)
|
|
18906
|
+
onLlmCallEnd: (model, tokenUsage) => this.onLlmCall(model, tokenUsage),
|
|
18907
|
+
getActiveSpanIds: () => this.getActiveSpanIds()
|
|
18865
18908
|
};
|
|
18866
18909
|
}
|
|
18867
18910
|
};
|