@agentv/core 2.18.3 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-V42NUK73.js → chunk-ACTIPQZ3.js} +1 -1
- package/dist/{chunk-V42NUK73.js.map → chunk-ACTIPQZ3.js.map} +1 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +50 -7
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +15 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +51 -8
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -14,6 +14,11 @@ interface ProviderStreamCallbacks {
|
|
|
14
14
|
onToolCallStart?: (toolName: string, toolCallId?: string) => void;
|
|
15
15
|
onToolCallEnd?: (toolName: string, input: unknown, output: unknown, durationMs: number, toolCallId?: string) => void;
|
|
16
16
|
onLlmCallEnd?: (model: string, tokenUsage?: ProviderTokenUsage) => void;
|
|
17
|
+
/** Returns active OTel span IDs for Braintrust trace bridging (optional) */
|
|
18
|
+
getActiveSpanIds?: () => {
|
|
19
|
+
parentSpanId: string;
|
|
20
|
+
rootSpanId: string;
|
|
21
|
+
} | null;
|
|
17
22
|
}
|
|
18
23
|
interface ProviderRequest {
|
|
19
24
|
readonly question: string;
|
|
@@ -36,6 +41,11 @@ interface ProviderRequest {
|
|
|
36
41
|
readonly captureFileChanges?: boolean;
|
|
37
42
|
/** Real-time observability callbacks (optional) */
|
|
38
43
|
readonly streamCallbacks?: ProviderStreamCallbacks;
|
|
44
|
+
/** Braintrust span IDs for trace-claude-code plugin (optional) */
|
|
45
|
+
readonly braintrustSpanIds?: {
|
|
46
|
+
readonly parentSpanId: string;
|
|
47
|
+
readonly rootSpanId: string;
|
|
48
|
+
};
|
|
39
49
|
}
|
|
40
50
|
/**
|
|
41
51
|
* A tool call within an output message.
|
|
@@ -3207,6 +3217,11 @@ declare class OtelStreamingObserver {
|
|
|
3207
3217
|
onLlmCall(model: string, tokenUsage?: ProviderTokenUsage): void;
|
|
3208
3218
|
/** Finalize root span with score/verdict after evaluation completes */
|
|
3209
3219
|
finalizeEvalCase(score: number, error?: string): void;
|
|
3220
|
+
/** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
|
|
3221
|
+
getActiveSpanIds(): {
|
|
3222
|
+
parentSpanId: string;
|
|
3223
|
+
rootSpanId: string;
|
|
3224
|
+
} | null;
|
|
3210
3225
|
/** Get ProviderStreamCallbacks for passing to providers */
|
|
3211
3226
|
getStreamCallbacks(): ProviderStreamCallbacks;
|
|
3212
3227
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -14,6 +14,11 @@ interface ProviderStreamCallbacks {
|
|
|
14
14
|
onToolCallStart?: (toolName: string, toolCallId?: string) => void;
|
|
15
15
|
onToolCallEnd?: (toolName: string, input: unknown, output: unknown, durationMs: number, toolCallId?: string) => void;
|
|
16
16
|
onLlmCallEnd?: (model: string, tokenUsage?: ProviderTokenUsage) => void;
|
|
17
|
+
/** Returns active OTel span IDs for Braintrust trace bridging (optional) */
|
|
18
|
+
getActiveSpanIds?: () => {
|
|
19
|
+
parentSpanId: string;
|
|
20
|
+
rootSpanId: string;
|
|
21
|
+
} | null;
|
|
17
22
|
}
|
|
18
23
|
interface ProviderRequest {
|
|
19
24
|
readonly question: string;
|
|
@@ -36,6 +41,11 @@ interface ProviderRequest {
|
|
|
36
41
|
readonly captureFileChanges?: boolean;
|
|
37
42
|
/** Real-time observability callbacks (optional) */
|
|
38
43
|
readonly streamCallbacks?: ProviderStreamCallbacks;
|
|
44
|
+
/** Braintrust span IDs for trace-claude-code plugin (optional) */
|
|
45
|
+
readonly braintrustSpanIds?: {
|
|
46
|
+
readonly parentSpanId: string;
|
|
47
|
+
readonly rootSpanId: string;
|
|
48
|
+
};
|
|
39
49
|
}
|
|
40
50
|
/**
|
|
41
51
|
* A tool call within an output message.
|
|
@@ -3207,6 +3217,11 @@ declare class OtelStreamingObserver {
|
|
|
3207
3217
|
onLlmCall(model: string, tokenUsage?: ProviderTokenUsage): void;
|
|
3208
3218
|
/** Finalize root span with score/verdict after evaluation completes */
|
|
3209
3219
|
finalizeEvalCase(score: number, error?: string): void;
|
|
3220
|
+
/** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
|
|
3221
|
+
getActiveSpanIds(): {
|
|
3222
|
+
parentSpanId: string;
|
|
3223
|
+
rootSpanId: string;
|
|
3224
|
+
} | null;
|
|
3210
3225
|
/** Get ProviderStreamCallbacks for passing to providers */
|
|
3211
3226
|
getStreamCallbacks(): ProviderStreamCallbacks;
|
|
3212
3227
|
}
|
package/dist/index.js
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
readTextFile,
|
|
18
18
|
resolveFileReference,
|
|
19
19
|
resolveTargetDefinition
|
|
20
|
-
} from "./chunk-V42NUK73.js";
|
|
20
|
+
} from "./chunk-ACTIPQZ3.js";
|
|
21
21
|
import {
|
|
22
22
|
OtlpJsonFileExporter
|
|
23
23
|
} from "./chunk-HFSYZHGF.js";
|
|
@@ -783,6 +783,11 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
783
783
|
}
|
|
784
784
|
if (typeValue === "code-judge") {
|
|
785
785
|
let command;
|
|
786
|
+
if (rawEvaluator.script !== void 0 && rawEvaluator.command === void 0) {
|
|
787
|
+
console.warn(
|
|
788
|
+
`${ANSI_YELLOW4}Warning: 'script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'command' instead.${ANSI_RESET4}`
|
|
789
|
+
);
|
|
790
|
+
}
|
|
786
791
|
const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
|
|
787
792
|
if (typeof rawCommand === "string") {
|
|
788
793
|
const trimmed = rawCommand.trim();
|
|
@@ -1541,6 +1546,11 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1541
1546
|
let resolvedPromptScript;
|
|
1542
1547
|
let promptScriptConfig;
|
|
1543
1548
|
if (isJsonObject2(rawPrompt)) {
|
|
1549
|
+
if (rawPrompt.script !== void 0 && rawPrompt.command === void 0) {
|
|
1550
|
+
console.warn(
|
|
1551
|
+
`${ANSI_YELLOW4}Warning: 'prompt.script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'prompt.command' instead.${ANSI_RESET4}`
|
|
1552
|
+
);
|
|
1553
|
+
}
|
|
1544
1554
|
const commandArray = asStringArray(
|
|
1545
1555
|
rawPrompt.command ?? rawPrompt.script,
|
|
1546
1556
|
`prompt.command for evaluator '${name}' in '${evalId}'`
|
|
@@ -3071,6 +3081,9 @@ var loadEvalCaseById = loadTestById;
|
|
|
3071
3081
|
function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
3072
3082
|
if (!isJsonObject(raw)) return void 0;
|
|
3073
3083
|
const obj = raw;
|
|
3084
|
+
if (obj.script !== void 0 && obj.command === void 0) {
|
|
3085
|
+
logWarning6("'script' is deprecated. Use 'command' instead.");
|
|
3086
|
+
}
|
|
3074
3087
|
const commandSource = obj.command ?? obj.script;
|
|
3075
3088
|
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
3076
3089
|
const commandArr = commandSource.filter((s) => typeof s === "string");
|
|
@@ -3811,7 +3824,7 @@ var ClaudeProvider = class {
|
|
|
3811
3824
|
// a Claude Code session the CLAUDECODE env var is set, which causes the
|
|
3812
3825
|
// subprocess to refuse to start ("cannot be launched inside another Claude
|
|
3813
3826
|
// Code session"). Passing a sanitized env removes that guard.
|
|
3814
|
-
env: sanitizeEnvForClaudeSdk()
|
|
3827
|
+
env: sanitizeEnvForClaudeSdk(request.braintrustSpanIds)
|
|
3815
3828
|
};
|
|
3816
3829
|
if (this.config.model) {
|
|
3817
3830
|
queryOptions.model = this.config.model;
|
|
@@ -4115,10 +4128,14 @@ function summarizeMessage(msg) {
|
|
|
4115
4128
|
return void 0;
|
|
4116
4129
|
}
|
|
4117
4130
|
}
|
|
4118
|
-
function sanitizeEnvForClaudeSdk() {
|
|
4131
|
+
function sanitizeEnvForClaudeSdk(braintrustSpanIds) {
|
|
4119
4132
|
const env = { ...process.env };
|
|
4120
4133
|
env.CLAUDECODE = void 0;
|
|
4121
4134
|
env.CLAUDE_CODE_ENTRYPOINT = void 0;
|
|
4135
|
+
if (braintrustSpanIds) {
|
|
4136
|
+
env.CC_PARENT_SPAN_ID = braintrustSpanIds.parentSpanId;
|
|
4137
|
+
env.CC_ROOT_SPAN_ID = braintrustSpanIds.rootSpanId;
|
|
4138
|
+
}
|
|
4122
4139
|
return env;
|
|
4123
4140
|
}
|
|
4124
4141
|
function isClaudeLogStreamingDisabled() {
|
|
@@ -13395,6 +13412,11 @@ async function executeWorkspaceScript(config, context, failureMode = "fatal") {
|
|
|
13395
13412
|
});
|
|
13396
13413
|
const timeoutMs = config.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
|
|
13397
13414
|
const cwd = config.cwd ?? context.evalDir;
|
|
13415
|
+
if (config.script !== void 0 && config.command === void 0) {
|
|
13416
|
+
console.warn(
|
|
13417
|
+
"\x1B[33mWarning: 'script' is deprecated in workspace config. Use 'command' instead.\x1B[0m"
|
|
13418
|
+
);
|
|
13419
|
+
}
|
|
13398
13420
|
const rawCommand = config.command ?? config.script ?? [];
|
|
13399
13421
|
const commandArray = interpolateArgs(rawCommand, context);
|
|
13400
13422
|
const result = await execFileWithStdin(commandArray, stdin, {
|
|
@@ -15065,6 +15087,7 @@ async function invokeProvider(provider, options) {
|
|
|
15065
15087
|
signal.addEventListener("abort", () => controller.abort(), { once: true });
|
|
15066
15088
|
}
|
|
15067
15089
|
try {
|
|
15090
|
+
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
15068
15091
|
return await provider.invoke({
|
|
15069
15092
|
question: promptInputs.question,
|
|
15070
15093
|
guidelines: promptInputs.guidelines,
|
|
@@ -15080,7 +15103,8 @@ async function invokeProvider(provider, options) {
|
|
|
15080
15103
|
cwd,
|
|
15081
15104
|
workspaceFile,
|
|
15082
15105
|
captureFileChanges: captureFileChanges2,
|
|
15083
|
-
streamCallbacks
|
|
15106
|
+
streamCallbacks,
|
|
15107
|
+
braintrustSpanIds: braintrustSpanIds ?? void 0
|
|
15084
15108
|
});
|
|
15085
15109
|
} finally {
|
|
15086
15110
|
if (timeout !== void 0) {
|
|
@@ -15635,9 +15659,16 @@ var OTEL_BACKEND_PRESETS = {
|
|
|
15635
15659
|
braintrust: {
|
|
15636
15660
|
name: "braintrust",
|
|
15637
15661
|
endpoint: "https://api.braintrust.dev/otel/v1/traces",
|
|
15638
|
-
headers: (env) =>
|
|
15639
|
-
|
|
15640
|
-
|
|
15662
|
+
headers: (env) => {
|
|
15663
|
+
const headers = {
|
|
15664
|
+
Authorization: `Bearer ${env.BRAINTRUST_API_KEY ?? ""}`
|
|
15665
|
+
};
|
|
15666
|
+
const parent = env.BRAINTRUST_PARENT ?? (env.BRAINTRUST_PROJECT_ID ? `project_id:${env.BRAINTRUST_PROJECT_ID}` : void 0) ?? (env.BRAINTRUST_PROJECT ? `project_name:${env.BRAINTRUST_PROJECT}` : void 0);
|
|
15667
|
+
if (parent) {
|
|
15668
|
+
headers["x-bt-parent"] = parent;
|
|
15669
|
+
}
|
|
15670
|
+
return headers;
|
|
15671
|
+
}
|
|
15641
15672
|
},
|
|
15642
15673
|
confident: {
|
|
15643
15674
|
name: "confident",
|
|
@@ -15985,11 +16016,23 @@ var OtelStreamingObserver = class {
|
|
|
15985
16016
|
this.rootSpan = null;
|
|
15986
16017
|
this.rootCtx = null;
|
|
15987
16018
|
}
|
|
16019
|
+
/** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
|
|
16020
|
+
getActiveSpanIds() {
|
|
16021
|
+
if (!this.rootSpan) return null;
|
|
16022
|
+
try {
|
|
16023
|
+
const spanCtx = this.rootSpan.spanContext?.() ?? this.rootSpan._spanContext;
|
|
16024
|
+
if (!spanCtx?.traceId || !spanCtx?.spanId) return null;
|
|
16025
|
+
return { parentSpanId: spanCtx.spanId, rootSpanId: spanCtx.traceId };
|
|
16026
|
+
} catch {
|
|
16027
|
+
return null;
|
|
16028
|
+
}
|
|
16029
|
+
}
|
|
15988
16030
|
/** Get ProviderStreamCallbacks for passing to providers */
|
|
15989
16031
|
getStreamCallbacks() {
|
|
15990
16032
|
return {
|
|
15991
16033
|
onToolCallEnd: (name, input, output, durationMs, toolCallId) => this.onToolCall(name, input, output, durationMs, toolCallId),
|
|
15992
|
-
onLlmCallEnd: (model, tokenUsage) => this.onLlmCall(model, tokenUsage)
|
|
16034
|
+
onLlmCallEnd: (model, tokenUsage) => this.onLlmCall(model, tokenUsage),
|
|
16035
|
+
getActiveSpanIds: () => this.getActiveSpanIds()
|
|
15993
16036
|
};
|
|
15994
16037
|
}
|
|
15995
16038
|
};
|