agentv 3.2.4 → 3.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-VBGYESW7.js → chunk-6XTYVCMN.js} +70 -8
- package/dist/chunk-6XTYVCMN.js.map +1 -0
- package/dist/{chunk-O3KO5MUH.js → chunk-BPK64EWF.js} +4 -4
- package/dist/{chunk-E5M4NOFQ.js → chunk-WQGBWX5Y.js} +8 -5
- package/dist/chunk-WQGBWX5Y.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-WJ4A7XQQ.js → dist-JXD6WHHI.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-VWMIIF4F.js → interactive-B72SWNWB.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-E5M4NOFQ.js.map +0 -1
- package/dist/chunk-VBGYESW7.js.map +0 -1
- /package/dist/{chunk-O3KO5MUH.js.map → chunk-BPK64EWF.js.map} +0 -0
- /package/dist/{dist-WJ4A7XQQ.js.map → dist-JXD6WHHI.js.map} +0 -0
- /package/dist/{interactive-VWMIIF4F.js.map → interactive-B72SWNWB.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-DVFNM65P.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,7 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-DVFNM65P.js
|
|
423
423
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
424
424
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
425
425
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
@@ -17856,7 +17856,14 @@ async function invokeModel(options) {
|
|
|
17856
17856
|
function mapResponse(result) {
|
|
17857
17857
|
const content = result.text ?? "";
|
|
17858
17858
|
const rawUsage = result.totalUsage ?? result.usage;
|
|
17859
|
-
const
|
|
17859
|
+
const reasoning = rawUsage?.outputTokenDetails?.reasoningTokens ?? void 0;
|
|
17860
|
+
const cached = rawUsage?.inputTokenDetails?.cacheReadTokens ?? void 0;
|
|
17861
|
+
const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? {
|
|
17862
|
+
input: rawUsage.inputTokens,
|
|
17863
|
+
output: rawUsage.outputTokens,
|
|
17864
|
+
...reasoning != null ? { reasoning } : {},
|
|
17865
|
+
...cached != null ? { cached } : {}
|
|
17866
|
+
} : void 0;
|
|
17860
17867
|
return {
|
|
17861
17868
|
raw: result,
|
|
17862
17869
|
usage: toJsonObject(rawUsage),
|
|
@@ -18191,10 +18198,12 @@ var ClaudeCliProvider = class {
|
|
|
18191
18198
|
if (usage) {
|
|
18192
18199
|
const inputTokens = (usage.input_tokens ?? 0) + (usage.cache_read_input_tokens ?? 0) + (usage.cache_creation_input_tokens ?? 0);
|
|
18193
18200
|
const outputTokens = usage.output_tokens ?? 0;
|
|
18201
|
+
const reasoningTokens = usage.reasoning_tokens ?? void 0;
|
|
18194
18202
|
tokenUsage = {
|
|
18195
18203
|
input: inputTokens,
|
|
18196
18204
|
output: outputTokens,
|
|
18197
|
-
cached: usage.cache_read_input_tokens ?? void 0
|
|
18205
|
+
cached: usage.cache_read_input_tokens ?? void 0,
|
|
18206
|
+
reasoning: reasoningTokens
|
|
18198
18207
|
};
|
|
18199
18208
|
request.streamCallbacks?.onLlmCallEnd?.(this.config.model ?? "claude", tokenUsage);
|
|
18200
18209
|
}
|
|
@@ -29279,6 +29288,7 @@ async function runEvalCase(options) {
|
|
|
29279
29288
|
} catch {
|
|
29280
29289
|
}
|
|
29281
29290
|
}
|
|
29291
|
+
const caseStartMs = Date.now();
|
|
29282
29292
|
const attemptBudget = (maxRetries ?? 0) + 1;
|
|
29283
29293
|
let attempt = 0;
|
|
29284
29294
|
let providerResponse = cachedResponse;
|
|
@@ -29427,9 +29437,22 @@ async function runEvalCase(options) {
|
|
|
29427
29437
|
fileChanges,
|
|
29428
29438
|
workspacePath
|
|
29429
29439
|
});
|
|
29440
|
+
const totalDurationMs = Date.now() - caseStartMs;
|
|
29441
|
+
const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
|
|
29442
|
+
const evalRunTokenUsage = tokenUsage || graderTokens ? {
|
|
29443
|
+
input: (tokenUsage?.input ?? 0) + (graderTokens?.input ?? 0),
|
|
29444
|
+
output: (tokenUsage?.output ?? 0) + (graderTokens?.output ?? 0),
|
|
29445
|
+
...tokenUsage?.reasoning != null || graderTokens?.reasoning != null ? { reasoning: (tokenUsage?.reasoning ?? 0) + (graderTokens?.reasoning ?? 0) } : {},
|
|
29446
|
+
...tokenUsage?.cached != null || graderTokens?.cached != null ? { cached: (tokenUsage?.cached ?? 0) + (graderTokens?.cached ?? 0) } : {}
|
|
29447
|
+
} : void 0;
|
|
29448
|
+
const evalRun = {
|
|
29449
|
+
durationMs: totalDurationMs,
|
|
29450
|
+
...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
|
|
29451
|
+
};
|
|
29430
29452
|
const executionStatus = providerError ? "execution_error" : classifyQualityStatus(result.score);
|
|
29431
29453
|
const finalResult = providerError ? {
|
|
29432
29454
|
...result,
|
|
29455
|
+
evalRun,
|
|
29433
29456
|
error: providerError,
|
|
29434
29457
|
executionStatus,
|
|
29435
29458
|
failureStage: "agent",
|
|
@@ -29438,7 +29461,7 @@ async function runEvalCase(options) {
|
|
|
29438
29461
|
beforeAllOutput,
|
|
29439
29462
|
beforeEachOutput,
|
|
29440
29463
|
afterEachOutput
|
|
29441
|
-
} : { ...result, executionStatus, beforeAllOutput, beforeEachOutput, afterEachOutput };
|
|
29464
|
+
} : { ...result, evalRun, executionStatus, beforeAllOutput, beforeEachOutput, afterEachOutput };
|
|
29442
29465
|
const isFailure = !!finalResult.error || finalResult.score < 0.5;
|
|
29443
29466
|
if (workspacePath && !isSharedWorkspace) {
|
|
29444
29467
|
if (forceCleanup) {
|
|
@@ -29458,6 +29481,7 @@ async function runEvalCase(options) {
|
|
|
29458
29481
|
}
|
|
29459
29482
|
return finalResult;
|
|
29460
29483
|
} catch (error) {
|
|
29484
|
+
const evalRun = { durationMs: Date.now() - caseStartMs };
|
|
29461
29485
|
const errorResult = buildErrorResult(
|
|
29462
29486
|
evalCase,
|
|
29463
29487
|
target.name,
|
|
@@ -29473,10 +29497,10 @@ async function runEvalCase(options) {
|
|
|
29473
29497
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
29474
29498
|
});
|
|
29475
29499
|
} else {
|
|
29476
|
-
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
29500
|
+
return { ...errorResult, evalRun, workspacePath, beforeEachOutput, afterEachOutput };
|
|
29477
29501
|
}
|
|
29478
29502
|
}
|
|
29479
|
-
return { ...errorResult, beforeEachOutput, afterEachOutput };
|
|
29503
|
+
return { ...errorResult, evalRun, beforeEachOutput, afterEachOutput };
|
|
29480
29504
|
}
|
|
29481
29505
|
}
|
|
29482
29506
|
async function runEvalCaseWithTrials(options, trialsConfig) {
|
|
@@ -30051,6 +30075,44 @@ function buildResultInput(promptInputs) {
|
|
|
30051
30075
|
}
|
|
30052
30076
|
return promptInputs.question;
|
|
30053
30077
|
}
|
|
30078
|
+
/**
 * Sums the token usage reported by evaluator score entries, walking nested
 * `scores` arrays recursively so child entries are included in the totals.
 *
 * @param {Array<object>|undefined|null} scores - Score entries. Each entry may
 *   carry a `tokenUsage` object (`input`, `output`, optional `reasoning` and
 *   `cached`) and/or a nested `scores` array of further entries.
 * @returns {{input: number, output: number, reasoning?: number, cached?: number}|undefined}
 *   The combined totals, or `undefined` when `scores` is empty/missing or no
 *   entry anywhere in the tree has a `tokenUsage`. `reasoning` / `cached` are
 *   only present when at least one entry reported them (a reported 0 counts).
 */
function aggregateEvaluatorTokenUsage(scores) {
  if (!scores || scores.length === 0) return void 0;
  let hasAny = false;
  let hasReasoning = false;
  let hasCached = false;
  let input = 0;
  let output = 0;
  let reasoning = 0;
  let cached = 0;
  const visit = (items) => {
    for (const item of items) {
      // Tolerate holes / nullish entries instead of throwing on property access.
      if (item == null) continue;
      const usage = item.tokenUsage;
      if (usage) {
        hasAny = true;
        // Default missing counts to 0 so a partial usage object cannot turn
        // the running totals into NaN (same `?? 0` convention the callers use).
        input += usage.input ?? 0;
        output += usage.output ?? 0;
        if (usage.reasoning != null) {
          hasReasoning = true;
          reasoning += usage.reasoning;
        }
        if (usage.cached != null) {
          hasCached = true;
          cached += usage.cached;
        }
      }
      if (item.scores) {
        visit(item.scores);
      }
    }
  };
  visit(scores);
  if (!hasAny) return void 0;
  return {
    input,
    output,
    ...(hasReasoning ? { reasoning } : {}),
    ...(hasCached ? { cached } : {})
  };
}
|
|
30054
30116
|
function isTimeoutLike(error) {
|
|
30055
30117
|
if (!error) {
|
|
30056
30118
|
return false;
|
|
@@ -31086,4 +31148,4 @@ export {
|
|
|
31086
31148
|
OtelStreamingObserver,
|
|
31087
31149
|
createAgentKernel
|
|
31088
31150
|
};
|
|
31089
|
-
//# sourceMappingURL=chunk-VBGYESW7.js.map
|
|
31151
|
+
//# sourceMappingURL=chunk-6XTYVCMN.js.map
|