agentv 3.2.5 → 3.4.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-BPK64EWF.js → chunk-A7ZDUB46.js} +238 -116
- package/dist/chunk-A7ZDUB46.js.map +1 -0
- package/dist/{chunk-6XTYVCMN.js → chunk-GOZV2HN2.js} +236 -94
- package/dist/chunk-GOZV2HN2.js.map +1 -0
- package/dist/{chunk-WQGBWX5Y.js → chunk-RE5I3U2S.js} +156 -46
- package/dist/chunk-RE5I3U2S.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-JXD6WHHI.js → dist-AFDYFH6Y.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-B72SWNWB.js → interactive-WXXTZ7PD.js} +44 -4
- package/dist/interactive-WXXTZ7PD.js.map +1 -0
- package/dist/templates/.agentv/.env.example +11 -9
- package/dist/templates/.agentv/config.yaml +5 -0
- package/dist/templates/.agentv/targets.yaml +0 -16
- package/package.json +2 -2
- package/dist/chunk-6XTYVCMN.js.map +0 -1
- package/dist/chunk-BPK64EWF.js.map +0 -1
- package/dist/chunk-WQGBWX5Y.js.map +0 -1
- package/dist/interactive-B72SWNWB.js.map +0 -1
- /package/dist/{dist-JXD6WHHI.js.map → dist-AFDYFH6Y.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-JO4HIAEF.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,7 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-JO4HIAEF.js
|
|
423
423
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
424
424
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
425
425
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
@@ -1810,6 +1810,7 @@ var AGENT_PROVIDER_KINDS = [
|
|
|
1810
1810
|
"copilot-sdk",
|
|
1811
1811
|
"copilot-cli",
|
|
1812
1812
|
"pi-coding-agent",
|
|
1813
|
+
"pi-agent-sdk",
|
|
1813
1814
|
"claude",
|
|
1814
1815
|
"claude-cli",
|
|
1815
1816
|
"claude-sdk",
|
|
@@ -14657,19 +14658,26 @@ function logWarning(message) {
|
|
|
14657
14658
|
console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
|
|
14658
14659
|
}
|
|
14659
14660
|
var TEMPLATE_VARIABLES = {
|
|
14661
|
+
/** @deprecated Use OUTPUT_TEXT instead */
|
|
14660
14662
|
ANSWER: "answer",
|
|
14661
14663
|
EXPECTED_OUTPUT: "expected_output",
|
|
14664
|
+
/** @deprecated Use INPUT_TEXT instead */
|
|
14662
14665
|
QUESTION: "question",
|
|
14663
14666
|
CRITERIA: "criteria",
|
|
14667
|
+
/** @deprecated Use EXPECTED_OUTPUT_TEXT instead */
|
|
14664
14668
|
REFERENCE_ANSWER: "reference_answer",
|
|
14665
14669
|
INPUT: "input",
|
|
14666
14670
|
OUTPUT: "output",
|
|
14667
|
-
FILE_CHANGES: "file_changes"
|
|
14671
|
+
FILE_CHANGES: "file_changes",
|
|
14672
|
+
INPUT_TEXT: "input_text",
|
|
14673
|
+
OUTPUT_TEXT: "output_text",
|
|
14674
|
+
EXPECTED_OUTPUT_TEXT: "expected_output_text"
|
|
14668
14675
|
};
|
|
14669
14676
|
var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
|
|
14670
14677
|
var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
14671
14678
|
TEMPLATE_VARIABLES.ANSWER,
|
|
14672
|
-
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
14679
|
+
TEMPLATE_VARIABLES.EXPECTED_OUTPUT,
|
|
14680
|
+
TEMPLATE_VARIABLES.OUTPUT_TEXT
|
|
14673
14681
|
]);
|
|
14674
14682
|
var ANSI_YELLOW3 = "\x1B[33m";
|
|
14675
14683
|
var ANSI_RESET4 = "\x1B[0m";
|
|
@@ -14690,13 +14698,13 @@ function validateTemplateVariables(content, source) {
|
|
|
14690
14698
|
}
|
|
14691
14699
|
match = variablePattern.exec(content);
|
|
14692
14700
|
}
|
|
14693
|
-
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.ANSWER);
|
|
14701
|
+
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.ANSWER) || foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
14694
14702
|
const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
|
|
14695
14703
|
const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
|
|
14696
14704
|
if (!hasRequiredFields) {
|
|
14697
14705
|
throw new Error(
|
|
14698
14706
|
`Missing required fields. Must include at least one of:
|
|
14699
|
-
- {{ ${TEMPLATE_VARIABLES.ANSWER} }}
|
|
14707
|
+
- {{ ${TEMPLATE_VARIABLES.ANSWER} }} or {{ ${TEMPLATE_VARIABLES.OUTPUT_TEXT} }}
|
|
14700
14708
|
- {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
|
|
14701
14709
|
);
|
|
14702
14710
|
}
|
|
@@ -20905,6 +20913,29 @@ var MockProvider = class {
|
|
|
20905
20913
|
return this.delayMs;
|
|
20906
20914
|
}
|
|
20907
20915
|
};
|
|
20916
|
+
function extractPiTextContent(content) {
|
|
20917
|
+
if (typeof content === "string") {
|
|
20918
|
+
return content;
|
|
20919
|
+
}
|
|
20920
|
+
if (!Array.isArray(content)) {
|
|
20921
|
+
return void 0;
|
|
20922
|
+
}
|
|
20923
|
+
const textParts = [];
|
|
20924
|
+
for (const part of content) {
|
|
20925
|
+
if (!part || typeof part !== "object") {
|
|
20926
|
+
continue;
|
|
20927
|
+
}
|
|
20928
|
+
const p = part;
|
|
20929
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
20930
|
+
textParts.push(p.text);
|
|
20931
|
+
}
|
|
20932
|
+
}
|
|
20933
|
+
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
20934
|
+
}
|
|
20935
|
+
function toFiniteNumber(value) {
|
|
20936
|
+
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
20937
|
+
return void 0;
|
|
20938
|
+
}
|
|
20908
20939
|
var piAgentModule = null;
|
|
20909
20940
|
var piAiModule = null;
|
|
20910
20941
|
async function loadPiModules() {
|
|
@@ -20945,7 +20976,8 @@ var PiAgentSdkProvider = class {
|
|
|
20945
20976
|
throw new Error("Pi agent SDK request was aborted before execution");
|
|
20946
20977
|
}
|
|
20947
20978
|
const { Agent, getModel, getEnvApiKey } = await loadPiModules();
|
|
20948
|
-
const
|
|
20979
|
+
const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
|
|
20980
|
+
const startMs = Date.now();
|
|
20949
20981
|
const providerName = this.config.provider ?? "anthropic";
|
|
20950
20982
|
const modelId = this.config.model ?? "claude-sonnet-4-20250514";
|
|
20951
20983
|
const model = getModel(providerName, modelId);
|
|
@@ -20962,16 +20994,73 @@ var PiAgentSdkProvider = class {
|
|
|
20962
20994
|
return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
|
|
20963
20995
|
}
|
|
20964
20996
|
});
|
|
20965
|
-
|
|
20966
|
-
let
|
|
20997
|
+
let tokenUsage;
|
|
20998
|
+
let costUsd;
|
|
20999
|
+
const toolTrackers = /* @__PURE__ */ new Map();
|
|
21000
|
+
const completedToolResults = /* @__PURE__ */ new Map();
|
|
20967
21001
|
const unsubscribe = agent.subscribe((event) => {
|
|
20968
|
-
|
|
20969
|
-
|
|
20970
|
-
|
|
20971
|
-
|
|
20972
|
-
|
|
20973
|
-
|
|
21002
|
+
switch (event.type) {
|
|
21003
|
+
case "message_end": {
|
|
21004
|
+
const msg = event.message;
|
|
21005
|
+
if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
|
|
21006
|
+
const usage = msg.usage;
|
|
21007
|
+
if (usage && typeof usage === "object") {
|
|
21008
|
+
const u = usage;
|
|
21009
|
+
const input = toFiniteNumber(u.input);
|
|
21010
|
+
const output = toFiniteNumber(u.output);
|
|
21011
|
+
const cached = toFiniteNumber(u.cacheRead);
|
|
21012
|
+
let callDelta;
|
|
21013
|
+
if (input !== void 0 || output !== void 0) {
|
|
21014
|
+
callDelta = {
|
|
21015
|
+
input: input ?? 0,
|
|
21016
|
+
output: output ?? 0,
|
|
21017
|
+
...cached !== void 0 ? { cached } : {}
|
|
21018
|
+
};
|
|
21019
|
+
tokenUsage = {
|
|
21020
|
+
input: (tokenUsage?.input ?? 0) + callDelta.input,
|
|
21021
|
+
output: (tokenUsage?.output ?? 0) + callDelta.output,
|
|
21022
|
+
...cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {}
|
|
21023
|
+
};
|
|
21024
|
+
}
|
|
21025
|
+
const cost = u.cost;
|
|
21026
|
+
if (cost && typeof cost === "object") {
|
|
21027
|
+
const total = toFiniteNumber(cost.total);
|
|
21028
|
+
if (total !== void 0) {
|
|
21029
|
+
costUsd = (costUsd ?? 0) + total;
|
|
21030
|
+
}
|
|
21031
|
+
}
|
|
21032
|
+
request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
|
|
21033
|
+
}
|
|
20974
21034
|
}
|
|
21035
|
+
break;
|
|
21036
|
+
}
|
|
21037
|
+
case "tool_execution_start": {
|
|
21038
|
+
toolTrackers.set(event.toolCallId, {
|
|
21039
|
+
toolCallId: event.toolCallId,
|
|
21040
|
+
toolName: event.toolName,
|
|
21041
|
+
args: event.args,
|
|
21042
|
+
startMs: Date.now(),
|
|
21043
|
+
startTime: (/* @__PURE__ */ new Date()).toISOString()
|
|
21044
|
+
});
|
|
21045
|
+
request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
|
|
21046
|
+
break;
|
|
21047
|
+
}
|
|
21048
|
+
case "tool_execution_end": {
|
|
21049
|
+
const tracker = toolTrackers.get(event.toolCallId);
|
|
21050
|
+
const durationMs = tracker ? Date.now() - tracker.startMs : 0;
|
|
21051
|
+
completedToolResults.set(event.toolCallId, {
|
|
21052
|
+
output: event.result,
|
|
21053
|
+
durationMs
|
|
21054
|
+
});
|
|
21055
|
+
request.streamCallbacks?.onToolCallEnd?.(
|
|
21056
|
+
event.toolName,
|
|
21057
|
+
tracker?.args,
|
|
21058
|
+
event.result,
|
|
21059
|
+
durationMs,
|
|
21060
|
+
event.toolCallId
|
|
21061
|
+
);
|
|
21062
|
+
toolTrackers.delete(event.toolCallId);
|
|
21063
|
+
break;
|
|
20975
21064
|
}
|
|
20976
21065
|
}
|
|
20977
21066
|
});
|
|
@@ -20990,10 +21079,12 @@ var PiAgentSdkProvider = class {
|
|
|
20990
21079
|
}
|
|
20991
21080
|
await agent.waitForIdle();
|
|
20992
21081
|
const agentMessages = agent.state.messages;
|
|
21082
|
+
const output = [];
|
|
20993
21083
|
for (const msg of agentMessages) {
|
|
20994
|
-
output.push(convertAgentMessage(msg));
|
|
21084
|
+
output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
|
|
20995
21085
|
}
|
|
20996
|
-
const
|
|
21086
|
+
const endTimeIso = (/* @__PURE__ */ new Date()).toISOString();
|
|
21087
|
+
const durationMs = Date.now() - startMs;
|
|
20997
21088
|
return {
|
|
20998
21089
|
raw: {
|
|
20999
21090
|
messages: agentMessages,
|
|
@@ -21002,49 +21093,54 @@ var PiAgentSdkProvider = class {
|
|
|
21002
21093
|
provider: this.config.provider
|
|
21003
21094
|
},
|
|
21004
21095
|
output,
|
|
21005
|
-
|
|
21096
|
+
tokenUsage,
|
|
21097
|
+
costUsd,
|
|
21098
|
+
durationMs,
|
|
21099
|
+
startTime: startTimeIso,
|
|
21100
|
+
endTime: endTimeIso
|
|
21006
21101
|
};
|
|
21007
21102
|
} finally {
|
|
21008
21103
|
unsubscribe();
|
|
21009
21104
|
}
|
|
21010
21105
|
}
|
|
21011
21106
|
};
|
|
21012
|
-
function
|
|
21013
|
-
if (typeof content === "string") {
|
|
21014
|
-
return content;
|
|
21015
|
-
}
|
|
21016
|
-
if (!Array.isArray(content)) {
|
|
21017
|
-
return void 0;
|
|
21018
|
-
}
|
|
21019
|
-
const textParts = [];
|
|
21020
|
-
for (const part of content) {
|
|
21021
|
-
if (!part || typeof part !== "object") {
|
|
21022
|
-
continue;
|
|
21023
|
-
}
|
|
21024
|
-
const p = part;
|
|
21025
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
21026
|
-
textParts.push(p.text);
|
|
21027
|
-
}
|
|
21028
|
-
}
|
|
21029
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
21030
|
-
}
|
|
21031
|
-
function convertAgentMessage(message) {
|
|
21107
|
+
function convertAgentMessage(message, toolTrackers, completedToolResults) {
|
|
21032
21108
|
if (!message || typeof message !== "object") {
|
|
21033
21109
|
return { role: "unknown", content: String(message) };
|
|
21034
21110
|
}
|
|
21035
21111
|
const msg = message;
|
|
21036
21112
|
const role = typeof msg.role === "string" ? msg.role : "unknown";
|
|
21037
|
-
const content =
|
|
21038
|
-
const toolCalls = extractToolCalls3(msg.content);
|
|
21113
|
+
const content = extractPiTextContent(msg.content);
|
|
21114
|
+
const toolCalls = extractToolCalls3(msg.content, toolTrackers, completedToolResults);
|
|
21039
21115
|
const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
21116
|
+
let msgTokenUsage;
|
|
21117
|
+
if (msg.usage && typeof msg.usage === "object") {
|
|
21118
|
+
const u = msg.usage;
|
|
21119
|
+
const input = toFiniteNumber(u.input);
|
|
21120
|
+
const output = toFiniteNumber(u.output);
|
|
21121
|
+
if (input !== void 0 || output !== void 0) {
|
|
21122
|
+
msgTokenUsage = {
|
|
21123
|
+
input: input ?? 0,
|
|
21124
|
+
output: output ?? 0,
|
|
21125
|
+
...toFiniteNumber(u.cacheRead) !== void 0 ? { cached: toFiniteNumber(u.cacheRead) } : {}
|
|
21126
|
+
};
|
|
21127
|
+
}
|
|
21128
|
+
}
|
|
21129
|
+
const metadata = {};
|
|
21130
|
+
if (msg.api) metadata.api = msg.api;
|
|
21131
|
+
if (msg.provider) metadata.provider = msg.provider;
|
|
21132
|
+
if (msg.model) metadata.model = msg.model;
|
|
21133
|
+
if (msg.stopReason) metadata.stopReason = msg.stopReason;
|
|
21040
21134
|
return {
|
|
21041
21135
|
role,
|
|
21042
21136
|
content,
|
|
21043
21137
|
toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
|
|
21044
|
-
startTime
|
|
21138
|
+
startTime,
|
|
21139
|
+
metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
|
|
21140
|
+
tokenUsage: msgTokenUsage
|
|
21045
21141
|
};
|
|
21046
21142
|
}
|
|
21047
|
-
function extractToolCalls3(content) {
|
|
21143
|
+
function extractToolCalls3(content, toolTrackers, completedToolResults) {
|
|
21048
21144
|
if (!Array.isArray(content)) {
|
|
21049
21145
|
return [];
|
|
21050
21146
|
}
|
|
@@ -21055,10 +21151,17 @@ function extractToolCalls3(content) {
|
|
|
21055
21151
|
}
|
|
21056
21152
|
const p = part;
|
|
21057
21153
|
if (p.type === "toolCall" && typeof p.name === "string") {
|
|
21154
|
+
const id = typeof p.id === "string" ? p.id : void 0;
|
|
21155
|
+
const tracker = id ? toolTrackers.get(id) : void 0;
|
|
21156
|
+
const completed = id ? completedToolResults.get(id) : void 0;
|
|
21058
21157
|
toolCalls.push({
|
|
21059
21158
|
tool: p.name,
|
|
21060
21159
|
input: p.arguments,
|
|
21061
|
-
id
|
|
21160
|
+
id,
|
|
21161
|
+
output: completed?.output,
|
|
21162
|
+
durationMs: completed?.durationMs,
|
|
21163
|
+
startTime: tracker?.startTime,
|
|
21164
|
+
endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
|
|
21062
21165
|
});
|
|
21063
21166
|
}
|
|
21064
21167
|
}
|
|
@@ -21590,14 +21693,14 @@ function extractTokenUsage(events) {
|
|
|
21590
21693
|
const usage = record.usage;
|
|
21591
21694
|
if (usage && typeof usage === "object") {
|
|
21592
21695
|
const u = usage;
|
|
21593
|
-
const input =
|
|
21594
|
-
const output =
|
|
21696
|
+
const input = toFiniteNumber(u.input_tokens ?? u.inputTokens ?? u.input);
|
|
21697
|
+
const output = toFiniteNumber(u.output_tokens ?? u.outputTokens ?? u.output);
|
|
21595
21698
|
if (input !== void 0 || output !== void 0) {
|
|
21596
21699
|
const result = {
|
|
21597
21700
|
input: input ?? 0,
|
|
21598
21701
|
output: output ?? 0
|
|
21599
21702
|
};
|
|
21600
|
-
const cached =
|
|
21703
|
+
const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
|
|
21601
21704
|
if (cached !== void 0) {
|
|
21602
21705
|
return { ...result, cached };
|
|
21603
21706
|
}
|
|
@@ -21622,13 +21725,13 @@ function aggregateUsageFromMessages(messages) {
|
|
|
21622
21725
|
const usage = m.usage;
|
|
21623
21726
|
if (!usage || typeof usage !== "object") continue;
|
|
21624
21727
|
const u = usage;
|
|
21625
|
-
const input =
|
|
21626
|
-
const output =
|
|
21728
|
+
const input = toFiniteNumber(u.input_tokens ?? u.inputTokens ?? u.input);
|
|
21729
|
+
const output = toFiniteNumber(u.output_tokens ?? u.outputTokens ?? u.output);
|
|
21627
21730
|
if (input !== void 0 || output !== void 0) {
|
|
21628
21731
|
found = true;
|
|
21629
21732
|
totalInput += input ?? 0;
|
|
21630
21733
|
totalOutput += output ?? 0;
|
|
21631
|
-
const cached =
|
|
21734
|
+
const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
|
|
21632
21735
|
if (cached !== void 0) {
|
|
21633
21736
|
totalCached = (totalCached ?? 0) + cached;
|
|
21634
21737
|
}
|
|
@@ -21641,10 +21744,6 @@ function aggregateUsageFromMessages(messages) {
|
|
|
21641
21744
|
}
|
|
21642
21745
|
return result;
|
|
21643
21746
|
}
|
|
21644
|
-
function toNumber(value) {
|
|
21645
|
-
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
21646
|
-
return void 0;
|
|
21647
|
-
}
|
|
21648
21747
|
function convertPiMessage(message) {
|
|
21649
21748
|
if (!message || typeof message !== "object") {
|
|
21650
21749
|
return void 0;
|
|
@@ -21654,7 +21753,7 @@ function convertPiMessage(message) {
|
|
|
21654
21753
|
if (typeof role !== "string") {
|
|
21655
21754
|
return void 0;
|
|
21656
21755
|
}
|
|
21657
|
-
const content = extractTextContent4(msg.content);
|
|
21756
|
+
const content = extractPiTextContent(msg.content);
|
|
21658
21757
|
const toolCalls = extractToolCalls4(msg.content);
|
|
21659
21758
|
const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
21660
21759
|
const metadata = {};
|
|
@@ -21671,25 +21770,6 @@ function convertPiMessage(message) {
|
|
|
21671
21770
|
metadata: Object.keys(metadata).length > 0 ? metadata : void 0
|
|
21672
21771
|
};
|
|
21673
21772
|
}
|
|
21674
|
-
function extractTextContent4(content) {
|
|
21675
|
-
if (typeof content === "string") {
|
|
21676
|
-
return content;
|
|
21677
|
-
}
|
|
21678
|
-
if (!Array.isArray(content)) {
|
|
21679
|
-
return void 0;
|
|
21680
|
-
}
|
|
21681
|
-
const textParts = [];
|
|
21682
|
-
for (const part of content) {
|
|
21683
|
-
if (!part || typeof part !== "object") {
|
|
21684
|
-
continue;
|
|
21685
|
-
}
|
|
21686
|
-
const p = part;
|
|
21687
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
21688
|
-
textParts.push(p.text);
|
|
21689
|
-
}
|
|
21690
|
-
}
|
|
21691
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
21692
|
-
}
|
|
21693
21773
|
function extractToolCalls4(content) {
|
|
21694
21774
|
if (!Array.isArray(content)) {
|
|
21695
21775
|
return [];
|
|
@@ -23868,7 +23948,11 @@ var CodeEvaluator = class {
|
|
|
23868
23948
|
endTime: context2.endTime ?? null,
|
|
23869
23949
|
fileChanges: context2.fileChanges ?? null,
|
|
23870
23950
|
workspacePath: context2.workspacePath ?? null,
|
|
23871
|
-
config: this.config ?? null
|
|
23951
|
+
config: this.config ?? null,
|
|
23952
|
+
// Text convenience accessors (new names, always strings)
|
|
23953
|
+
inputText: context2.evalCase.question,
|
|
23954
|
+
outputText: context2.candidate,
|
|
23955
|
+
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
23872
23956
|
};
|
|
23873
23957
|
const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
23874
23958
|
let proxyEnv;
|
|
@@ -24110,7 +24194,11 @@ var LlmGraderEvaluator = class {
|
|
|
24110
24194
|
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
24111
24195
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
24112
24196
|
[TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
|
|
24113
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
|
|
24197
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
|
|
24198
|
+
// Text convenience accessors (new names, always strings)
|
|
24199
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
24200
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
24201
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
|
|
24114
24202
|
};
|
|
24115
24203
|
const systemPrompt = buildOutputSchema();
|
|
24116
24204
|
const evaluatorTemplate = context2.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
|
|
@@ -25694,8 +25782,8 @@ var FieldAccuracyEvaluator = class {
|
|
|
25694
25782
|
*/
|
|
25695
25783
|
compareNumericTolerance(path46, candidateValue, expectedValue, fieldConfig, weight) {
|
|
25696
25784
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
25697
|
-
const candidateNum =
|
|
25698
|
-
const expectedNum =
|
|
25785
|
+
const candidateNum = toNumber(candidateValue);
|
|
25786
|
+
const expectedNum = toNumber(expectedValue);
|
|
25699
25787
|
if (candidateNum === null || expectedNum === null) {
|
|
25700
25788
|
return {
|
|
25701
25789
|
path: path46,
|
|
@@ -25841,7 +25929,7 @@ function resolvePath(obj, path46) {
|
|
|
25841
25929
|
}
|
|
25842
25930
|
return current;
|
|
25843
25931
|
}
|
|
25844
|
-
function
|
|
25932
|
+
function toNumber(value) {
|
|
25845
25933
|
if (typeof value === "number") {
|
|
25846
25934
|
return value;
|
|
25847
25935
|
}
|
|
@@ -25950,28 +26038,60 @@ var LatencyEvaluator = class {
|
|
|
25950
26038
|
};
|
|
25951
26039
|
}
|
|
25952
26040
|
};
|
|
26041
|
+
var CLAUDE_MATCHER = {
|
|
26042
|
+
skillTools: ["Skill"],
|
|
26043
|
+
skillInputField: "skill",
|
|
26044
|
+
readTools: ["Read"],
|
|
26045
|
+
readInputField: "file_path"
|
|
26046
|
+
};
|
|
26047
|
+
var COPILOT_MATCHER = {
|
|
26048
|
+
skillTools: ["Skill", "skill"],
|
|
26049
|
+
skillInputField: "skill",
|
|
26050
|
+
readTools: ["Read File", "readFile", "Read", "readTextFile"],
|
|
26051
|
+
readInputField: "file_path"
|
|
26052
|
+
};
|
|
26053
|
+
var PROVIDER_TOOL_SEMANTICS = {
|
|
26054
|
+
claude: CLAUDE_MATCHER,
|
|
26055
|
+
"claude-cli": CLAUDE_MATCHER,
|
|
26056
|
+
"claude-sdk": CLAUDE_MATCHER,
|
|
26057
|
+
"pi-coding-agent": CLAUDE_MATCHER,
|
|
26058
|
+
"pi-agent-sdk": CLAUDE_MATCHER,
|
|
26059
|
+
"copilot-cli": COPILOT_MATCHER,
|
|
26060
|
+
"copilot-sdk": COPILOT_MATCHER,
|
|
26061
|
+
vscode: COPILOT_MATCHER,
|
|
26062
|
+
"vscode-insiders": COPILOT_MATCHER
|
|
26063
|
+
};
|
|
25953
26064
|
var SkillTriggerEvaluator = class {
|
|
25954
26065
|
kind = "skill-trigger";
|
|
25955
26066
|
config;
|
|
25956
26067
|
constructor(config) {
|
|
25957
26068
|
this.config = config;
|
|
25958
26069
|
}
|
|
26070
|
+
resolveMatcher(providerKind) {
|
|
26071
|
+
if (providerKind) {
|
|
26072
|
+
const match = PROVIDER_TOOL_SEMANTICS[providerKind];
|
|
26073
|
+
if (match) return match;
|
|
26074
|
+
}
|
|
26075
|
+
return CLAUDE_MATCHER;
|
|
26076
|
+
}
|
|
25959
26077
|
evaluate(context2) {
|
|
25960
26078
|
const skillName = this.config.skill;
|
|
25961
26079
|
const shouldTrigger = this.config.should_trigger !== false;
|
|
26080
|
+
const providerKind = context2.provider?.kind;
|
|
26081
|
+
const matcher = this.resolveMatcher(providerKind);
|
|
25962
26082
|
const firstTool = (context2.output ?? []).flatMap((msg) => msg.toolCalls ?? [])[0];
|
|
25963
26083
|
let triggered = false;
|
|
25964
26084
|
let evidence = "";
|
|
25965
26085
|
if (firstTool) {
|
|
25966
26086
|
const input = firstTool.input ?? {};
|
|
25967
|
-
if (firstTool.tool
|
|
25968
|
-
const skillArg = String(input.
|
|
26087
|
+
if (matcher.skillTools.includes(firstTool.tool)) {
|
|
26088
|
+
const skillArg = String(input[matcher.skillInputField] ?? "");
|
|
25969
26089
|
if (skillArg.includes(skillName)) {
|
|
25970
26090
|
triggered = true;
|
|
25971
|
-
evidence = `Skill tool invoked with
|
|
26091
|
+
evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
|
|
25972
26092
|
}
|
|
25973
|
-
} else if (firstTool.tool
|
|
25974
|
-
const filePath = String(input.
|
|
26093
|
+
} else if (matcher.readTools.includes(firstTool.tool)) {
|
|
26094
|
+
const filePath = String(input[matcher.readInputField] ?? "");
|
|
25975
26095
|
if (filePath.includes(skillName)) {
|
|
25976
26096
|
triggered = true;
|
|
25977
26097
|
evidence = `Read tool loaded skill file: ${filePath}`;
|
|
@@ -25996,7 +26116,7 @@ var SkillTriggerEvaluator = class {
|
|
|
25996
26116
|
verdict: "fail",
|
|
25997
26117
|
hits: [],
|
|
25998
26118
|
misses: [
|
|
25999
|
-
shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not
|
|
26119
|
+
shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not a skill/read tool for "${skillName}"` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`
|
|
26000
26120
|
],
|
|
26001
26121
|
expectedAspectCount: 1,
|
|
26002
26122
|
reasoning: shouldTrigger ? `Skill "${skillName}" was not triggered` : "False trigger: skill fired when it should not have"
|
|
@@ -26038,7 +26158,11 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evalua
|
|
|
26038
26158
|
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (evalCase.reference_answer ?? "").trim(),
|
|
26039
26159
|
[TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
|
|
26040
26160
|
[TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
|
|
26041
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? ""
|
|
26161
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
|
|
26162
|
+
// Text convenience accessors (new names, always strings)
|
|
26163
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
26164
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
|
|
26165
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
|
|
26042
26166
|
};
|
|
26043
26167
|
const systemPrompt = buildOutputSchema();
|
|
26044
26168
|
const template = evaluatorTemplateOverride ?? DEFAULT_EVALUATOR_TEMPLATE;
|
|
@@ -27029,7 +27153,11 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
27029
27153
|
trace: context2.trace ?? null,
|
|
27030
27154
|
fileChanges: context2.fileChanges ?? null,
|
|
27031
27155
|
workspacePath: context2.workspacePath ?? null,
|
|
27032
|
-
config: config ?? context2.config ?? null
|
|
27156
|
+
config: config ?? context2.config ?? null,
|
|
27157
|
+
// Text convenience accessors (new names, always strings)
|
|
27158
|
+
inputText: context2.evalCase.question,
|
|
27159
|
+
outputText: context2.candidate,
|
|
27160
|
+
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
27033
27161
|
};
|
|
27034
27162
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
27035
27163
|
const scriptPath = script[script.length - 1];
|
|
@@ -28689,7 +28817,9 @@ async function runEvaluation(options) {
|
|
|
28689
28817
|
testId: evalCase.id,
|
|
28690
28818
|
status: "failed",
|
|
28691
28819
|
completedAt: Date.now(),
|
|
28692
|
-
error: budgetResult.error
|
|
28820
|
+
error: budgetResult.error,
|
|
28821
|
+
score: budgetResult.score,
|
|
28822
|
+
executionStatus: budgetResult.executionStatus
|
|
28693
28823
|
});
|
|
28694
28824
|
}
|
|
28695
28825
|
if (onResult) {
|
|
@@ -28720,7 +28850,9 @@ async function runEvaluation(options) {
|
|
|
28720
28850
|
testId: evalCase.id,
|
|
28721
28851
|
status: "failed",
|
|
28722
28852
|
completedAt: Date.now(),
|
|
28723
|
-
error: haltResult.error
|
|
28853
|
+
error: haltResult.error,
|
|
28854
|
+
score: haltResult.score,
|
|
28855
|
+
executionStatus: haltResult.executionStatus
|
|
28724
28856
|
});
|
|
28725
28857
|
}
|
|
28726
28858
|
if (onResult) {
|
|
@@ -28800,7 +28932,9 @@ async function runEvaluation(options) {
|
|
|
28800
28932
|
startedAt: 0,
|
|
28801
28933
|
// Not used for completed status
|
|
28802
28934
|
completedAt: Date.now(),
|
|
28803
|
-
error: result.error
|
|
28935
|
+
error: result.error,
|
|
28936
|
+
score: result.score,
|
|
28937
|
+
executionStatus: result.executionStatus
|
|
28804
28938
|
});
|
|
28805
28939
|
}
|
|
28806
28940
|
if (onResult) {
|
|
@@ -28971,7 +29105,9 @@ async function runBatchEvaluation(options) {
|
|
|
28971
29105
|
const merged = computed ? mergeExecutionMetrics(computed, {
|
|
28972
29106
|
tokenUsage: providerResponse.tokenUsage,
|
|
28973
29107
|
costUsd: providerResponse.costUsd,
|
|
28974
|
-
durationMs: providerResponse.durationMs
|
|
29108
|
+
durationMs: providerResponse.durationMs,
|
|
29109
|
+
startTime: providerResponse.startTime,
|
|
29110
|
+
endTime: providerResponse.endTime
|
|
28975
29111
|
}) : void 0;
|
|
28976
29112
|
const trace2 = merged?.trace;
|
|
28977
29113
|
const costUsd = merged?.costUsd;
|
|
@@ -29036,7 +29172,9 @@ async function runBatchEvaluation(options) {
|
|
|
29036
29172
|
testId: evalCase.id,
|
|
29037
29173
|
status: "failed",
|
|
29038
29174
|
completedAt: Date.now(),
|
|
29039
|
-
error: error instanceof Error ? error.message : String(error)
|
|
29175
|
+
error: error instanceof Error ? error.message : String(error),
|
|
29176
|
+
score: errorResult.score,
|
|
29177
|
+
executionStatus: errorResult.executionStatus
|
|
29040
29178
|
});
|
|
29041
29179
|
}
|
|
29042
29180
|
continue;
|
|
@@ -29052,7 +29190,9 @@ async function runBatchEvaluation(options) {
|
|
|
29052
29190
|
status: result.error ? "failed" : "completed",
|
|
29053
29191
|
startedAt: 0,
|
|
29054
29192
|
completedAt: Date.now(),
|
|
29055
|
-
error: result.error
|
|
29193
|
+
error: result.error,
|
|
29194
|
+
score: result.score,
|
|
29195
|
+
executionStatus: result.executionStatus
|
|
29056
29196
|
});
|
|
29057
29197
|
}
|
|
29058
29198
|
}
|
|
@@ -29362,7 +29502,9 @@ async function runEvalCase(options) {
|
|
|
29362
29502
|
const merged = computed ? mergeExecutionMetrics(computed, {
|
|
29363
29503
|
tokenUsage: providerResponse.tokenUsage,
|
|
29364
29504
|
costUsd: providerResponse.costUsd,
|
|
29365
|
-
durationMs: providerResponse.durationMs
|
|
29505
|
+
durationMs: providerResponse.durationMs,
|
|
29506
|
+
startTime: providerResponse.startTime,
|
|
29507
|
+
endTime: providerResponse.endTime
|
|
29366
29508
|
}) : void 0;
|
|
29367
29509
|
const trace2 = merged?.trace;
|
|
29368
29510
|
const costUsd = merged?.costUsd;
|
|
@@ -31148,4 +31290,4 @@ export {
|
|
|
31148
31290
|
OtelStreamingObserver,
|
|
31149
31291
|
createAgentKernel
|
|
31150
31292
|
};
|
|
31151
|
-
//# sourceMappingURL=chunk-6XTYVCMN.js.map
|
|
31293
|
+
//# sourceMappingURL=chunk-GOZV2HN2.js.map
|