@reconcrap/boss-recommend-mcp 1.3.31 → 1.3.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/test-boss-chat.js
CHANGED
|
@@ -18,7 +18,7 @@ import { __testables as indexTestables } from "./index.js";
|
|
|
18
18
|
import { BossChatApp } from "../vendor/boss-chat-cli/src/app.js";
|
|
19
19
|
import { __testables as vendorCliTestables } from "../vendor/boss-chat-cli/src/cli.js";
|
|
20
20
|
import { BossChatPage } from "../vendor/boss-chat-cli/src/browser/chat-page.js";
|
|
21
|
-
import { LlmClient, parseLlmJson } from "../vendor/boss-chat-cli/src/services/llm.js";
|
|
21
|
+
import { LlmClient, parseLlmJson, __testables as llmTestables } from "../vendor/boss-chat-cli/src/services/llm.js";
|
|
22
22
|
import { ReportStore } from "../vendor/boss-chat-cli/src/services/report-store.js";
|
|
23
23
|
import {
|
|
24
24
|
NETWORK_RESUME_IMAGE_MODE_GRACE_MS,
|
|
@@ -36,6 +36,7 @@ const TOOL_BOSS_CHAT_GET_RUN = "get_boss_chat_run";
|
|
|
36
36
|
const TOOL_BOSS_CHAT_PAUSE_RUN = "pause_boss_chat_run";
|
|
37
37
|
const TOOL_BOSS_CHAT_RESUME_RUN = "resume_boss_chat_run";
|
|
38
38
|
const TOOL_BOSS_CHAT_CANCEL_RUN = "cancel_boss_chat_run";
|
|
39
|
+
const { extractCompletionReasoningText, extractResponsesReasoningText } = llmTestables;
|
|
39
40
|
|
|
40
41
|
function makeToolCall(id, name, args = {}) {
|
|
41
42
|
return {
|
|
@@ -949,6 +950,50 @@ function testBossChatLlmParserShouldAcceptDecisionField() {
|
|
|
949
950
|
assert.equal(parsed.passed, false);
|
|
950
951
|
}
|
|
951
952
|
|
|
953
|
+
/**
 * Checks that parseLlmJson keeps the reason, summary and evidence fields from
 * the model's JSON answer, and that the provider reasoning text passed via
 * options is exposed as both `cot` and `rawReasoningText`.
 */
function testBossChatLlmParserShouldPreserveReasoningFields() {
  const reasoningText = "先检查项目经历,再核对技能栈,结论为通过。";
  const modelAnswer = {
    passed: true,
    reason: "候选人具备 2 段 AI Agent 项目经验",
    summary: "符合筛选要求",
    evidence: ["AI Agent", "MCP"],
  };

  const parsed = parseLlmJson(JSON.stringify(modelAnswer), { reasoningText });

  assert.equal(parsed.passed, true);
  assert.equal(parsed.reason, modelAnswer.reason);
  assert.equal(parsed.summary, modelAnswer.summary);
  assert.equal(parsed.cot, reasoningText);
  assert.deepEqual(parsed.evidence, modelAnswer.evidence);
  assert.equal(parsed.rawReasoningText, reasoningText);
}
|
|
972
|
+
|
|
973
|
+
/**
 * Checks that both reasoning extractors find provider-supplied reasoning:
 * `message.reasoning_content` on a chat-completions payload, and a
 * `type: "reasoning"` output item on a responses payload.
 */
function testBossChatLlmExtractorsShouldReadProviderReasoningFields() {
  const completionPayload = {
    choices: [
      {
        message: {
          content: [{ type: "text", text: "{\"passed\":true}" }],
          reasoning_content: [{ text: "先核对教育背景,再核对项目经历。" }],
        },
      },
    ],
  };
  const completionReasoning = extractCompletionReasoningText(completionPayload);
  assert.equal(completionReasoning.includes("教育背景"), true);

  const responsesPayload = {
    output: [
      {
        type: "reasoning",
        summary: [{ text: "根据项目经历与技能关键词判断为通过。" }],
      },
    ],
  };
  const responsesReasoning = extractResponsesReasoningText(responsesPayload);
  assert.equal(responsesReasoning.includes("技能关键词"), true);
}
|
|
996
|
+
|
|
952
997
|
async function testBossChatLlmTextChunkFallbackShouldWork() {
|
|
953
998
|
const originalChunkSize = process.env.BOSS_CHAT_TEXT_CHUNK_SIZE_CHARS;
|
|
954
999
|
const originalChunkOverlap = process.env.BOSS_CHAT_TEXT_CHUNK_OVERLAP_CHARS;
|
|
@@ -2133,6 +2178,11 @@ async function testBossChatAppShouldPersistEvidenceArtifacts() {
|
|
|
2133
2178
|
return {
|
|
2134
2179
|
passed: false,
|
|
2135
2180
|
rawOutputText: '{"passed":false}',
|
|
2181
|
+
rawReasoningText: "先看项目经验,再看技能,结论不通过。",
|
|
2182
|
+
cot: "先看项目经验,再看技能,结论不通过。",
|
|
2183
|
+
reason: "项目经验与岗位要求不符",
|
|
2184
|
+
summary: "不符合要求",
|
|
2185
|
+
evidence: ["Python"],
|
|
2136
2186
|
evaluationMode: "image-multi-chunk",
|
|
2137
2187
|
imageCount: 3,
|
|
2138
2188
|
chunkIndex: 1,
|
|
@@ -2197,9 +2247,14 @@ async function testBossChatAppShouldPersistEvidenceArtifacts() {
|
|
|
2197
2247
|
|
|
2198
2248
|
assert.equal(result.passed, false);
|
|
2199
2249
|
assert.equal(result.artifacts.finalPassed, false);
|
|
2200
|
-
assert.equal(result.reason, "
|
|
2250
|
+
assert.equal(result.reason, "项目经验与岗位要求不符");
|
|
2201
2251
|
assert.equal(result.artifacts.evaluationMode, "image-multi-chunk");
|
|
2202
2252
|
assert.equal(result.artifacts.evaluationImageCount, 3);
|
|
2253
|
+
assert.equal(result.artifacts.llmReason, "项目经验与岗位要求不符");
|
|
2254
|
+
assert.equal(result.artifacts.llmSummary, "不符合要求");
|
|
2255
|
+
assert.equal(result.artifacts.llmCot, "先看项目经验,再看技能,结论不通过。");
|
|
2256
|
+
assert.deepEqual(result.artifacts.llmEvidence, ["Python"]);
|
|
2257
|
+
assert.equal(result.artifacts.llmRawReasoning, "先看项目经验,再看技能,结论不通过。");
|
|
2203
2258
|
assert.equal(result.artifacts.llmRawOutput, '{"passed":false}');
|
|
2204
2259
|
assert.equal(Array.isArray(result.artifacts.modelImagePaths), true);
|
|
2205
2260
|
assert.equal(result.artifacts.modelImagePaths.length, 3);
|
|
@@ -2241,6 +2296,10 @@ async function testBossChatReportStoreShouldWriteReadableMarkdownAndCsv() {
|
|
|
2241
2296
|
textModelMs: 18234,
|
|
2242
2297
|
initialNetworkWaitMs: 4200,
|
|
2243
2298
|
evaluationMode: "text",
|
|
2299
|
+
llmSummary: "教育与项目经历匹配",
|
|
2300
|
+
llmCot: "先看教育背景,再看项目经历,结论通过。",
|
|
2301
|
+
llmEvidence: ["AI Agent", "MCP"],
|
|
2302
|
+
llmRawReasoning: "先看教育背景,再看项目经历,结论通过。",
|
|
2244
2303
|
llmRawOutput: '{"passed":true}',
|
|
2245
2304
|
},
|
|
2246
2305
|
},
|
|
@@ -2260,6 +2319,10 @@ async function testBossChatReportStoreShouldWriteReadableMarkdownAndCsv() {
|
|
|
2260
2319
|
lateNetworkRetryMs: 3000,
|
|
2261
2320
|
evaluationMode: "image-multi-chunk",
|
|
2262
2321
|
evaluationImageCount: 3,
|
|
2322
|
+
llmSummary: "项目经历不足",
|
|
2323
|
+
llmCot: "先看项目经历,再看实习时长,结论不通过。",
|
|
2324
|
+
llmEvidence: ["数据分析"],
|
|
2325
|
+
llmRawReasoning: "先看项目经历,再看实习时长,结论不通过。",
|
|
2263
2326
|
llmRawOutput: '{"passed":false}',
|
|
2264
2327
|
},
|
|
2265
2328
|
},
|
|
@@ -2297,8 +2360,13 @@ async function testBossChatReportStoreShouldWriteReadableMarkdownAndCsv() {
|
|
|
2297
2360
|
assert.match(csvContent, /resume_acquisition_mode/);
|
|
2298
2361
|
assert.match(csvContent, /initial_network_wait_ms/);
|
|
2299
2362
|
assert.match(csvContent, /late_network_retry_ms/);
|
|
2363
|
+
assert.match(csvContent, /llm_summary/);
|
|
2364
|
+
assert.match(csvContent, /llm_cot/);
|
|
2365
|
+
assert.match(csvContent, /llm_raw_reasoning/);
|
|
2366
|
+
assert.match(csvContent, /llm_raw_output/);
|
|
2300
2367
|
assert.match(csvContent, /候选人B/);
|
|
2301
2368
|
assert.match(csvContent, /image-multi-chunk/);
|
|
2369
|
+
assert.match(csvContent, /先看项目经历,再看实习时长/);
|
|
2302
2370
|
}
|
|
2303
2371
|
|
|
2304
2372
|
async function main() {
|
|
@@ -2316,6 +2384,8 @@ async function main() {
|
|
|
2316
2384
|
testBossChatLlmParserShouldAcceptMinimalDecisionJson();
|
|
2317
2385
|
testBossChatLlmParserShouldAcceptPlainPassFailText();
|
|
2318
2386
|
testBossChatLlmParserShouldAcceptDecisionField();
|
|
2387
|
+
testBossChatLlmParserShouldPreserveReasoningFields();
|
|
2388
|
+
testBossChatLlmExtractorsShouldReadProviderReasoningFields();
|
|
2319
2389
|
await testBossChatLlmTextChunkFallbackShouldWork();
|
|
2320
2390
|
await testBossChatLlmShouldApplyThinkingDefaultsAndOverrides();
|
|
2321
2391
|
await testBossChatLlmShouldSendAllImageChunksInSingleRequest();
|
|
@@ -18,6 +18,18 @@ function normalizeText(value) {
|
|
|
18
18
|
return String(value || '').replace(/\s+/g, ' ').trim();
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
/**
 * Converts an array of arbitrary values into a bounded list of non-empty strings.
 *
 * Each entry is passed through normalizeText; entries that normalize to an
 * empty string are dropped.
 *
 * @param {*} value - Candidate list. Anything that is not an array yields [].
 * @param {number} [maxItems=8] - Maximum number of entries to keep.
 * @returns {string[]} Normalized entries in their original order, at most maxItems long.
 */
function toStringArray(value, maxItems = 8) {
  // The cap is checked only after a push, so without this guard a
  // non-positive cap would still let one entry through.
  if (!Array.isArray(value) || maxItems <= 0) return [];
  const normalized = [];
  for (const item of value) {
    const text = normalizeText(item);
    if (!text) continue;
    normalized.push(text);
    if (normalized.length >= maxItems) break;
  }
  return normalized;
}
|
|
32
|
+
|
|
21
33
|
function shouldContinue(summary, targetCount) {
|
|
22
34
|
if (!targetCount || !Number.isFinite(targetCount) || targetCount <= 0) {
|
|
23
35
|
return true;
|
|
@@ -1064,7 +1076,9 @@ export class BossChatApp {
|
|
|
1064
1076
|
const evaluation = acquisition.evaluation;
|
|
1065
1077
|
const capture = acquisition.capture;
|
|
1066
1078
|
modalOpened = Boolean(acquisition.modalOpened);
|
|
1067
|
-
const finalReason =
|
|
1079
|
+
const finalReason =
|
|
1080
|
+
normalizeText(evaluation.reason || evaluation.summary || evaluation.cot) ||
|
|
1081
|
+
(evaluation.passed ? 'LLM判定通过' : 'LLM判定不通过');
|
|
1068
1082
|
this.logger.log(
|
|
1069
1083
|
`LLM评估完成:passed=${evaluation.passed} | source=${acquisition.acquisitionMode} | reason=${acquisition.acquisitionReason || 'n/a'} | mode=${evaluation.evaluationMode || 'unknown'} | imageCount=${Number(evaluation.imageCount || baseResult.artifacts.modelImagePaths?.length || 0)} | result=${normalizeText(evaluation.rawOutputText || '') || 'n/a'}`,
|
|
1070
1084
|
);
|
|
@@ -1085,6 +1099,11 @@ export class BossChatApp {
|
|
|
1085
1099
|
baseResult.artifacts.evaluationChunkTotal = Number.isFinite(Number(evaluation.chunkTotal))
|
|
1086
1100
|
? Number(evaluation.chunkTotal)
|
|
1087
1101
|
: null;
|
|
1102
|
+
baseResult.artifacts.llmReason = normalizeText(evaluation.reason || '');
|
|
1103
|
+
baseResult.artifacts.llmSummary = normalizeText(evaluation.summary || '');
|
|
1104
|
+
baseResult.artifacts.llmCot = normalizeText(evaluation.cot || '');
|
|
1105
|
+
baseResult.artifacts.llmEvidence = toStringArray(evaluation.evidence);
|
|
1106
|
+
baseResult.artifacts.llmRawReasoning = String(evaluation.rawReasoningText || '');
|
|
1088
1107
|
baseResult.artifacts.llmRawOutput = String(evaluation.rawOutputText || '');
|
|
1089
1108
|
baseResult.artifacts.resumeAcquisitionMode = String(acquisition.acquisitionMode || '');
|
|
1090
1109
|
baseResult.artifacts.resumeAcquisitionReason = String(acquisition.acquisitionReason || '');
|
|
@@ -43,6 +43,22 @@ function getCompletionContent(data) {
|
|
|
43
43
|
return '';
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
+
/**
 * Flattens a chat message `content` value into plain text.
 *
 * - An array is treated as a list of parts: string parts are used as-is, and
 *   object parts contribute their first non-empty string among `text`,
 *   `content` and `reasoning_content`. Empty parts are dropped and the rest
 *   are joined with newlines.
 * - A single part object is flattened the same way as one array entry.
 * - Any other value is stringified; falsy values become ''.
 *
 * Only string fields are accepted from part objects, so a structured field
 * (for example `text: { value: '...' }`) cannot leak into the result as
 * "[object Object]".
 *
 * @param {*} content - The message content as returned by the provider.
 * @returns {string} The flattened text.
 */
function flattenChatMessageContent(content) {
  const textFields = ['text', 'content', 'reasoning_content'];

  const partToText = (part) => {
    if (typeof part === 'string') return part;
    if (!part || typeof part !== 'object') return '';
    for (const field of textFields) {
      const candidate = part[field];
      if (typeof candidate === 'string' && candidate) return candidate;
    }
    return '';
  };

  if (Array.isArray(content)) {
    return content.map(partToText).filter(Boolean).join('\n');
  }
  if (content && typeof content === 'object') {
    return partToText(content);
  }
  return String(content || '');
}
|
|
61
|
+
|
|
46
62
|
function getResponsesContent(data) {
|
|
47
63
|
if (typeof data?.output_text === 'string' && data.output_text.trim()) {
|
|
48
64
|
return data.output_text;
|
|
@@ -163,6 +179,100 @@ function toStringArray(value, maxItems = 8) {
|
|
|
163
179
|
return normalized;
|
|
164
180
|
}
|
|
165
181
|
|
|
182
|
+
/**
 * Recursively gathers text fragments from a nested provider payload.
 *
 * Strings, numbers and booleans are normalized with normalizeText and appended
 * when non-empty. Arrays are walked in order. For objects, a fixed list of
 * text-bearing keys is visited first (in priority order), followed by the
 * remaining own enumerable keys.
 *
 * Keys that hold structural metadata rather than prose (`type`, `id`, `role`,
 * `status`, `index`, `effort`, `encrypted_content`, `signature`) are skipped,
 * so identifiers, type tags and opaque blobs do not end up in the collected
 * text.
 *
 * @param {*} value - Payload node to walk.
 * @param {string[]} [out=[]] - Accumulator; fragments are appended in place.
 * @param {number} [depth=0] - Current recursion depth; nodes deeper than 6 are ignored.
 * @returns {string[]} The same `out` array, for convenience.
 */
function collectNestedText(value, out = [], depth = 0) {
  if (depth > 6 || value === null || value === undefined) return out;
  if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
    const normalized = normalizeText(String(value));
    if (normalized) out.push(normalized);
    return out;
  }
  if (Array.isArray(value)) {
    for (const item of value) {
      collectNestedText(item, out, depth + 1);
    }
    return out;
  }
  if (typeof value === 'object') {
    const priorityKeys = ['text', 'reasoning_content', 'summary_text', 'summary', 'content', 'cot', 'reason'];
    const metadataKeys = new Set([
      'type',
      'id',
      'role',
      'status',
      'index',
      'effort',
      'encrypted_content',
      'signature',
    ]);
    const seen = new Set();
    for (const key of priorityKeys) {
      if (Object.prototype.hasOwnProperty.call(value, key)) {
        seen.add(key);
        collectNestedText(value[key], out, depth + 1);
      }
    }
    for (const [key, nested] of Object.entries(value)) {
      // Already handled above, or not prose at all.
      if (seen.has(key) || metadataKeys.has(key)) continue;
      collectNestedText(nested, out, depth + 1);
    }
  }
  return out;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Normalizes each fragment and returns the distinct non-empty results,
 * keeping the order in which each one first appeared.
 *
 * @param {Iterable<*>} [fragments=[]] - Raw text fragments.
 * @returns {string[]} Unique normalized fragments.
 */
function dedupeTextFragments(fragments = []) {
  // A Set iterates in insertion order, so it gives first-occurrence ordering
  // and uniqueness at the same time.
  const unique = new Set();
  for (const fragment of fragments) {
    const text = normalizeText(fragment);
    if (text) unique.add(text);
  }
  return [...unique];
}
|
|
224
|
+
|
|
225
|
+
/**
 * De-duplicates the fragments via dedupeTextFragments and joins the result
 * with newline separators.
 *
 * @param {Iterable<*>} [fragments=[]] - Raw text fragments.
 * @returns {string} Newline-joined unique fragments.
 */
function joinTextFragments(fragments = []) {
  const uniqueFragments = dedupeTextFragments(fragments);
  return uniqueFragments.join('\n');
}
|
|
228
|
+
|
|
229
|
+
/**
 * Pulls reasoning text out of a chat-completions style payload.
 *
 * Only the first entry of `data.choices` is inspected. Text is gathered from:
 *   1. parts of `message.content` (when it is an array) whose `type`
 *      contains "reason" or "summary";
 *   2. `message.reasoning_content`, `message.reasoning`,
 *      `choice.reasoning_content` and `choice.reasoning`, in that order.
 *
 * @param {object} data - Parsed provider response.
 * @returns {string} De-duplicated fragments joined with newlines ('' when none).
 */
function extractCompletionReasoningText(data) {
  const firstChoice = data?.choices?.[0] || {};
  const message = firstChoice?.message;
  const collected = [];

  const parts = Array.isArray(message?.content) ? message.content : [];
  for (const part of parts) {
    const kind = normalizeText(part?.type || '').toLowerCase();
    const looksLikeReasoning = kind.includes('reason') || kind.includes('summary');
    if (!looksLikeReasoning) continue;
    collectNestedText(part, collected);
  }

  const reasoningFields = [
    message?.reasoning_content,
    message?.reasoning,
    firstChoice?.reasoning_content,
    firstChoice?.reasoning,
  ];
  reasoningFields.forEach((field) => {
    collectNestedText(field, collected);
  });

  return joinTextFragments(collected);
}
|
|
252
|
+
|
|
253
|
+
/**
 * Pulls reasoning text out of a responses-style payload.
 *
 * Text is gathered from, in order:
 *   1. the top-level `reasoning` value;
 *   2. the top-level `reasoning_content` value;
 *   3. every `output` item whose `type` contains "reason" or "summary";
 *   4. every entry of an output item's `content` array whose `type` contains
 *      "reason" or "summary".
 *
 * When the top-level `reasoning` value is a plain object, request-config
 * entries are dropped before collection: `effort`, `generate_summary`, and a
 * `summary` that is just a mode name ("auto", "concise", "detailed"). The
 * OpenAI Responses API echoes those request settings in that field, and they
 * would otherwise be reported as if they were the model's reasoning.
 *
 * @param {object} data - Parsed provider response.
 * @returns {string} De-duplicated fragments joined with newlines ('' when none).
 */
function extractResponsesReasoningText(data) {
  const fragments = [];
  const summaryModes = ['auto', 'concise', 'detailed'];

  const isReasoningLike = (node) => {
    const type = normalizeText(node?.type || '').toLowerCase();
    return type.includes('reason') || type.includes('summary');
  };

  let topLevelReasoning = data?.reasoning;
  if (topLevelReasoning && typeof topLevelReasoning === 'object' && !Array.isArray(topLevelReasoning)) {
    topLevelReasoning = Object.fromEntries(
      Object.entries(topLevelReasoning).filter(([key, entry]) => {
        if (key === 'effort' || key === 'generate_summary') return false;
        const isModeName =
          typeof entry === 'string' && summaryModes.includes(entry.trim().toLowerCase());
        return !(key === 'summary' && isModeName);
      }),
    );
  }
  collectNestedText(topLevelReasoning, fragments);
  collectNestedText(data?.reasoning_content, fragments);

  const output = Array.isArray(data?.output) ? data.output : [];
  for (const item of output) {
    if (isReasoningLike(item)) {
      collectNestedText(item, fragments);
    }
    const content = Array.isArray(item?.content) ? item.content : [];
    for (const chunk of content) {
      if (isReasoningLike(chunk)) {
        collectNestedText(chunk, fragments);
      }
    }
  }

  return joinTextFragments(fragments);
}
|
|
275
|
+
|
|
166
276
|
function extractEvidenceTokens(text, maxItems = MAX_EVIDENCE_TOKENS) {
|
|
167
277
|
const normalized = normalizeText(text);
|
|
168
278
|
if (!normalized) return [];
|
|
@@ -341,6 +451,14 @@ function buildTextPrompt({ screeningCriteria, candidate, resumeText, chunkIndex
|
|
|
341
451
|
].join('\n');
|
|
342
452
|
}
|
|
343
453
|
|
|
454
|
+
/**
 * Returns the first argument that is non-empty after normalizeText,
 * in its normalized form.
 *
 * @param {...*} values - Candidates, checked left to right.
 * @returns {string} The first non-empty normalized candidate, or '' if none.
 */
function pickFirstText(...values) {
  let index = 0;
  while (index < values.length) {
    const candidate = normalizeText(values[index]);
    if (candidate) {
      return candidate;
    }
    index += 1;
  }
  return '';
}
|
|
461
|
+
|
|
344
462
|
export function parseLlmJson(content, options = {}) {
|
|
345
463
|
const text = String(content || '').trim();
|
|
346
464
|
if (!text) {
|
|
@@ -355,6 +473,11 @@ export function parseLlmJson(content, options = {}) {
|
|
|
355
473
|
return {
|
|
356
474
|
passed: true,
|
|
357
475
|
rawOutputText: text,
|
|
476
|
+
rawReasoningText: normalizeText(options.reasoningText || ''),
|
|
477
|
+
cot: normalizeText(options.reasoningText || ''),
|
|
478
|
+
reason: '',
|
|
479
|
+
summary: '',
|
|
480
|
+
evidence: [],
|
|
358
481
|
chunkIndex,
|
|
359
482
|
chunkTotal,
|
|
360
483
|
};
|
|
@@ -364,6 +487,11 @@ export function parseLlmJson(content, options = {}) {
|
|
|
364
487
|
return {
|
|
365
488
|
passed: false,
|
|
366
489
|
rawOutputText: text,
|
|
490
|
+
rawReasoningText: normalizeText(options.reasoningText || ''),
|
|
491
|
+
cot: normalizeText(options.reasoningText || ''),
|
|
492
|
+
reason: '',
|
|
493
|
+
summary: '',
|
|
494
|
+
evidence: [],
|
|
367
495
|
chunkIndex,
|
|
368
496
|
chunkTotal,
|
|
369
497
|
};
|
|
@@ -391,9 +519,26 @@ export function parseLlmJson(content, options = {}) {
|
|
|
391
519
|
throw new Error('LLM response missing boolean "passed"');
|
|
392
520
|
}
|
|
393
521
|
|
|
522
|
+
const parsedReason = pickFirstText(parsed?.reason, parsed?.summary, parsed?.summary_text);
|
|
523
|
+
const parsedSummary = pickFirstText(parsed?.summary, parsed?.summary_text, parsed?.reason);
|
|
524
|
+
const parsedCot = pickFirstText(
|
|
525
|
+
options.reasoningText,
|
|
526
|
+
parsed?.cot,
|
|
527
|
+
parsed?.reasoning_content,
|
|
528
|
+
parsed?.reasoning,
|
|
529
|
+
parsedReason,
|
|
530
|
+
parsedSummary,
|
|
531
|
+
);
|
|
532
|
+
const parsedEvidence = toStringArray(parsed?.evidence);
|
|
533
|
+
|
|
394
534
|
return {
|
|
395
535
|
passed: parsedPassed,
|
|
396
536
|
rawOutputText: text,
|
|
537
|
+
rawReasoningText: normalizeText(options.reasoningText || ''),
|
|
538
|
+
cot: parsedCot,
|
|
539
|
+
reason: parsedReason || parsedCot,
|
|
540
|
+
summary: parsedSummary || parsedReason || parsedCot,
|
|
541
|
+
evidence: parsedEvidence,
|
|
397
542
|
chunkIndex,
|
|
398
543
|
chunkTotal,
|
|
399
544
|
};
|
|
@@ -525,6 +670,7 @@ export class LlmClient {
|
|
|
525
670
|
}
|
|
526
671
|
|
|
527
672
|
const outputContent = getResponsesContent(data);
|
|
673
|
+
const reasoningText = extractResponsesReasoningText(data);
|
|
528
674
|
if (!outputContent) {
|
|
529
675
|
const incompleteReason = String(data?.incomplete_details?.reason || '').trim();
|
|
530
676
|
const outputTypes = Array.isArray(data?.output)
|
|
@@ -547,6 +693,7 @@ export class LlmClient {
|
|
|
547
693
|
try {
|
|
548
694
|
return parseLlmJson(outputContent, {
|
|
549
695
|
evidenceCorpus,
|
|
696
|
+
reasoningText,
|
|
550
697
|
chunkIndex,
|
|
551
698
|
chunkTotal,
|
|
552
699
|
});
|
|
@@ -610,6 +757,7 @@ export class LlmClient {
|
|
|
610
757
|
}
|
|
611
758
|
|
|
612
759
|
const outputContent = getCompletionContent(data);
|
|
760
|
+
const reasoningText = extractCompletionReasoningText(data);
|
|
613
761
|
if (!String(outputContent || '').trim()) {
|
|
614
762
|
const emptyError = new Error('Completions API empty textual content');
|
|
615
763
|
emptyError.code = 'COMPLETIONS_EMPTY_CONTENT';
|
|
@@ -619,6 +767,7 @@ export class LlmClient {
|
|
|
619
767
|
try {
|
|
620
768
|
return parseLlmJson(outputContent, {
|
|
621
769
|
evidenceCorpus,
|
|
770
|
+
reasoningText,
|
|
622
771
|
chunkIndex,
|
|
623
772
|
chunkTotal,
|
|
624
773
|
});
|
|
@@ -759,6 +908,11 @@ export class LlmClient {
|
|
|
759
908
|
rawOutputText:
|
|
760
909
|
chunkResults.map((item) => normalizeText(item?.rawOutputText)).find(Boolean) ||
|
|
761
910
|
`{"passed":false,"mode":"text-chunk-fallback","chunks":${chunks.length}}`,
|
|
911
|
+
rawReasoningText: chunkResults.map((item) => normalizeText(item?.rawReasoningText)).find(Boolean) || '',
|
|
912
|
+
cot: chunkResults.map((item) => normalizeText(item?.cot)).find(Boolean) || '',
|
|
913
|
+
reason: chunkResults.map((item) => normalizeText(item?.reason)).find(Boolean) || '',
|
|
914
|
+
summary: chunkResults.map((item) => normalizeText(item?.summary)).find(Boolean) || '',
|
|
915
|
+
evidence: [],
|
|
762
916
|
chunkIndex: null,
|
|
763
917
|
chunkTotal: chunks.length,
|
|
764
918
|
evaluationMode: 'text',
|
|
@@ -791,6 +945,10 @@ export class LlmClient {
|
|
|
791
945
|
}
|
|
792
946
|
|
|
793
947
|
export const __testables = {
|
|
948
|
+
flattenChatMessageContent,
|
|
949
|
+
collectNestedText,
|
|
950
|
+
extractCompletionReasoningText,
|
|
951
|
+
extractResponsesReasoningText,
|
|
794
952
|
extractEvidenceTokens,
|
|
795
953
|
matchEvidenceAgainstResume,
|
|
796
954
|
splitTextByChunks,
|
|
@@ -31,7 +31,12 @@ const CSV_HEADER = [
|
|
|
31
31
|
'text_model_ms',
|
|
32
32
|
'timing_summary',
|
|
33
33
|
'reason',
|
|
34
|
+
'llm_summary',
|
|
35
|
+
'llm_cot',
|
|
36
|
+
'llm_evidence',
|
|
37
|
+
'llm_raw_reasoning',
|
|
34
38
|
'error_message',
|
|
39
|
+
'llm_raw_output',
|
|
35
40
|
'llm_raw_output_preview',
|
|
36
41
|
];
|
|
37
42
|
|
|
@@ -100,6 +105,24 @@ function getTimingValue(result, key) {
|
|
|
100
105
|
return normalizeMs(getArtifacts(result)[key]);
|
|
101
106
|
}
|
|
102
107
|
|
|
108
|
+
/**
 * Reads `llmSummary` from the result's artifacts and normalizes it.
 *
 * @param {object} result - A candidate result record.
 * @returns {string} The normalized summary text.
 */
function getLlmSummary(result) {
  const { llmSummary } = getArtifacts(result);
  return normalizeText(llmSummary);
}
|
|
111
|
+
|
|
112
|
+
/**
 * Reads `llmCot` from the result's artifacts and normalizes it.
 *
 * @param {object} result - A candidate result record.
 * @returns {string} The normalized chain-of-thought text.
 */
function getLlmCot(result) {
  const { llmCot } = getArtifacts(result);
  return normalizeText(llmCot);
}
|
|
115
|
+
|
|
116
|
+
/**
 * Renders the `llmEvidence` artifact as a single string.
 *
 * @param {object} result - A candidate result record.
 * @returns {string} Non-empty normalized evidence entries joined with ' | ',
 *   or '' when the artifact is not an array.
 */
function getLlmEvidence(result) {
  const { llmEvidence } = getArtifacts(result);
  if (!Array.isArray(llmEvidence)) {
    return '';
  }
  const entries = [];
  for (const entry of llmEvidence) {
    const text = normalizeText(entry);
    if (text) entries.push(text);
  }
  return entries.join(' | ');
}
|
|
121
|
+
|
|
122
|
+
/**
 * Reads `llmRawReasoning` from the result's artifacts and normalizes it.
 *
 * @param {object} result - A candidate result record.
 * @returns {string} The normalized raw reasoning text.
 */
function getLlmRawReasoning(result) {
  const { llmRawReasoning } = getArtifacts(result);
  return normalizeText(llmRawReasoning);
}
|
|
125
|
+
|
|
103
126
|
function formatTimingSummary(result) {
|
|
104
127
|
const parts = [];
|
|
105
128
|
for (const [key, label] of TIMING_BUCKETS) {
|
|
@@ -113,10 +136,16 @@ function formatTimingSummary(result) {
|
|
|
113
136
|
/**
 * Builds the one-line notes string for a result.
 *
 * Each available field is shortened with previewText and prefixed with its
 * label; fields whose preview is empty are omitted. The order is: reason,
 * LLM summary, CoT, error, raw reasoning, raw LLM output.
 *
 * @param {object} result - A candidate result record.
 * @returns {string} Labelled previews joined with ' | ', or '-' when nothing is available.
 */
function formatResultNotes(result) {
  // [prefix, preview] pairs, listed in display order.
  const labelled = [
    ['原因: ', previewText(result?.reason, 120)],
    ['摘要: ', previewText(getLlmSummary(result), 120)],
    ['CoT: ', previewText(getLlmCot(result), 180)],
    ['错误: ', previewText(result?.error, 120)],
    ['Reasoning: ', previewText(getLlmRawReasoning(result), 180)],
    ['LLM: ', previewText(getArtifacts(result).llmRawOutput, 180)],
  ];

  const notes = [];
  for (const [prefix, preview] of labelled) {
    if (preview) notes.push(`${prefix}${preview}`);
  }

  if (notes.length === 0) {
    return '-';
  }
  return notes.join(' | ');
}
|
|
@@ -243,7 +272,12 @@ function buildCsvSummary(summary) {
|
|
|
243
272
|
csvEscape(getTimingValue(result, 'textModelMs') ?? ''),
|
|
244
273
|
csvEscape(formatTimingSummary(result)),
|
|
245
274
|
csvEscape(result?.reason || ''),
|
|
275
|
+
csvEscape(getLlmSummary(result)),
|
|
276
|
+
csvEscape(getLlmCot(result)),
|
|
277
|
+
csvEscape(getLlmEvidence(result)),
|
|
278
|
+
csvEscape(getLlmRawReasoning(result)),
|
|
246
279
|
csvEscape(result?.error || ''),
|
|
280
|
+
csvEscape(artifacts.llmRawOutput || ''),
|
|
247
281
|
csvEscape(previewText(artifacts.llmRawOutput, 500)),
|
|
248
282
|
].join(','));
|
|
249
283
|
});
|