@zhouchangui/math-ati 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +4 -1
- package/README.md +11 -0
- package/bin/math-ati.js +136 -5
- package/dist/assets/{index-CGZslJ0a.css → index-DOg8CQsE.css} +1 -1
- package/dist/assets/index-DyfeTKmg.js +22 -0
- package/dist/index.html +3 -3
- package/package.json +9 -5
- package/prompts/geometry-practice-experience.md +44 -0
- package/prompts/grading.system.md +3 -1
- package/prompts/knowledge-extract.system.md +35 -54
- package/prompts/knowledge-structure.system.md +75 -0
- package/prompts/knowledge-summarize.system.md +21 -7
- package/prompts/pdf-grading.system.md +4 -1
- package/prompts/pdf-recheck.system.md +2 -0
- package/prompts/practice-answers.system.md +154 -0
- package/prompts/practice-coverage-repair.system.md +112 -0
- package/prompts/practice-generate.system.md +51 -9
- package/prompts/practice-review.system.md +4 -2
- package/prompts/practice-revise.system.md +5 -4
- package/prompts/practice-rules.md +61 -0
- package/prompts/svg-figure-review.system.md +13 -0
- package/prompts/svg-figure-revise.system.md +21 -0
- package/server/agentClient.js +179 -10
- package/server/coveragePlanner.js +174 -0
- package/server/fileStore.js +49 -9
- package/server/index.js +78 -1
- package/server/knowledgeExtractor.js +717 -120
- package/server/knowledgeFeedback.js +69 -0
- package/server/practiceGenerator.js +637 -116
- package/server/practicePaperHtml.js +105 -35
- package/server/practiceService.js +27 -2
- package/server/promptStore.js +14 -0
- package/server/submissionService.js +1 -1
- package/server/svgFigureVerifier.js +315 -0
- package/dist/assets/index-CGfjl7nO.js +0 -22
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { readdir } from 'node:fs/promises';
|
|
1
|
+
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
|
-
import { callChatAgent,
|
|
3
|
+
import { callChatAgent, callChatTextAgent, callVisionTextAgent } from './agentClient.js';
|
|
4
4
|
import {
|
|
5
5
|
chapterDataPaths,
|
|
6
6
|
ensureChapterDataDirs,
|
|
@@ -13,17 +13,28 @@ import {
|
|
|
13
13
|
} from './fileStore.js';
|
|
14
14
|
import { promptPayload, readPrompt } from './promptStore.js';
|
|
15
15
|
|
|
16
|
-
const KNOWLEDGE_PAGE_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_TIMEOUT_MS ||
|
|
17
|
-
const KNOWLEDGE_SUMMARY_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_TIMEOUT_MS ||
|
|
18
|
-
const KNOWLEDGE_PAGE_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_RETRIES ||
|
|
19
|
-
const KNOWLEDGE_SUMMARY_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_RETRIES ||
|
|
16
|
+
/**
 * Reads a numeric tuning value from the environment.
 *
 * An unset or empty variable yields `fallback`. A value that does not parse to
 * a finite number (for example "abc") also yields `fallback`, so that NaN never
 * reaches the timeouts, retry counts and clamps derived from these constants.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or invalid.
 * @returns {number} The parsed value or the fallback.
 */
function knowledgeEnvNumber(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined || raw === '') return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}

// Per-page vision extraction: timeout (ms) and retry count.
const KNOWLEDGE_PAGE_TIMEOUT_MS = knowledgeEnvNumber('KNOWLEDGE_EXTRACT_PAGE_TIMEOUT_MS', 300000);
// Chapter/chunk summary calls: timeout (ms) and retry count.
const KNOWLEDGE_SUMMARY_TIMEOUT_MS = knowledgeEnvNumber('KNOWLEDGE_EXTRACT_SUMMARY_TIMEOUT_MS', 600000);
const KNOWLEDGE_PAGE_RETRIES = knowledgeEnvNumber('KNOWLEDGE_EXTRACT_PAGE_RETRIES', 3);
const KNOWLEDGE_SUMMARY_RETRIES = knowledgeEnvNumber('KNOWLEDGE_EXTRACT_SUMMARY_RETRIES', 3);
// Number of page extracts merged per summary chunk.
const KNOWLEDGE_SUMMARY_PAGE_CHUNK_SIZE = knowledgeEnvNumber('KNOWLEDGE_SUMMARY_PAGE_CHUNK_SIZE', 4);
// Default targets for core knowledge points and mistake points.
const KNOWLEDGE_MAX_CORE_POINTS = knowledgeEnvNumber('KNOWLEDGE_MAX_CORE_POINTS', 24);
const KNOWLEDGE_MAX_MISTAKE_POINTS = knowledgeEnvNumber('KNOWLEDGE_MAX_MISTAKE_POINTS', 8);
|
|
20
23
|
|
|
21
24
|
/**
 * Returns the directory that holds the per-page extract files of a chapter.
 *
 * @param {string} chapterId - Chapter identifier passed to chapterDataPaths.
 * @returns {string} The `pageExtracts` entry of the chapter's data paths.
 */
function extractionDir(chapterId) {
  const { pageExtracts } = chapterDataPaths(chapterId);
  return pageExtracts;
}
|
|
24
27
|
|
|
25
28
|
/**
 * Builds the Markdown extract path for one page image.
 *
 * The file is named after the image with its extension replaced by `.md`
 * and lives in the chapter's page-extract directory.
 *
 * @param {string} chapterId - Chapter identifier.
 * @param {string} imageFile - Image path or file name.
 * @returns {string} Path of the page's Markdown extract.
 */
function pageExtractPath(chapterId, imageFile) {
  const directory = extractionDir(chapterId);
  const stem = path.basename(imageFile, path.extname(imageFile));
  return path.join(directory, `${stem}.md`);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Returns the directory where per-chunk summary Markdown files are stored.
 *
 * @param {string} chapterId - Chapter identifier.
 * @returns {string} `<knowledge dir>/chunk_summaries` for the chapter.
 */
function chunkSummaryDir(chapterId) {
  const { knowledge } = chapterDataPaths(chapterId);
  return path.join(knowledge, 'chunk_summaries');
}
|
|
35
|
+
|
|
36
|
+
/**
 * Builds the path of one chunk summary file.
 *
 * @param {string} chapterId - Chapter identifier.
 * @param {number} chunkIndex - Zero-based chunk index; the file name uses the
 *   one-based ordinal padded to two digits (`chunk-01.md`, `chunk-02.md`, ...).
 * @returns {string} Path of the chunk summary Markdown file.
 */
function chunkSummaryPath(chapterId, chunkIndex) {
  const directory = chunkSummaryDir(chapterId);
  const ordinal = String(chunkIndex + 1).padStart(2, '0');
  return path.join(directory, `chunk-${ordinal}.md`);
}
|
|
28
39
|
|
|
29
40
|
function summaryPath(chapterId) {
|
|
@@ -46,17 +57,15 @@ async function chapterImages(chapter) {
|
|
|
46
57
|
}
|
|
47
58
|
|
|
48
59
|
/**
 * Normalises a caller-supplied extract profile into the fixed shape used by
 * the extraction pipeline.
 *
 * `detailLevel`, `focus` and `displayLayer` are fixed; `studentBaseline` is kept
 * only when it is one of the known values; the three numeric settings are
 * clamped to their allowed ranges.
 *
 * @param {object|null} [profile] - Raw profile; `null`, `undefined` and
 *   non-objects are treated as an empty profile.
 * @returns {{detailLevel: string, studentBaseline: string, focus: string[],
 *   displayLayer: string, maxCorePointCount: number,
 *   maxMistakePointCount: number, summaryChunkSize: number}}
 */
function normalizeExtractProfile(profile = {}) {
  // Callers in this file default the profile to null, which a default
  // parameter does not cover.
  const source = profile !== null && typeof profile === 'object' ? profile : {};
  const baselines = new Set(['strong', 'normal', 'weak']);

  // Clamp `value` to [min, max]. Missing or non-numeric values use `fallback`.
  // Zero is only honoured where the range itself allows zero; elsewhere it
  // keeps meaning "not set".
  const boundedCount = (value, fallback, min, max) => {
    const requested = value === undefined || value === null || value === ''
      ? Number.NaN
      : Number(value);
    const accepted = Number.isFinite(requested) && (requested !== 0 || min === 0);
    const base = accepted ? requested : Number(fallback);
    // A broken fallback (e.g. a non-numeric env value) must not leak NaN.
    return Math.max(min, Math.min(max, Number.isFinite(base) ? base : min));
  };

  return {
    detailLevel: 'exam_focus',
    studentBaseline: baselines.has(source.studentBaseline) ? source.studentBaseline : 'strong',
    focus: ['exam_points', 'error_prone'],
    displayLayer: 'exam_point_wall',
    maxCorePointCount: boundedCount(source.maxCorePointCount, KNOWLEDGE_MAX_CORE_POINTS, 8, 40),
    maxMistakePointCount: boundedCount(source.maxMistakePointCount, KNOWLEDGE_MAX_MISTAKE_POINTS, 0, 16),
    summaryChunkSize: boundedCount(source.summaryChunkSize, KNOWLEDGE_SUMMARY_PAGE_CHUNK_SIZE, 2, 8)
  };
}
|
|
62
71
|
|
|
@@ -90,6 +99,97 @@ function normalizePageExtract(chapter, imagePath, pageIndex, data, source = 'age
|
|
|
90
99
|
};
|
|
91
100
|
}
|
|
92
101
|
|
|
102
|
+
/**
 * Returns the body of a level-2 (`## title`) section of a Markdown document.
 *
 * The body runs from the line after the heading up to, but not including, the
 * next level-2 heading, or to the end of the document. Leading and trailing
 * whitespace of the body is removed.
 *
 * @param {string} markdown - Markdown text; falsy values are treated as empty.
 * @param {string} title - Exact heading text. It is matched literally, so
 *   characters such as `+`, `(` or `.` have no regular-expression meaning.
 * @returns {string} The section body, or `''` when the heading is absent.
 */
function markdownSection(markdown, title) {
  // Escape the title so it cannot alter the pattern, and build the pattern
  // once instead of once per line.
  const escapedTitle = String(title ?? '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const headingPattern = new RegExp(`^##\\s+${escapedTitle}\\s*$`);
  const lines = String(markdown || '').split(/\r?\n/);
  const start = lines.findIndex((line) => headingPattern.test(line.trim()));
  if (start < 0) return '';
  // `##` followed by whitespace: level-3 headings do not end the section.
  const end = lines.findIndex((line, index) => index > start && /^##\s+/.test(line.trim()));
  return lines.slice(start + 1, end < 0 ? undefined : end).join('\n').trim();
}
|
|
109
|
+
|
|
110
|
+
/**
 * Splits a section body into items introduced by level-3 (`### title`) headings.
 *
 * Lines before the first heading are ignored; every other non-heading line is
 * appended, unmodified, to the body of the most recent heading. Headings are
 * recognised after trimming the line, matching how markdownSection detects
 * level-2 headings, so an indented `### title` is still found.
 *
 * @param {string} sectionText - Section body; falsy values are treated as empty.
 * @returns {{title: string, body: string[]}[]} Items in document order.
 */
function markdownHeadingItems(sectionText) {
  const items = [];
  let current = null;
  for (const line of String(sectionText || '').split(/\r?\n/)) {
    const heading = line.trim().match(/^###\s+(.+)$/);
    if (heading) {
      current = { title: heading[1].trim(), body: [] };
      items.push(current);
    } else if (current) {
      current.body.push(line);
    }
  }
  return items;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Reads a single `- label: value` field from the body lines of an item.
 *
 * The label may be wrapped in `**bold**` markers and may be followed by either
 * an ASCII colon or a fullwidth colon (U+FF1A). The label is matched literally.
 *
 * @param {string[]} body - Body lines of an item; a non-array yields `''`.
 * @param {string} label - Field label to look for.
 * @returns {string} The trimmed value of the first matching line, or `''`.
 */
function fieldFromMarkdownBody(body, label) {
  const escapedLabel = String(label ?? '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // \uFF1A is written as an escape so the fullwidth colon survives any
  // re-encoding of this source file.
  const pattern = new RegExp(`^\\s*-\\s*(?:\\*\\*)?${escapedLabel}(?:\\*\\*)?[:\\uFF1A]\\s*`);
  const lines = Array.isArray(body) ? body : [];
  for (const item of lines) {
    const line = String(item ?? '');
    if (pattern.test(line)) return line.replace(pattern, '').trim();
  }
  return '';
}
|
|
131
|
+
|
|
132
|
+
/**
 * Reads a multi-value field from the body lines of an item.
 *
 * The field starts at the first `- label: value` line. Its inline value (if
 * any) is the first entry; following `- value` list lines are further entries
 * until the next `- otherLabel:` line or the end of the body. Labels may be
 * wrapped in `**bold**` and followed by an ASCII or fullwidth (U+FF1A) colon.
 *
 * @param {string[]} body - Body lines of an item; a non-array yields `[]`.
 * @param {string} label - Field label, matched literally.
 * @returns {string[]} Non-empty values in document order.
 */
function markdownFieldList(body, label) {
  const lines = Array.isArray(body) ? body.map((line) => String(line ?? '')) : [];
  const escapedLabel = String(label ?? '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(`^\\s*-\\s*(?:\\*\\*)?${escapedLabel}(?:\\*\\*)?[:\\uFF1A]\\s*`);
  // Any `- <label>:` line starts the next field and ends this one.
  const nextFieldPattern = /^\s*-\s*(?:\*\*)?[\u4e00-\u9fa5A-Za-z0-9 /_-]+(?:\*\*)?[:\uFF1A]/;

  const start = lines.findIndex((line) => pattern.test(line));
  if (start < 0) return [];

  const values = [];
  const firstValue = lines[start].replace(pattern, '').trim();
  if (firstValue) values.push(firstValue);

  for (const line of lines.slice(start + 1)) {
    if (nextFieldPattern.test(line)) break;
    const listItem = line.match(/^\s*-\s+(.+?)\s*$/);
    if (listItem) values.push(listItem[1].trim());
  }
  return values.filter(Boolean);
}
|
|
147
|
+
|
|
148
|
+
/**
 * Converts the Markdown produced for one page into a structured page extract.
 *
 * Sections read from the Markdown: page title, original outline, knowledge
 * points, easy mistakes and exercise hints (headings are the Chinese titles
 * requested by the extraction prompt).
 *
 * @param {object} args
 * @param {{id: string, fullTitle: string}} args.chapter - Chapter metadata.
 * @param {string} args.imagePath - Path of the page image.
 * @param {number} args.pageIndex - Page number reported in the result.
 * @param {number} args.pageCount - Total page count reported in the result.
 * @param {string} args.markdown - Page Markdown; stored verbatim in the result.
 * @returns {object} Page extract with `knowledgePoints`, `easyMistakes`,
 *   `rawOutline`, `exerciseHints` and bookkeeping fields.
 */
function parsePageMarkdownExtract({ chapter, imagePath, pageIndex, pageCount, markdown }) {
  // The extraction prompt allows exactly these difficulty values.
  const difficulties = new Set(['basic', 'medium', 'challenge']);
  const section = (title) => markdownSection(markdown, title);

  // Bullet section -> list of non-empty entries without the leading "- ".
  const bulletLines = (title) => section(title)
    .split(/\r?\n/)
    .map((line) => line.replace(/^\s*-\s*/, '').trim())
    .filter(Boolean);

  // "a; b" -> ['a', 'b']; accepts the ASCII and the fullwidth (U+FF1B) semicolon.
  const splitValues = (text) => String(text || '')
    .split(/[;\uFF1B]/)
    .map((value) => value.trim())
    .filter(Boolean);

  // Anything outside the allowed set (including an empty value) becomes 'basic'.
  const normalizeDifficulty = (value) => {
    const candidate = String(value || '').trim().toLowerCase();
    return difficulties.has(candidate) ? candidate : 'basic';
  };

  const knowledgeItems = markdownHeadingItems(section('知识点'));
  const mistakeItems = markdownHeadingItems(section('易错点'));
  const imageFile = path.basename(imagePath);
  const titleLine = section('页面标题').split(/\r?\n/).find((line) => line.trim()) || '';
  // Fall back to the file name when the title is missing or only "#" markers.
  const pageTitle = titleLine.replace(/^#+\s*/, '').trim() || imageFile;

  return {
    chapterId: chapter.id,
    chapterTitle: chapter.fullTitle,
    imageFile,
    pageIndex,
    pageCount,
    source: 'agent_markdown',
    extractedAt: new Date().toISOString(),
    pageTitle,
    rawOutline: bulletLines('原文结构'),
    knowledgePoints: knowledgeItems.map((item) => ({
      title: item.title,
      summary: fieldFromMarkdownBody(item.body, '摘要'),
      formulas: splitValues(fieldFromMarkdownBody(item.body, '公式')),
      examples: splitValues(fieldFromMarkdownBody(item.body, '例子')),
      prerequisite: fieldFromMarkdownBody(item.body, '前置'),
      difficulty: normalizeDifficulty(fieldFromMarkdownBody(item.body, '难度'))
    })),
    easyMistakes: mistakeItems.map((item) => ({
      title: item.title,
      errorType: fieldFromMarkdownBody(item.body, '错因') || item.title,
      description: fieldFromMarkdownBody(item.body, '说明'),
      correction: fieldFromMarkdownBody(item.body, '纠正')
    })),
    exerciseHints: bulletLines('出题方向'),
    markdown
  };
}
|
|
192
|
+
|
|
93
193
|
function knowledgeExtractionError(reason, detail = '') {
|
|
94
194
|
const error = new Error(`knowledge_extraction_failed:${reason}`);
|
|
95
195
|
error.status = 502;
|
|
@@ -113,13 +213,15 @@ export async function extractChapterPage({
|
|
|
113
213
|
pageCount = 0,
|
|
114
214
|
force = false,
|
|
115
215
|
extractProfile = null,
|
|
216
|
+
chapterStructure = '',
|
|
116
217
|
onProgress = null
|
|
117
218
|
}) {
|
|
118
219
|
await ensureChapterWorkspace(chapter);
|
|
119
220
|
const outputPath = pageExtractPath(chapter.id, imagePath);
|
|
120
221
|
if (!force) {
|
|
121
|
-
const existing = await
|
|
222
|
+
const existing = await readFile(outputPath, 'utf8').catch(() => '');
|
|
122
223
|
if (existing) {
|
|
224
|
+
const parsed = parsePageMarkdownExtract({ chapter, imagePath, pageIndex, pageCount, markdown: existing });
|
|
123
225
|
onProgress?.({
|
|
124
226
|
step: 'knowledge_extract.page.cached',
|
|
125
227
|
message: `第 ${pageIndex}/${pageCount || '?'} 页已有提取缓存,直接复用。`,
|
|
@@ -127,7 +229,7 @@ export async function extractChapterPage({
|
|
|
127
229
|
pageCount,
|
|
128
230
|
imageFile: path.basename(imagePath)
|
|
129
231
|
});
|
|
130
|
-
return
|
|
232
|
+
return parsed;
|
|
131
233
|
}
|
|
132
234
|
}
|
|
133
235
|
const systemPrompt = await readPrompt('knowledge-extract.system.md');
|
|
@@ -138,46 +240,37 @@ export async function extractChapterPage({
|
|
|
138
240
|
pageCount,
|
|
139
241
|
imageFile: path.basename(imagePath)
|
|
140
242
|
});
|
|
141
|
-
const agent = await
|
|
243
|
+
const agent = await callVisionTextAgent({
|
|
142
244
|
timeoutMs: KNOWLEDGE_PAGE_TIMEOUT_MS,
|
|
143
245
|
retries: KNOWLEDGE_PAGE_RETRIES,
|
|
144
246
|
system: systemPrompt,
|
|
145
|
-
text:
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
easyMistakes: [{
|
|
173
|
-
title: 'string',
|
|
174
|
-
errorType: 'string',
|
|
175
|
-
description: 'string',
|
|
176
|
-
correction: 'string'
|
|
177
|
-
}],
|
|
178
|
-
exerciseHints: ['string']
|
|
179
|
-
}
|
|
180
|
-
}),
|
|
247
|
+
text: [
|
|
248
|
+
`任务:从这一页提分笔记图片中提取可用于后续汇总的 Markdown 笔记。`,
|
|
249
|
+
'',
|
|
250
|
+
`章节:${chapter.id} ${chapter.fullTitle}`,
|
|
251
|
+
`主线:${chapter.track}`,
|
|
252
|
+
`提取策略:考点和易错点优先;不要把例子拆成独立知识点;不要补充图片没有出现的内容。`,
|
|
253
|
+
chapterStructure ? `\n以下是本《${chapter.fullTitle}》的章节整体结构分析,供逐页提取时参考。请在提取本页时注意:\n- 本页在章节中的大致角色\n- 本页涉及的核心概念是否已在结构分析中列出\n- 本页知识点与前后页的关联关系\n\n${chapterStructure.slice(0, 3000)}` : '',
|
|
254
|
+
'',
|
|
255
|
+
'输出必须是 Markdown,且只包含以下标题:',
|
|
256
|
+
'# 页面知识提取',
|
|
257
|
+
'## 页面标题',
|
|
258
|
+
'## 原文结构',
|
|
259
|
+
'## 知识点',
|
|
260
|
+
'### <知识点标题>',
|
|
261
|
+
'- 摘要:<一句话>',
|
|
262
|
+
'- 公式:<可为空,多个用分号>',
|
|
263
|
+
'- 例子:<可为空,多个用分号>',
|
|
264
|
+
'- 前置:<可为空>',
|
|
265
|
+
'- 难度:basic|medium|challenge',
|
|
266
|
+
'## 易错点',
|
|
267
|
+
'### <易错点标题>',
|
|
268
|
+
'- 错因:<错误类型>',
|
|
269
|
+
'- 说明:<错误表现>',
|
|
270
|
+
'- 纠正:<正确做法>',
|
|
271
|
+
'## 出题方向',
|
|
272
|
+
'- <题型方向,只写方向,不写完整答案>'
|
|
273
|
+
].join('\n'),
|
|
181
274
|
imagePaths: [imagePath],
|
|
182
275
|
onAttempt: ({ phase, attempt, attempts, delayMs, result }) => {
|
|
183
276
|
const base = `第 ${pageIndex}/${pageCount || '?'} 页识别`;
|
|
@@ -210,9 +303,12 @@ export async function extractChapterPage({
|
|
|
210
303
|
`第 ${pageIndex}/${pageCount || '?'} 页 ${path.basename(imagePath)} 识别失败,已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
|
|
211
304
|
);
|
|
212
305
|
}
|
|
213
|
-
const
|
|
214
|
-
|
|
215
|
-
await
|
|
306
|
+
const markdown = String(agent.data || '').trim();
|
|
307
|
+
const extract = parsePageMarkdownExtract({ chapter, imagePath, pageIndex, pageCount, markdown });
|
|
308
|
+
await writeFile(outputPath, `${markdown}\n`, 'utf8');
|
|
309
|
+
const mirrorPath = path.join(paths.knowledgeExtracts, chapter.id, `${path.basename(imagePath, path.extname(imagePath))}.md`);
|
|
310
|
+
await mkdir(path.dirname(mirrorPath), { recursive: true });
|
|
311
|
+
await writeFile(mirrorPath, `${markdown}\n`, 'utf8');
|
|
216
312
|
onProgress?.({
|
|
217
313
|
step: 'knowledge_extract.page.done',
|
|
218
314
|
message: `第 ${pageIndex}/${pageCount || '?'} 页识别完成,提取 ${extract.knowledgePoints.length} 个知识点。`,
|
|
@@ -226,7 +322,7 @@ export async function extractChapterPage({
|
|
|
226
322
|
function dedupeByTitle(items) {
|
|
227
323
|
const seen = new Map();
|
|
228
324
|
for (const item of items) {
|
|
229
|
-
const key =
|
|
325
|
+
const key = normalizedKnowledgeKey(item.title || item.errorType || '');
|
|
230
326
|
if (!key) continue;
|
|
231
327
|
if (!seen.has(key)) {
|
|
232
328
|
seen.set(key, { ...item });
|
|
@@ -242,6 +338,171 @@ function dedupeByTitle(items) {
|
|
|
242
338
|
return [...seen.values()];
|
|
243
339
|
}
|
|
244
340
|
|
|
341
|
+
/**
 * Builds the comparison key used to detect duplicate knowledge-point titles.
 *
 * The key is the lower-cased title without quotes, parenthesised remarks,
 * punctuation, whitespace and generic suffix words (concept, property,
 * definition, method, criterion, theorem, formula — in Chinese).
 *
 * When removing the generic words would leave nothing (the title IS such a
 * word, e.g. "定义"), the key before that removal is returned instead. Callers
 * skip items with an empty key, so without this fallback such a point would be
 * silently dropped.
 *
 * NOTE(review): titles that differ only in a generic word (e.g. "...性质" vs
 * "...判定") still map to the same key and will be merged — confirm intended.
 *
 * @param {string} value - Title or error type; falsy values yield `''`.
 * @returns {string} Normalised key, `''` only when the input has no content.
 */
function normalizedKnowledgeKey(value) {
  // Fullwidth and typographic characters are written as escapes so they
  // survive any re-encoding of this source file.
  const base = String(value || '')
    .toLowerCase()
    // Straight, curly and backtick quotes.
    .replace(/[\u201C\u201D"\u2018\u2019'`]/g, '')
    // Parenthesised remarks, ASCII or fullwidth parentheses.
    .replace(/[(\uFF08].*?[)\uFF09]/g, '')
    // Colons, commas, full stops, semicolons, enumeration comma, whitespace,
    // middle dot, hyphen, em dash, underscore.
    .replace(/[:\uFF1A,\uFF0C\u3002;\uFF1B\u3001\s\u00B7\-\u2014_]/g, '')
    .trim();
  const withoutGenericWords = base
    .replace(/的概念|概念|性质|定义|方法|判定|定理|公式/g, '')
    .trim();
  return withoutGenericWords || base;
}
|
|
350
|
+
|
|
351
|
+
/**
 * Splits a list into consecutive chunks of at most `size` items.
 *
 * @param {Array} items - Items to split; `null`/`undefined` yields `[]`.
 * @param {number} size - Chunk size. Fractions are truncated. A value that is
 *   not a finite number of at least 1 puts all items into a single chunk: no
 *   item is lost, and the loop cannot stall on a zero or negative step.
 * @returns {Array[]} Chunks in original order; `[]` for an empty input.
 */
function chunkArray(items, size) {
  const list = Array.isArray(items) ? items : Array.from(items ?? []);
  if (list.length === 0) return [];
  const requested = Math.trunc(Number(size));
  const step = Number.isFinite(requested) && requested >= 1 ? requested : list.length;
  const chunks = [];
  for (let index = 0; index < list.length; index += step) {
    chunks.push(list.slice(index, index + step));
  }
  return chunks;
}
|
|
358
|
+
|
|
359
|
+
/**
 * Counts the knowledge points across all sections of a knowledge document.
 *
 * @param {{sections?: {points?: Array}[]}|null} doc - Knowledge document.
 *   A missing document, missing `sections`, or a section without a `points`
 *   array contributes zero instead of throwing.
 * @returns {number} Total number of points.
 */
function countDocPoints(doc) {
  const sections = Array.isArray(doc?.sections) ? doc.sections : [];
  return sections.reduce(
    (total, section) => total + (Array.isArray(section?.points) ? section.points.length : 0),
    0
  );
}
|
|
362
|
+
|
|
363
|
+
/**
 * Tells whether a knowledge point has at least one question template that can
 * be used for generating a question.
 *
 * A template is an array whose first entry (stem) and second entry (answer)
 * are both present. String entries that contain only whitespace count as
 * missing.
 *
 * @param {{questionTemplates?: Array}|null} point - Knowledge point; `null` or
 *   `undefined` yields `false`.
 * @returns {boolean} `true` when a usable template exists.
 */
function pointHasUsableTemplate(point) {
  const templates = point?.questionTemplates;
  if (!Array.isArray(templates)) return false;
  const filled = (value) => (typeof value === 'string' ? value.trim() !== '' : Boolean(value));
  return templates.some((template) => Array.isArray(template) && filled(template[0]) && filled(template[1]));
}
|
|
367
|
+
|
|
368
|
+
/**
 * Returns a cleaned copy of a knowledge point with bounded list sizes.
 *
 * - `title` and `summary` become trimmed strings.
 * - `formulas` (max 4), `pitfalls` (max 5) and `examples` (max 4) keep only
 *   truthy entries; non-arrays become `[]`.
 * - `teachingTips` always has the three list fields
 *   (`commonMisconceptions` max 3, `scaffoldingOrder` max 4,
 *   `checkUnderstandingQuestions` max 2); a non-object value is replaced.
 * - `questionTemplates` keeps up to 3 array-shaped templates. When none
 *   remain — the list is missing, empty, or holds only malformed entries — a
 *   single generic template built from the title and summary is used, so every
 *   point stays usable for question generation.
 *
 * Other properties of the input are copied unchanged. The input is not mutated.
 *
 * @param {object} point - Raw knowledge point.
 * @returns {object} Normalised copy.
 */
function normalizePointForBudget(point) {
  const source = point !== null && typeof point === 'object' ? point : {};
  const cleanList = (value, limit) => (Array.isArray(value) ? value.filter(Boolean).slice(0, limit) : []);

  const title = String(source.title || '').trim();
  const summary = String(source.summary || '').trim();
  const tips = source.teachingTips && typeof source.teachingTips === 'object' ? source.teachingTips : {};
  const templates = Array.isArray(source.questionTemplates)
    ? source.questionTemplates.filter((template) => Array.isArray(template)).slice(0, 3)
    : [];

  return {
    ...source,
    title,
    summary,
    formulas: cleanList(source.formulas, 4),
    pitfalls: cleanList(source.pitfalls, 5),
    examples: cleanList(source.examples, 4),
    teachingTips: {
      commonMisconceptions: cleanList(tips.commonMisconceptions, 3),
      scaffoldingOrder: cleanList(tips.scaffoldingOrder, 4),
      checkUnderstandingQuestions: cleanList(tips.checkUnderstandingQuestions, 2)
    },
    // Decide on the fallback after filtering, and build it from the trimmed
    // title/summary so blank strings do not end up in the template.
    questionTemplates: templates.length
      ? templates
      : [[
        `围绕「${title || '本知识点'}」完成一道基础覆盖题,并写出关键结论。`,
        summary || '答案需符合知识点定义、性质或方法。',
        title || '知识点理解错误'
      ]]
  };
}
|
|
402
|
+
|
|
403
|
+
/**
 * Merges knowledge points whose titles normalise to the same key.
 *
 * Every point is first normalised with normalizePointForBudget. The first
 * point seen for a key is kept as the base; later duplicates are folded in:
 * the longer summary wins, list fields are unioned (first occurrence kept,
 * then capped), question templates are de-duplicated by their stem, and
 * teaching tips are combined. Points whose key is empty are skipped.
 *
 * @param {object[]} points - Raw knowledge points.
 * @returns {object[]} One merged point per key, in first-seen order.
 */
function mergeDuplicatePoints(points) {
  const merged = new Map();
  // Order-preserving union of two iterables (missing values count as empty).
  const union = (left, right) => [...new Set([...(left || []), ...(right || [])])];
  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);

  for (const candidate of points) {
    const incoming = normalizePointForBudget(candidate);
    const key = normalizedKnowledgeKey(incoming.title);
    if (!key) continue;

    const existing = merged.get(key);
    if (!existing) {
      merged.set(key, incoming);
      continue;
    }

    // Keep the longer summary; on a tie the earlier one stays.
    if (existing.summary.length < incoming.summary.length) {
      existing.summary = incoming.summary;
    }
    existing.formulas = union(existing.formulas, incoming.formulas).slice(0, 4);
    existing.pitfalls = union(existing.pitfalls, incoming.pitfalls).slice(0, 5);
    existing.examples = union(existing.examples, incoming.examples).slice(0, 4);

    // Keep only the first array-shaped template for each stem.
    const allTemplates = [...(existing.questionTemplates || []), ...(incoming.questionTemplates || [])];
    const isFirstWithStem = (template, position) => (
      Array.isArray(template)
      && allTemplates.findIndex((other) => Array.isArray(other) && other[0] === template[0]) === position
    );
    existing.questionTemplates = allTemplates.filter(isFirstWithStem).slice(0, 3);

    existing.sources = union(existing.sources, incoming.sources);

    const incomingTips = incoming.teachingTips;
    if (!incomingTips || typeof incomingTips !== 'object') continue;

    const existingTips = existing.teachingTips || {
      commonMisconceptions: [],
      scaffoldingOrder: [],
      checkUnderstandingQuestions: []
    };
    // A non-empty scaffolding order from the later point replaces the earlier one.
    const incomingHasOrder = Array.isArray(incomingTips.scaffoldingOrder) && incomingTips.scaffoldingOrder.length;
    existing.teachingTips = {
      commonMisconceptions: union(
        listOrEmpty(existingTips.commonMisconceptions),
        listOrEmpty(incomingTips.commonMisconceptions)
      ).slice(0, 4),
      scaffoldingOrder: incomingHasOrder
        ? incomingTips.scaffoldingOrder
        : existingTips.scaffoldingOrder || [],
      checkUnderstandingQuestions: union(
        listOrEmpty(existingTips.checkUnderstandingQuestions),
        listOrEmpty(incomingTips.checkUnderstandingQuestions)
      ).slice(0, 3)
    };
  }

  return [...merged.values()];
}
|
|
449
|
+
|
|
450
|
+
/**
 * Scores a knowledge point by how much usable material it carries.
 *
 * Weights: usable question template 4, summary 2, pitfalls 2, formulas 1,
 * examples 1. The maximum score is 10.
 *
 * @param {object} point - Knowledge point.
 * @returns {number} Priority score; higher means better equipped.
 */
function pointPriority(point) {
  const signals = [
    [pointHasUsableTemplate(point), 4],
    [point.summary, 2],
    [point.pitfalls?.length, 2],
    [point.formulas?.length, 1],
    [point.examples?.length, 1]
  ];
  return signals.reduce((score, [present, weight]) => (present ? score + weight : score), 0);
}
|
|
459
|
+
|
|
460
|
+
/**
 * Rebuilds a knowledge document into two de-duplicated sections (core points
 * and mistake points) and records the point budget in `review.pointBudget`.
 *
 * Points from sections whose title mentions mistakes go to the mistake
 * section, all others to the core section. Within each section duplicates
 * are merged, points are ordered by priority (highest first) and ids are
 * reassigned sequentially. Empty sections are omitted.
 *
 * @param {{id: string}} chapter - Chapter; its id prefixes the generated ids.
 * @param {object} doc - Knowledge document with `sections` and optional `review`.
 * @param {object} profile - Normalised extract profile carrying the budgets.
 * @returns {object} Copy of `doc` with replaced `sections` and extended `review`.
 */
function enforceKnowledgeBudget(chapter, doc, profile) {
  const maxCore = profile.maxCorePointCount || KNOWLEDGE_MAX_CORE_POINTS;
  const maxMistakes = profile.maxMistakePointCount ?? KNOWLEDGE_MAX_MISTAKE_POINTS;

  const mistakeTitlePattern = /易错|错题|错误|mistake/i;
  const corePoints = [];
  const mistakePoints = [];
  (doc.sections || []).forEach((section) => {
    const bucket = mistakeTitlePattern.test(section.title || '') ? mistakePoints : corePoints;
    for (const point of section.points || []) {
      bucket.push(point);
    }
  });

  // Knowledge points may be merged when they are duplicates, but none is ever
  // discarded to satisfy a numeric budget: dropping points would corrupt the
  // chapter's coverage/mastery loop. The budget numbers are only a target hint
  // for the extract/summary agent prompts upstream; this step only de-dups.
  const rankAndNumber = (points, label) => {
    const ranked = mergeDuplicatePoints(points);
    ranked.sort((left, right) => pointPriority(right) - pointPriority(left));
    return ranked.map((point, position) => ({
      ...point,
      id: `${chapter.id}-${label}-${String(position + 1).padStart(2, '0')}`
    }));
  };
  const dedupedCore = rankAndNumber(corePoints, 'kp');
  const dedupedMistakes = rankAndNumber(mistakePoints, 'mistake');

  const sections = [
    { title: '知识点覆盖', points: dedupedCore },
    { title: '易错题专项', points: dedupedMistakes }
  ].filter((section) => section.points.length);

  const pointBudget = {
    maxCorePointCount: maxCore,
    maxMistakePointCount: maxMistakes,
    corePointCount: dedupedCore.length,
    mistakePointCount: dedupedMistakes.length
  };

  return {
    ...doc,
    sections,
    review: {
      ...(doc.review || {}),
      pointBudget
    }
  };
}
|
|
505
|
+
|
|
245
506
|
function localMergeChapter(chapter, pageExtracts) {
|
|
246
507
|
const points = dedupeByTitle(pageExtracts.flatMap((page) =>
|
|
247
508
|
page.knowledgePoints.map((point) => ({
|
|
@@ -276,6 +537,14 @@ function localMergeChapter(chapter, pageExtracts) {
|
|
|
276
537
|
point.title
|
|
277
538
|
]
|
|
278
539
|
],
|
|
540
|
+
teachingTips: {
|
|
541
|
+
commonMisconceptions: (point.pitfalls || []).slice(0, 2)
|
|
542
|
+
.map((pitfall) => `常见的误解:${pitfall}`),
|
|
543
|
+
scaffoldingOrder: [`先理解「${point.title}」的基本定义`, `再通过例子巩固`, `最后独立完成变式题`],
|
|
544
|
+
checkUnderstandingQuestions: [
|
|
545
|
+
`用自己的话解释什么是「${point.title}」,并举一个例子。`
|
|
546
|
+
]
|
|
547
|
+
},
|
|
279
548
|
sources: point.sources || []
|
|
280
549
|
}))
|
|
281
550
|
},
|
|
@@ -295,6 +564,11 @@ function localMergeChapter(chapter, pageExtracts) {
|
|
|
295
564
|
mistake.errorType || mistake.title
|
|
296
565
|
]
|
|
297
566
|
],
|
|
567
|
+
teachingTips: {
|
|
568
|
+
commonMisconceptions: [mistake.description || mistake.errorType || mistake.title || '易错点'].filter(Boolean).slice(0, 2),
|
|
569
|
+
scaffoldingOrder: ['先识别错误类型', '再用正确方法重新做一遍'],
|
|
570
|
+
checkUnderstandingQuestions: ['这个易错点最容易在什么情况下出现?如何避免?']
|
|
571
|
+
},
|
|
298
572
|
sources: mistake.sources || []
|
|
299
573
|
}))
|
|
300
574
|
}
|
|
@@ -302,79 +576,35 @@ function localMergeChapter(chapter, pageExtracts) {
|
|
|
302
576
|
};
|
|
303
577
|
}
|
|
304
578
|
|
|
305
|
-
|
|
579
|
+
async function callKnowledgeSummaryAgent({
|
|
580
|
+
systemPrompt,
|
|
306
581
|
chapter,
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
582
|
+
task,
|
|
583
|
+
context,
|
|
584
|
+
requirements,
|
|
585
|
+
schema,
|
|
586
|
+
onProgress,
|
|
587
|
+
progressPrefix
|
|
311
588
|
}) {
|
|
312
|
-
|
|
313
|
-
const normalizedProfile = normalizeExtractProfile(extractProfile || {});
|
|
314
|
-
const systemPrompt = await readPrompt('knowledge-summarize.system.md');
|
|
315
|
-
onProgress?.({
|
|
316
|
-
step: 'knowledge_extract.summary.start',
|
|
317
|
-
message: `正在合并 ${pageExtracts.length} 页提取结果,生成章节知识点。`,
|
|
318
|
-
pageCount: pageExtracts.length
|
|
319
|
-
});
|
|
320
|
-
const agent = await callChatAgent({
|
|
589
|
+
return callChatAgent({
|
|
321
590
|
timeoutMs: KNOWLEDGE_SUMMARY_TIMEOUT_MS,
|
|
322
591
|
retries: KNOWLEDGE_SUMMARY_RETRIES,
|
|
323
592
|
temperature: 0.1,
|
|
324
593
|
system: systemPrompt,
|
|
325
|
-
user: promptPayload({
|
|
326
|
-
task: '把逐页提取结果合并成章节知识文档,并做覆盖检查。',
|
|
327
|
-
context: {
|
|
328
|
-
chapter,
|
|
329
|
-
pageExtracts,
|
|
330
|
-
localDraft: local,
|
|
331
|
-
extractProfile: normalizedProfile
|
|
332
|
-
},
|
|
333
|
-
requirements: [
|
|
334
|
-
'合并同义知识点,保留来源页。',
|
|
335
|
-
normalizedProfile.detailLevel === 'fine_grained'
|
|
336
|
-
? '当前提取画像要求细粒度拆分:保留必要前置概念、步骤性方法和基础易错点。'
|
|
337
|
-
: '当前提取画像要求考点优先:优先保留考试常见考点、易错点、变式边界和必要前置关系,不把教材说明拆得过碎。',
|
|
338
|
-
'sections 至少包含“知识点覆盖”;如果有易错点,单独包含“易错题专项”。',
|
|
339
|
-
'每个知识点必须有 id、title、summary、formulas、pitfalls、questionTemplates。',
|
|
340
|
-
'questionTemplates 用于后续出题,题干只写题目,不写解题过程。',
|
|
341
|
-
'review.missingOrWeak 列出疑似遗漏或需要人工复核的点。'
|
|
342
|
-
],
|
|
343
|
-
schema: {
|
|
344
|
-
sections: [{
|
|
345
|
-
title: '知识点覆盖',
|
|
346
|
-
points: [{
|
|
347
|
-
id: `${chapter.id}-kp-01`,
|
|
348
|
-
title: 'string',
|
|
349
|
-
summary: 'string',
|
|
350
|
-
formulas: ['string with LaTeX'],
|
|
351
|
-
pitfalls: ['string'],
|
|
352
|
-
examples: ['string'],
|
|
353
|
-
questionTemplates: [['stem', 'answer', 'expectedErrorType']],
|
|
354
|
-
sources: ['image filename']
|
|
355
|
-
}]
|
|
356
|
-
}],
|
|
357
|
-
review: {
|
|
358
|
-
passed: true,
|
|
359
|
-
coverageSummary: 'string',
|
|
360
|
-
missingOrWeak: ['string'],
|
|
361
|
-
duplicateMerged: ['string']
|
|
362
|
-
}
|
|
363
|
-
}
|
|
364
|
-
}),
|
|
594
|
+
user: promptPayload({ task, context, requirements, schema }),
|
|
365
595
|
onAttempt: ({ phase, attempt, attempts, delayMs, result }) => {
|
|
366
596
|
if (phase === 'start') {
|
|
367
597
|
onProgress?.({
|
|
368
|
-
step:
|
|
369
|
-
message:
|
|
598
|
+
step: `${progressPrefix}.attempt`,
|
|
599
|
+
message: `${chapter.fullTitle}知识点合并:第 ${attempt}/${attempts} 次尝试。`,
|
|
370
600
|
attempt,
|
|
371
601
|
attempts
|
|
372
602
|
});
|
|
373
603
|
}
|
|
374
604
|
if (phase === 'retry') {
|
|
375
605
|
onProgress?.({
|
|
376
|
-
step:
|
|
377
|
-
message:
|
|
606
|
+
step: `${progressPrefix}.retry`,
|
|
607
|
+
message: `知识点合并遇到${retryReasonText(result?.reason)},${Math.round(delayMs / 1000)} 秒后自动重试。`,
|
|
378
608
|
attempt,
|
|
379
609
|
attempts,
|
|
380
610
|
reason: result?.reason || null
|
|
@@ -382,13 +612,329 @@ export async function summarizeChapterExtraction({
|
|
|
382
612
|
}
|
|
383
613
|
}
|
|
384
614
|
});
|
|
385
|
-
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
/**
 * Builds the example schema object describing the expected shape of a chapter
 * knowledge summary: one sample section with one sample point, plus the
 * review block.
 *
 * @param {{id: string}} chapter - Chapter; its id prefixes the sample point id.
 * @returns {object} Schema example with `sections` and `review`.
 */
function knowledgeSummarySchema(chapter) {
  const teachingTips = {
    commonMisconceptions: ['string'],
    scaffoldingOrder: ['string'],
    checkUnderstandingQuestions: ['string']
  };
  const samplePoint = {
    id: `${chapter.id}-kp-01`,
    title: 'string',
    summary: 'string',
    formulas: ['string with LaTeX'],
    pitfalls: ['string'],
    examples: ['string'],
    questionTemplates: [['stem', 'answer', 'expectedErrorType']],
    teachingTips,
    sources: ['image filename']
  };
  const review = {
    passed: true,
    coverageSummary: 'string',
    missingOrWeak: ['string'],
    duplicateMerged: ['string']
  };
  return {
    sections: [{ title: '知识点覆盖', points: [samplePoint] }],
    review
  };
}
|
|
645
|
+
|
|
646
|
+
async function summarizePageChunk({ chapter, pages, chunkIndex, chunkCount, systemPrompt, normalizedProfile, onProgress }) {
|
|
647
|
+
const pageLabels = pages.map((page) => page.imageFile).join('、');
|
|
648
|
+
const cachedMarkdown = await readFile(chunkSummaryPath(chapter.id, chunkIndex), 'utf8').catch(() => '');
|
|
649
|
+
if (cachedMarkdown) {
|
|
650
|
+
const knowledgePointCount = (cachedMarkdown.match(/^###\s+/gm) || []).length;
|
|
651
|
+
onProgress?.({
|
|
652
|
+
step: 'knowledge_extract.summary.chunk.cached',
|
|
653
|
+
message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 已有 Markdown 汇总缓存,直接复用。`,
|
|
654
|
+
chunkIndex: chunkIndex + 1,
|
|
655
|
+
chunkCount,
|
|
656
|
+
knowledgePointCount
|
|
657
|
+
});
|
|
658
|
+
return {
|
|
659
|
+
chunkIndex: chunkIndex + 1,
|
|
660
|
+
pageFiles: pages.map((page) => page.imageFile),
|
|
661
|
+
markdown: cachedMarkdown
|
|
662
|
+
};
|
|
663
|
+
}
|
|
664
|
+
onProgress?.({
|
|
665
|
+
step: 'knowledge_extract.summary.chunk.start',
|
|
666
|
+
message: `正在合并知识点分组 ${chunkIndex + 1}/${chunkCount}(${pageLabels})。`,
|
|
667
|
+
chunkIndex: chunkIndex + 1,
|
|
668
|
+
chunkCount,
|
|
669
|
+
pageCount: pages.length
|
|
670
|
+
});
|
|
671
|
+
// The prompt requires a structured Markdown doc ("# 分组知识汇总", "## 知识点覆盖",
|
|
672
|
+
// "## 易错题专项"). Some models leak reasoning as a leading sentence and skip the
|
|
673
|
+
// requested structure, producing a chunk with zero knowledge points. callChatTextAgent
|
|
674
|
+
// returns responseFormat: 'text', so such leakage is reported as ok and is not retried
|
|
675
|
+
// at the agent level. Retry the chunk in place so one bad chunk does not force a whole-
|
|
676
|
+
// chapter redo (which would discard the other chunk's valid cached summary).
|
|
677
|
+
const requiredHeadings = /^#\s+分组知识汇总/m;
|
|
678
|
+
const structureAttempts = Number(process.env.KNOWLEDGE_SUMMARY_STRUCTURE_RETRIES || 3);
|
|
679
|
+
const userPayload = [
|
|
680
|
+
`任务:合并《${chapter.fullTitle}》第 ${chunkIndex + 1}/${chunkCount} 组逐页 Markdown 提取结果。`,
|
|
681
|
+
'',
|
|
682
|
+
`本组页面:${pageLabels}`,
|
|
683
|
+
`数量控制:核心知识点不超过 ${Math.ceil(normalizedProfile.maxCorePointCount / chunkCount) + 4} 个;易错点不超过 ${Math.ceil(normalizedProfile.maxMistakePointCount / chunkCount) + 2} 个。`,
|
|
684
|
+
'',
|
|
685
|
+
'要求:',
|
|
686
|
+
'- 只基于输入页面合并知识点,不新增页面没有依据的内容。',
|
|
687
|
+
'- 合并同义、过细、重复候选,保留来源页。',
|
|
688
|
+
'- 优先保留考试常见考点、易错边界、几何概念辨析和可出题的方法。',
|
|
689
|
+
'- 输出 Markdown,不输出 JSON。',
|
|
690
|
+
'- 必须包含标题:# 分组知识汇总、## 知识点覆盖、## 易错题专项、## 合并说明。',
|
|
691
|
+
'- 每个知识点用三级标题,包含:摘要、来源、公式、易错边界、出题模板。直接输出结果,不要输出思考过程或开场白。',
|
|
692
|
+
'',
|
|
693
|
+
'逐页 Markdown:',
|
|
694
|
+
...pages.map((page) => [
|
|
695
|
+
`\n---\n`,
|
|
696
|
+
`来源页:${page.imageFile}`,
|
|
697
|
+
page.markdown || ''
|
|
698
|
+
].join('\n'))
|
|
699
|
+
].join('\n');
|
|
700
|
+
const handleAttempt = ({ phase, attempt, attempts, delayMs, result }) => {
|
|
701
|
+
if (phase === 'start') {
|
|
702
|
+
onProgress?.({
|
|
703
|
+
step: 'knowledge_extract.summary.chunk.attempt',
|
|
704
|
+
message: `知识点分组 ${chunkIndex + 1}/${chunkCount}:第 ${attempt}/${attempts} 次尝试。`,
|
|
705
|
+
attempt,
|
|
706
|
+
attempts
|
|
707
|
+
});
|
|
708
|
+
}
|
|
709
|
+
if (phase === 'retry') {
|
|
710
|
+
onProgress?.({
|
|
711
|
+
step: 'knowledge_extract.summary.chunk.retry',
|
|
712
|
+
message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 遇到${retryReasonText(result?.reason)},${Math.round(delayMs / 1000)} 秒后自动重试。`,
|
|
713
|
+
attempt,
|
|
714
|
+
attempts,
|
|
715
|
+
reason: result?.reason || null
|
|
716
|
+
});
|
|
717
|
+
}
|
|
718
|
+
};
|
|
719
|
+
let markdown = '';
|
|
720
|
+
let pointHeadings = 0;
|
|
721
|
+
for (let structureAttempt = 1; structureAttempt <= structureAttempts; structureAttempt += 1) {
|
|
722
|
+
const agent = await callChatTextAgent({
|
|
723
|
+
system: systemPrompt,
|
|
724
|
+
timeoutMs: KNOWLEDGE_SUMMARY_TIMEOUT_MS,
|
|
725
|
+
retries: KNOWLEDGE_SUMMARY_RETRIES,
|
|
726
|
+
temperature: 0.1,
|
|
727
|
+
user: userPayload,
|
|
728
|
+
onAttempt: handleAttempt
|
|
729
|
+
});
|
|
730
|
+
if (!agent.ok || !agent.data) {
|
|
731
|
+
throw knowledgeExtractionError(
|
|
732
|
+
agent.reason || 'empty_response',
|
|
733
|
+
`知识点分组 ${chunkIndex + 1}/${chunkCount} 合并失败,已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
|
|
734
|
+
);
|
|
735
|
+
}
|
|
736
|
+
markdown = String(agent.data || '').trim();
|
|
737
|
+
pointHeadings = (markdown.match(/^###\s+/gm) || []).length;
|
|
738
|
+
if (requiredHeadings.test(markdown) && pointHeadings > 0) {
|
|
739
|
+
break;
|
|
740
|
+
}
|
|
741
|
+
onProgress?.({
|
|
742
|
+
step: 'knowledge_extract.summary.chunk.structure_retry',
|
|
743
|
+
message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 第 ${structureAttempt}/${structureAttempts} 次返回缺少结构或知识点,重新生成。`,
|
|
744
|
+
chunkIndex: chunkIndex + 1,
|
|
745
|
+
chunkCount,
|
|
746
|
+
structureAttempt,
|
|
747
|
+
structureAttempts
|
|
748
|
+
});
|
|
749
|
+
if (structureAttempt < structureAttempts) {
|
|
750
|
+
await new Promise((resolve) => setTimeout(resolve, 2000));
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
if (!requiredHeadings.test(markdown) || pointHeadings === 0) {
|
|
386
754
|
throw knowledgeExtractionError(
|
|
387
|
-
|
|
388
|
-
|
|
755
|
+
'invalid_chunk_structure',
|
|
756
|
+
`知识点分组 ${chunkIndex + 1}/${chunkCount} 输出缺少必需的 Markdown 结构或知识点,已重试 ${structureAttempts} 次。`
|
|
389
757
|
);
|
|
390
758
|
}
|
|
391
|
-
|
|
759
|
+
await mkdir(chunkSummaryDir(chapter.id), { recursive: true });
|
|
760
|
+
await writeFile(chunkSummaryPath(chapter.id, chunkIndex), `${markdown}\n`, 'utf8');
|
|
761
|
+
const knowledgePointCount = pointHeadings;
|
|
762
|
+
onProgress?.({
|
|
763
|
+
step: 'knowledge_extract.summary.chunk.done',
|
|
764
|
+
message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 合并完成,得到约 ${knowledgePointCount} 个候选点。`,
|
|
765
|
+
chunkIndex: chunkIndex + 1,
|
|
766
|
+
chunkCount,
|
|
767
|
+
knowledgePointCount
|
|
768
|
+
});
|
|
769
|
+
return {
|
|
770
|
+
chunkIndex: chunkIndex + 1,
|
|
771
|
+
pageFiles: pages.map((page) => page.imageFile),
|
|
772
|
+
markdown
|
|
773
|
+
};
|
|
774
|
+
}
|
|
775
|
+
|
|
776
|
+
/**
 * Normalize a Markdown heading title into a plain knowledge-point title.
 *
 * Strips leading heading markers (`#`, `##`, ...) and one leading list ordinal
 * such as `1.` or `2、`, then trims surrounding whitespace.
 *
 * @param {*} title - Raw title text; any falsy value is treated as an empty string.
 * @returns {string} The cleaned title, possibly empty.
 */
function cleanMarkdownTitle(title) {
  return String(title || '')
    // Heading markers may precede the ordinal ("### 1. Title"); drop them first
    // so the ordinal pass below can still see the number at the start.
    .replace(/^#+\s*/, '')
    // Only strip a list ordinal. The lookahead keeps decimals and multi-level
    // numbers intact ("0.5 的倒数", "1.2 有理数") instead of eating their integer part.
    .replace(/^\s*\d+[.、](?!\d)\s*/, '')
    // Heading markers that follow the ordinal ("1. ## Title").
    .replace(/^#+\s*/, '')
    .trim();
}
|
|
782
|
+
|
|
783
|
+
/**
 * Split a delimited Markdown field value into its individual entries.
 *
 * Entries are separated by ASCII or full-width semicolons and commas, or by the
 * Chinese enumeration comma. Each entry is trimmed; empty entries and the
 * placeholder `无` are dropped.
 *
 * @param {*} value - Raw field text; any falsy value is treated as an empty string.
 * @returns {string[]} The non-empty entries in their original order.
 */
function splitMarkdownValues(value) {
  const text = String(value || '');
  const entries = [];
  for (const piece of text.split(/[;;、,,]/)) {
    const entry = piece.trim();
    if (entry === '' || entry === '无') {
      continue;
    }
    entries.push(entry);
  }
  return entries;
}
|
|
789
|
+
|
|
790
|
+
/**
 * Convert the heading items of one section of a chunk-summary Markdown document
 * into knowledge-point records.
 *
 * For every heading item returned by `markdownHeadingItems` for the requested
 * section, the item body is read for the fields 摘要, 来源, 公式, 易错边界, 错因,
 * 说明 and 出题模板. Items whose cleaned title is empty are skipped.
 *
 * @param {string} markdown - Chunk summary Markdown.
 * @param {string} sectionTitle - Title of the section to read, as understood by `markdownSection`.
 * @param {string} [pointType='core'] - `'mistake'` selects the mistake-oriented wording for the
 *   fallback question template; any other value selects the basic-coverage wording.
 * @returns {Array<object>} Records with `title`, `summary`, `formulas`, `pitfalls` (deduplicated,
 *   at most 5), `examples` (always empty), `questionTemplates` (at most 3 triples of
 *   `[stem, expectation, pitfall-or-title]`) and `sources`.
 */
function chunkMarkdownPoints(markdown, sectionTitle, pointType = 'core') {
  const points = [];
  const headingItems = markdownHeadingItems(markdownSection(markdown, sectionTitle));
  for (const item of headingItems) {
    const summary = fieldFromMarkdownBody(item.body, '摘要');
    const sources = splitMarkdownValues(fieldFromMarkdownBody(item.body, '来源'));
    const formulas = splitMarkdownValues(fieldFromMarkdownBody(item.body, '公式'));
    // Pitfalls are gathered from three differently named fields, in this order.
    const pitfalls = ['易错边界', '错因', '说明']
      .flatMap((fieldName) => splitMarkdownValues(fieldFromMarkdownBody(item.body, fieldName)))
      .filter(Boolean);
    const templateStems = markdownFieldList(item.body, '出题模板');
    const title = cleanMarkdownTitle(item.title);
    if (!title) {
      continue;
    }

    // When the summary lists no templates, synthesize a single generic stem.
    let stems = templateStems;
    if (!templateStems.length) {
      stems = [
        `围绕「${title}」设计一道${pointType === 'mistake' ? '易错辨析' : '基础覆盖'}题。`
      ];
    }
    const expectation = summary || `正确运用「${title}」相关概念、性质或方法。`;
    // Uses the first raw pitfall (before deduplication), falling back to the title.
    const focus = pitfalls[0] || title;

    points.push({
      title,
      summary,
      formulas,
      pitfalls: Array.from(new Set(pitfalls)).slice(0, 5),
      examples: [],
      questionTemplates: stems.slice(0, 3).map((stem) => [stem, expectation, focus]),
      sources
    });
  }
  return points;
}
|
|
820
|
+
|
|
821
|
+
/**
 * Merge per-chunk Markdown summaries into one chapter knowledge document
 * without calling an agent.
 *
 * Core points are read from each chunk's 知识点覆盖 section, mistake points from
 * 易错题专项, and merge notes from the lines of 合并说明 (leading `- ` removed,
 * blank lines dropped).
 *
 * @param {object} chapter - Chapter descriptor; not read by this function.
 * @param {Array<{markdown: string}>} chunkDocs - Chunk summaries, in order.
 * @returns {{sections: Array<{title: string, points: Array<object>}>, review: object}}
 *   Only sections that contain at least one point are included. `review.passed`
 *   is true when at least one core point was found; `review.duplicateMerged`
 *   holds at most the first 20 merge notes.
 */
function localMergeChunkSummaries(chapter, chunkDocs) {
  const corePoints = [];
  const mistakePoints = [];
  const mergeNotes = [];

  for (const { markdown } of chunkDocs) {
    for (const point of chunkMarkdownPoints(markdown, '知识点覆盖', 'core')) {
      corePoints.push(point);
    }
    for (const point of chunkMarkdownPoints(markdown, '易错题专项', 'mistake')) {
      mistakePoints.push(point);
    }
    for (const rawLine of markdownSection(markdown, '合并说明').split(/\r?\n/)) {
      const note = rawLine.replace(/^\s*-\s*/, '').trim();
      if (note) {
        mergeNotes.push(note);
      }
    }
  }

  const sections = [];
  if (corePoints.length) {
    sections.push({ title: '知识点覆盖', points: corePoints });
  }
  if (mistakePoints.length) {
    sections.push({ title: '易错题专项', points: mistakePoints });
  }

  const review = {
    passed: corePoints.length > 0,
    coverageSummary: `由 ${chunkDocs.length} 个分组 Markdown 汇总合并生成最终章节知识点,并在保存前执行去重和数量控制。`,
    missingOrWeak: [],
    duplicateMerged: mergeNotes.slice(0, 20)
  };

  return { sections, review };
}
|
|
846
|
+
|
|
847
|
+
export async function summarizeChapterExtraction({
|
|
848
|
+
chapter,
|
|
849
|
+
pageExtracts,
|
|
850
|
+
extractProfile = null,
|
|
851
|
+
resetLearningState = false,
|
|
852
|
+
onProgress = null
|
|
853
|
+
}) {
|
|
854
|
+
const local = localMergeChapter(chapter, pageExtracts);
|
|
855
|
+
const normalizedProfile = normalizeExtractProfile(extractProfile || {});
|
|
856
|
+
const systemPrompt = await readPrompt('knowledge-summarize.system.md');
|
|
857
|
+
onProgress?.({
|
|
858
|
+
step: 'knowledge_extract.summary.start',
|
|
859
|
+
message: `正在合并 ${pageExtracts.length} 页提取结果,生成章节知识点。`,
|
|
860
|
+
pageCount: pageExtracts.length
|
|
861
|
+
});
|
|
862
|
+
const pageChunks = chunkArray(pageExtracts, normalizedProfile.summaryChunkSize);
|
|
863
|
+
const chunkDocs = [];
|
|
864
|
+
if (pageChunks.length > 1) {
|
|
865
|
+
for (let index = 0; index < pageChunks.length; index += 1) {
|
|
866
|
+
chunkDocs.push(await summarizePageChunk({
|
|
867
|
+
chapter,
|
|
868
|
+
pages: pageChunks[index],
|
|
869
|
+
chunkIndex: index,
|
|
870
|
+
chunkCount: pageChunks.length,
|
|
871
|
+
systemPrompt,
|
|
872
|
+
normalizedProfile,
|
|
873
|
+
onProgress
|
|
874
|
+
}));
|
|
875
|
+
}
|
|
876
|
+
}
|
|
877
|
+
const finalContext = pageChunks.length > 1
|
|
878
|
+
? { chapter, chunkDocs, localDraft: local, extractProfile: normalizedProfile }
|
|
879
|
+
: { chapter, pageExtracts, localDraft: local, extractProfile: normalizedProfile };
|
|
880
|
+
onProgress?.({
|
|
881
|
+
step: 'knowledge_extract.summary.final.start',
|
|
882
|
+
message: pageChunks.length > 1
|
|
883
|
+
? `正在把 ${chunkDocs.length} 个知识点分组合并为最终章节知识。`
|
|
884
|
+
: '正在生成最终章节知识点。',
|
|
885
|
+
chunkCount: chunkDocs.length
|
|
886
|
+
});
|
|
887
|
+
let finalDoc = null;
|
|
888
|
+
// Prefer the deterministic local merge so the final chapter doc is built
|
|
889
|
+
// from real per-page / per-chunk agent outputs (now structurally validated,
|
|
890
|
+
// with chunk-level retries on bad output) instead of an additional free-form
|
|
891
|
+
// LLM merge that can lose points or return invalid JSON. When the chapter
|
|
892
|
+
// fits in a single chunk the chunk summary is skipped, so fall back to
|
|
893
|
+
// localMergeChapter which parses per-page knowledge points directly.
|
|
894
|
+
const localDoc = chunkDocs.length > 0
|
|
895
|
+
? localMergeChunkSummaries(chapter, chunkDocs)
|
|
896
|
+
: local;
|
|
897
|
+
const hasUsableSections = (localDoc.sections || []).some(
|
|
898
|
+
(section) => Array.isArray(section.points) && section.points.length > 0
|
|
899
|
+
);
|
|
900
|
+
if (hasUsableSections) {
|
|
901
|
+
finalDoc = localDoc;
|
|
902
|
+
onProgress?.({
|
|
903
|
+
step: 'knowledge_extract.summary.final.local',
|
|
904
|
+
message: `已从 ${chunkDocs.length} 个分组 Markdown 本地合并为最终章节知识,共约 ${
|
|
905
|
+
(localDoc.sections || []).reduce((sum, s) => sum + (s.points || []).length, 0)
|
|
906
|
+
} 个候选点。`,
|
|
907
|
+
chunkCount: chunkDocs.length
|
|
908
|
+
});
|
|
909
|
+
} else {
|
|
910
|
+
const agent = await callKnowledgeSummaryAgent({
|
|
911
|
+
systemPrompt,
|
|
912
|
+
chapter,
|
|
913
|
+
task: '把逐页提取结果合并成章节知识文档,并做覆盖检查。',
|
|
914
|
+
progressPrefix: 'knowledge_extract.summary.final',
|
|
915
|
+
onProgress,
|
|
916
|
+
context: finalContext,
|
|
917
|
+
requirements: [
|
|
918
|
+
'合并同义知识点,保留来源页。',
|
|
919
|
+
`最终“知识点覆盖”核心点数量控制在 ${normalizedProfile.maxCorePointCount} 个以内。`,
|
|
920
|
+
`最终“易错题专项”数量控制在 ${normalizedProfile.maxMistakePointCount} 个以内。`,
|
|
921
|
+
'优先保留考试常见考点、易错点、变式边界、几何概念辨析和必要前置关系,不把教材说明拆得过碎。',
|
|
922
|
+
'sections 至少包含“知识点覆盖”;如果有易错点,单独包含“易错题专项”。',
|
|
923
|
+
'每个知识点必须有 title、summary、formulas、pitfalls、questionTemplates。',
|
|
924
|
+
'questionTemplates 用于后续出题,题干只写题目,不写解题过程。',
|
|
925
|
+
'review.missingOrWeak 列出疑似遗漏或需要人工复核的点。'
|
|
926
|
+
],
|
|
927
|
+
schema: knowledgeSummarySchema(chapter)
|
|
928
|
+
});
|
|
929
|
+
if (!agent.ok || !Array.isArray(agent.data?.sections)) {
|
|
930
|
+
throw knowledgeExtractionError(
|
|
931
|
+
agent.reason || 'invalid_agent_response',
|
|
932
|
+
`章节汇总失败,已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
|
|
933
|
+
);
|
|
934
|
+
}
|
|
935
|
+
finalDoc = agent.data;
|
|
936
|
+
}
|
|
937
|
+
const merged = enforceKnowledgeBudget(chapter, finalDoc, normalizedProfile);
|
|
392
938
|
const extractedAt = new Date().toISOString();
|
|
393
939
|
const normalized = await saveKnowledgeDoc(chapter, merged, 'agent', {
|
|
394
940
|
extractProfile: normalizedProfile,
|
|
@@ -404,6 +950,7 @@ export async function summarizeChapterExtraction({
|
|
|
404
950
|
updatedAt: extractedAt,
|
|
405
951
|
extractProfile: normalizedProfile,
|
|
406
952
|
extractorVersion: 1,
|
|
953
|
+
summaryStrategy: pageChunks.length > 1 ? 'chunked_markdown_local_merge' : 'single_merge',
|
|
407
954
|
review: merged.review || null,
|
|
408
955
|
knowledgePointCount: normalized.sections.reduce((sum, section) => sum + section.points.length, 0)
|
|
409
956
|
};
|
|
@@ -440,6 +987,55 @@ export async function extractChapterKnowledge({
|
|
|
440
987
|
pageCount: scopedImages.length
|
|
441
988
|
});
|
|
442
989
|
const pageExtracts = [];
|
|
990
|
+
// Phase 0: analyze chapter structure with sampled pages
|
|
991
|
+
const structureSampleSize = Math.min(
|
|
992
|
+
Number(process.env.KNOWLEDGE_STRUCTURE_SAMPLE_PAGES || 6),
|
|
993
|
+
scopedImages.length
|
|
994
|
+
);
|
|
995
|
+
let chapterStructure = '';
|
|
996
|
+
const structureCachePath = path.join(chapterDataPaths(chapter.id).context, 'chapter_structure.md');
|
|
997
|
+
const cachedStructure = force ? '' : await readFile(structureCachePath, 'utf8').catch(() => '');
|
|
998
|
+
if (cachedStructure) {
|
|
999
|
+
chapterStructure = cachedStructure;
|
|
1000
|
+
onProgress?.({
|
|
1001
|
+
step: 'knowledge_extract.structure.cached',
|
|
1002
|
+
message: '章节结构分析已有缓存,直接复用。'
|
|
1003
|
+
});
|
|
1004
|
+
} else {
|
|
1005
|
+
onProgress?.({
|
|
1006
|
+
step: 'knowledge_extract.structure.start',
|
|
1007
|
+
message: `正在分析《${chapter.fullTitle}》章节结构(抽样 ${structureSampleSize}/${scopedImages.length} 页)。`
|
|
1008
|
+
});
|
|
1009
|
+
const sampleImages = scopedImages.slice(0, structureSampleSize);
|
|
1010
|
+
const structurePrompt = await readPrompt('knowledge-structure.system.md');
|
|
1011
|
+
const structureAgent = await callVisionTextAgent({
|
|
1012
|
+
timeoutMs: Math.max(120000, KNOWLEDGE_PAGE_TIMEOUT_MS),
|
|
1013
|
+
retries: 1,
|
|
1014
|
+
system: structurePrompt,
|
|
1015
|
+
text: [
|
|
1016
|
+
`任务:快速浏览《${chapter.fullTitle}》(${chapter.track})的章节图片,输出整体结构框架。`,
|
|
1017
|
+
'',
|
|
1018
|
+
'要求:只做结构概览,不做详细提取。识别核心概念、主要公式/法则、常见易错类型、每页角色(概念引入/定义/推导/例题/总结)。'
|
|
1019
|
+
].join('\n'),
|
|
1020
|
+
imagePaths: sampleImages,
|
|
1021
|
+
onAttempt: null
|
|
1022
|
+
});
|
|
1023
|
+
if (structureAgent.ok && structureAgent.data) {
|
|
1024
|
+
chapterStructure = String(structureAgent.data || '').trim();
|
|
1025
|
+
await mkdir(path.dirname(structureCachePath), { recursive: true });
|
|
1026
|
+
await writeFile(structureCachePath, `${chapterStructure}\n`, 'utf8');
|
|
1027
|
+
onProgress?.({
|
|
1028
|
+
step: 'knowledge_extract.structure.done',
|
|
1029
|
+
message: `章节结构分析完成,已缓存。`
|
|
1030
|
+
});
|
|
1031
|
+
} else {
|
|
1032
|
+
chapterStructure = `# 章节结构分析\n\n## 章节主题\n${chapter.fullTitle}\n\n## 核心概念\n(结构分析未能完成,逐页提取将独立进行)\n`;
|
|
1033
|
+
onProgress?.({
|
|
1034
|
+
step: 'knowledge_extract.structure.failed',
|
|
1035
|
+
message: `章节结构分析失败:${structureAgent.reason || 'unknown'},继续逐页提取。`
|
|
1036
|
+
});
|
|
1037
|
+
}
|
|
1038
|
+
}
|
|
443
1039
|
for (let index = 0; index < scopedImages.length; index += 1) {
|
|
444
1040
|
pageExtracts.push(await extractChapterPage({
|
|
445
1041
|
chapter,
|
|
@@ -448,6 +1044,7 @@ export async function extractChapterKnowledge({
|
|
|
448
1044
|
pageCount: scopedImages.length,
|
|
449
1045
|
force,
|
|
450
1046
|
extractProfile: normalizedProfile,
|
|
1047
|
+
chapterStructure,
|
|
451
1048
|
onProgress
|
|
452
1049
|
}));
|
|
453
1050
|
}
|