npm - @zhouchangui/math-ati - Versions diffs - 0.1.2 → 0.1.3 - Mend

@zhouchangui/math-ati 0.1.2 → 0.1.3

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (25)

package/AGENTS.md +3 -1
package/README.md +11 -0
package/dist/assets/{index-CGZslJ0a.css → index--Um9OfFu.css} +1 -1
package/dist/assets/index-CS-PgjYi.js +22 -0
package/dist/index.html +3 -3
package/package.json +3 -2
package/prompts/geometry-practice-experience.md +44 -0
package/prompts/knowledge-extract.system.md +35 -54
package/prompts/knowledge-summarize.system.md +8 -6
package/prompts/practice-generate.system.md +6 -4
package/prompts/practice-review.system.md +4 -2
package/prompts/practice-revise.system.md +5 -4
package/prompts/svg-figure-review.system.md +13 -0
package/prompts/svg-figure-revise.system.md +21 -0
package/server/agentClient.js +179 -10
package/server/coveragePlanner.js +174 -0
package/server/fileStore.js +40 -7
package/server/index.js +30 -1
package/server/knowledgeExtractor.js +553 -115
package/server/practiceGenerator.js +610 -83
package/server/practicePaperHtml.js +105 -35
package/server/practiceService.js +27 -2
package/server/submissionService.js +1 -1
package/server/svgFigureVerifier.js +315 -0
package/dist/assets/index-CGfjl7nO.js +0 -22

package/server/knowledgeExtractor.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { readdir } from 'node:fs/promises';
+import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
 import path from 'node:path';
-import { callChatAgent, callVisionAgent } from './agentClient.js';
+import { callChatAgent, callChatTextAgent, callVisionTextAgent } from './agentClient.js';
 import {
   chapterDataPaths,
   ensureChapterDataDirs,
@@ -17,13 +17,24 @@ const KNOWLEDGE_PAGE_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_TIME
 const KNOWLEDGE_SUMMARY_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_TIMEOUT_MS || 120000);
 const KNOWLEDGE_PAGE_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_RETRIES || 2);
 const KNOWLEDGE_SUMMARY_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_RETRIES || 2);
+const KNOWLEDGE_SUMMARY_PAGE_CHUNK_SIZE = Number(process.env.KNOWLEDGE_SUMMARY_PAGE_CHUNK_SIZE || 4);
+const KNOWLEDGE_MAX_CORE_POINTS = Number(process.env.KNOWLEDGE_MAX_CORE_POINTS || 24);
+const KNOWLEDGE_MAX_MISTAKE_POINTS = Number(process.env.KNOWLEDGE_MAX_MISTAKE_POINTS || 8);
 function extractionDir(chapterId) {
   return chapterDataPaths(chapterId).pageExtracts;
 }
 function pageExtractPath(chapterId, imageFile) {
-  return path.join(extractionDir(chapterId), `${path.basename(imageFile, path.extname(imageFile))}.json`);
+  return path.join(extractionDir(chapterId), `${path.basename(imageFile, path.extname(imageFile))}.md`);
+}
+function chunkSummaryDir(chapterId) {
+  return path.join(chapterDataPaths(chapterId).knowledge, 'chunk_summaries');
+}
+function chunkSummaryPath(chapterId, chunkIndex) {
+  return path.join(chunkSummaryDir(chapterId), `chunk-${String(chunkIndex + 1).padStart(2, '0')}.md`);
 }
 function summaryPath(chapterId) {
@@ -46,17 +57,15 @@ async function chapterImages(chapter) {
 }
 function normalizeExtractProfile(profile = {}) {
-  const detailLevels = new Set(['exam_focus', 'balanced', 'fine_grained']);
   const baselines = new Set(['strong', 'normal', 'weak']);
-  const allowedFocus = new Set(['exam_points', 'error_prone', 'prerequisite_gaps', 'calculation_links']);
-  const focus = Array.isArray(profile.focus)
-    ? profile.focus.filter((item) => allowedFocus.has(item))
-    : [];
   return {
-    detailLevel: detailLevels.has(profile.detailLevel) ? profile.detailLevel : 'exam_focus',
+    detailLevel: 'exam_focus',
     studentBaseline: baselines.has(profile.studentBaseline) ? profile.studentBaseline : 'strong',
-    focus: focus.length ? focus : ['exam_points', 'error_prone'],
-    displayLayer: profile.displayLayer === 'fine_grained_wall' ? 'fine_grained_wall' : 'exam_point_wall'
+    focus: ['exam_points', 'error_prone'],
+    displayLayer: 'exam_point_wall',
+    maxCorePointCount: Math.max(8, Math.min(40, Number(profile.maxCorePointCount || KNOWLEDGE_MAX_CORE_POINTS))),
+    maxMistakePointCount: Math.max(0, Math.min(16, Number(profile.maxMistakePointCount || KNOWLEDGE_MAX_MISTAKE_POINTS))),
+    summaryChunkSize: Math.max(2, Math.min(8, Number(profile.summaryChunkSize || KNOWLEDGE_SUMMARY_PAGE_CHUNK_SIZE)))
   };
 }
@@ -90,6 +99,97 @@ function normalizePageExtract(chapter, imagePath, pageIndex, data, source = 'age
   };
 }
+function markdownSection(markdown, title) {
+  const lines = String(markdown || '').split(/\r?\n/);
+  const start = lines.findIndex((line) => new RegExp(`^##\\s+${title}\\s*$`).test(line.trim()));
+  if (start < 0) return '';
+  const end = lines.findIndex((line, index) => index > start && /^##\s+/.test(line.trim()));
+  return lines.slice(start + 1, end < 0 ? undefined : end).join('\n').trim();
+}
+function markdownHeadingItems(sectionText) {
+  const items = [];
+  const lines = String(sectionText || '').split(/\r?\n/);
+  let current = null;
+  for (const line of lines) {
+    const heading = line.match(/^###\s+(.+?)\s*$/);
+    if (heading) {
+      current = { title: heading[1].trim(), body: [] };
+      items.push(current);
+    } else if (current) {
+      current.body.push(line);
+    }
+  }
+  return items;
+}
+function fieldFromMarkdownBody(body, label) {
+  const pattern = new RegExp(`^\\s*-\\s*(?:\\*\\*)?${label}(?:\\*\\*)?[：:]\\s*`);
+  const line = body.find((item) => pattern.test(item.trim()));
+  return line ? line.replace(pattern, '').trim() : '';
+}
+function markdownFieldList(body, label) {
+  const pattern = new RegExp(`^\\s*-\\s*(?:\\*\\*)?${label}(?:\\*\\*)?[：:]\\s*`);
+  const nextFieldPattern = /^\s*-\s*(?:\*\*)?[\u4e00-\u9fa5A-Za-z0-9 /_-]+(?:\*\*)?[：:]/;
+  const start = body.findIndex((line) => pattern.test(line.trim()));
+  if (start < 0) return [];
+  const firstValue = body[start].replace(pattern, '').trim();
+  const values = firstValue ? [firstValue] : [];
+  for (let index = start + 1; index < body.length; index += 1) {
+    const line = body[index];
+    if (nextFieldPattern.test(line.trim())) break;
+    const listItem = line.match(/^\s*-\s+(.+?)\s*$/);
+    if (listItem?.[1]) values.push(listItem[1].trim());
+  }
+  return values.filter(Boolean);
+}
+function parsePageMarkdownExtract({ chapter, imagePath, pageIndex, pageCount, markdown }) {
+  const knowledgeItems = markdownHeadingItems(markdownSection(markdown, '知识点'));
+  const mistakeItems = markdownHeadingItems(markdownSection(markdown, '易错点'));
+  const pageTitle = markdownSection(markdown, '页面标题').split(/\r?\n/).find(Boolean) || path.basename(imagePath);
+  return {
+    chapterId: chapter.id,
+    chapterTitle: chapter.fullTitle,
+    imageFile: path.basename(imagePath),
+    pageIndex,
+    pageCount,
+    source: 'agent_markdown',
+    extractedAt: new Date().toISOString(),
+    pageTitle: pageTitle.replace(/^#+\s*/, '').trim(),
+    rawOutline: markdownSection(markdown, '原文结构')
+      .split(/\r?\n/)
+      .map((line) => line.replace(/^\s*-\s*/, '').trim())
+      .filter(Boolean),
+    knowledgePoints: knowledgeItems.map((item) => ({
+      title: item.title,
+      summary: fieldFromMarkdownBody(item.body, '摘要'),
+      formulas: fieldFromMarkdownBody(item.body, '公式')
+        .split(/[；;]/)
+        .map((value) => value.trim())
+        .filter(Boolean),
+      examples: fieldFromMarkdownBody(item.body, '例子')
+        .split(/[；;]/)
+        .map((value) => value.trim())
+        .filter(Boolean),
+      prerequisite: fieldFromMarkdownBody(item.body, '前置'),
+      difficulty: fieldFromMarkdownBody(item.body, '难度') || 'basic'
+    })),
+    easyMistakes: mistakeItems.map((item) => ({
+      title: item.title,
+      errorType: fieldFromMarkdownBody(item.body, '错因') || item.title,
+      description: fieldFromMarkdownBody(item.body, '说明'),
+      correction: fieldFromMarkdownBody(item.body, '纠正')
+    })),
+    exerciseHints: markdownSection(markdown, '出题方向')
+      .split(/\r?\n/)
+      .map((line) => line.replace(/^\s*-\s*/, '').trim())
+      .filter(Boolean),
+    markdown
+  };
+}
 function knowledgeExtractionError(reason, detail = '') {
   const error = new Error(`knowledge_extraction_failed:${reason}`);
   error.status = 502;
@@ -118,8 +218,9 @@ export async function extractChapterPage({
   await ensureChapterWorkspace(chapter);
   const outputPath = pageExtractPath(chapter.id, imagePath);
   if (!force) {
-    const existing = await readJson(outputPath, null);
+    const existing = await readFile(outputPath, 'utf8').catch(() => '');
     if (existing) {
+      const parsed = parsePageMarkdownExtract({ chapter, imagePath, pageIndex, pageCount, markdown: existing });
       onProgress?.({
         step: 'knowledge_extract.page.cached',
         message: `第 ${pageIndex}/${pageCount || '?'} 页已有提取缓存，直接复用。`,
@@ -127,7 +228,7 @@ export async function extractChapterPage({
         pageCount,
         imageFile: path.basename(imagePath)
       });
-      return existing;
+      return parsed;
     }
   }
   const systemPrompt = await readPrompt('knowledge-extract.system.md');
@@ -138,46 +239,36 @@ export async function extractChapterPage({
     pageCount,
     imageFile: path.basename(imagePath)
   });
-  const agent = await callVisionAgent({
+  const agent = await callVisionTextAgent({
     timeoutMs: KNOWLEDGE_PAGE_TIMEOUT_MS,
     retries: KNOWLEDGE_PAGE_RETRIES,
     system: systemPrompt,
-    text: promptPayload({
-      task: '从这一页提分笔记图片中逐项提取知识点、公式、例题线索和易错点。',
-      context: {
-        chapter: {
-          id: chapter.id,
-          title: chapter.fullTitle,
-          track: chapter.track
-        },
-        extractProfile: normalizeExtractProfile(extractProfile || {})
-      },
-      requirements: [
-        'knowledgePoints 要覆盖页面出现的每个概念、性质、公式、方法或题型。',
-        'easyMistakes 要提取页面明确写出的易错点，也可以从页面中的提醒、比较、条件限制中归纳，但不能凭空添加。',
-        'summary 用学生能懂的短句；公式必须使用 $...$。',
-        'exerciseHints 只写题型方向，不写完整答案。'
-      ],
-      schema: {
-        pageTitle: 'string',
-        rawOutline: ['string'],
-        knowledgePoints: [{
-          title: 'string',
-          summary: 'string',
-          formulas: ['string with LaTeX'],
-          examples: ['short example or expression'],
-          prerequisite: 'string',
-          difficulty: 'basic|medium|challenge'
-        }],
-        easyMistakes: [{
-          title: 'string',
-          errorType: 'string',
-          description: 'string',
-          correction: 'string'
-        }],
-        exerciseHints: ['string']
-      }
-    }),
+    text: [
+      `任务：从这一页提分笔记图片中提取可用于后续汇总的 Markdown 笔记。`,
+      '',
+      `章节：${chapter.id} ${chapter.fullTitle}`,
+      `主线：${chapter.track}`,
+      `提取策略：考点和易错点优先；不要把例子拆成独立知识点；不要补充图片没有出现的内容。`,
+      '',
+      '输出必须是 Markdown，且只包含以下标题：',
+      '# 页面知识提取',
+      '## 页面标题',
+      '## 原文结构',
+      '## 知识点',
+      '### <知识点标题>',
+      '- 摘要：<一句话>',
+      '- 公式：<可为空，多个用分号>',
+      '- 例子：<可为空，多个用分号>',
+      '- 前置：<可为空>',
+      '- 难度：basic|medium|challenge',
+      '## 易错点',
+      '### <易错点标题>',
+      '- 错因：<错误类型>',
+      '- 说明：<错误表现>',
+      '- 纠正：<正确做法>',
+      '## 出题方向',
+      '- <题型方向，只写方向，不写完整答案>'
+    ].join('\n'),
     imagePaths: [imagePath],
     onAttempt: ({ phase, attempt, attempts, delayMs, result }) => {
       const base = `第 ${pageIndex}/${pageCount || '?'} 页识别`;
@@ -210,9 +301,12 @@ export async function extractChapterPage({
       `第 ${pageIndex}/${pageCount || '?'} 页 ${path.basename(imagePath)} 识别失败，已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
     );
   }
-  const extract = normalizePageExtract(chapter, imagePath, pageIndex, agent.data, 'agent');
-  await writeJson(outputPath, extract);
-  await writeJson(path.join(paths.knowledgeExtracts, chapter.id, `${path.basename(imagePath, path.extname(imagePath))}.json`), extract);
+  const markdown = String(agent.data || '').trim();
+  const extract = parsePageMarkdownExtract({ chapter, imagePath, pageIndex, pageCount, markdown });
+  await writeFile(outputPath, `${markdown}\n`, 'utf8');
+  const mirrorPath = path.join(paths.knowledgeExtracts, chapter.id, `${path.basename(imagePath, path.extname(imagePath))}.md`);
+  await mkdir(path.dirname(mirrorPath), { recursive: true });
+  await writeFile(mirrorPath, `${markdown}\n`, 'utf8');
   onProgress?.({
     step: 'knowledge_extract.page.done',
     message: `第 ${pageIndex}/${pageCount || '?'} 页识别完成，提取 ${extract.knowledgePoints.length} 个知识点。`,
@@ -226,7 +320,7 @@ export async function extractChapterPage({
 function dedupeByTitle(items) {
   const seen = new Map();
   for (const item of items) {
-    const key = String(item.title || item.errorType || '').replace(/\s+/g, '');
+    const key = normalizedKnowledgeKey(item.title || item.errorType || '');
     if (!key) continue;
     if (!seen.has(key)) {
       seen.set(key, { ...item });
@@ -242,6 +336,131 @@ function dedupeByTitle(items) {
   return [...seen.values()];
 }
+function normalizedKnowledgeKey(value) {
+  return String(value || '')
+    .toLowerCase()
+    .replace(/[“”"‘'`]/g, '')
+    .replace(/[（(].*?[）)]/g, '')
+    .replace(/[：:，,。；;、\s·\-—_]/g, '')
+    .replace(/的概念|概念|性质|定义|方法|判定|定理|公式/g, '')
+    .trim();
+}
+function chunkArray(items, size) {
+  const chunks = [];
+  for (let index = 0; index < items.length; index += size) {
+    chunks.push(items.slice(index, index + size));
+  }
+  return chunks;
+}
+function countDocPoints(doc) {
+  return (doc.sections || []).reduce((sum, section) => sum + (section.points?.length || 0), 0);
+}
+function pointHasUsableTemplate(point) {
+  return Array.isArray(point.questionTemplates)
+    && point.questionTemplates.some((template) => Array.isArray(template) && template[0] && template[1]);
+}
+function normalizePointForBudget(point) {
+  return {
+    ...point,
+    title: String(point.title || '').trim(),
+    summary: String(point.summary || '').trim(),
+    formulas: Array.isArray(point.formulas) ? point.formulas.filter(Boolean).slice(0, 4) : [],
+    pitfalls: Array.isArray(point.pitfalls) ? point.pitfalls.filter(Boolean).slice(0, 5) : [],
+    examples: Array.isArray(point.examples) ? point.examples.filter(Boolean).slice(0, 4) : [],
+    questionTemplates: Array.isArray(point.questionTemplates) && point.questionTemplates.length
+      ? point.questionTemplates.filter((template) => Array.isArray(template)).slice(0, 3)
+      : [[
+          `围绕「${point.title || '本知识点'}」完成一道基础覆盖题，并写出关键结论。`,
+          point.summary || '答案需符合知识点定义、性质或方法。',
+          point.title || '知识点理解错误'
+        ]]
+  };
+}
+function mergeDuplicatePoints(points) {
+  const byKey = new Map();
+  for (const rawPoint of points) {
+    const point = normalizePointForBudget(rawPoint);
+    const key = normalizedKnowledgeKey(point.title);
+    if (!key) continue;
+    if (!byKey.has(key)) {
+      byKey.set(key, point);
+      continue;
+    }
+    const current = byKey.get(key);
+    current.summary = current.summary.length >= point.summary.length ? current.summary : point.summary;
+    current.formulas = [...new Set([...(current.formulas || []), ...(point.formulas || [])])].slice(0, 4);
+    current.pitfalls = [...new Set([...(current.pitfalls || []), ...(point.pitfalls || [])])].slice(0, 5);
+    current.examples = [...new Set([...(current.examples || []), ...(point.examples || [])])].slice(0, 4);
+    current.questionTemplates = [...(current.questionTemplates || []), ...(point.questionTemplates || [])]
+      .filter((template, index, templates) => (
+        Array.isArray(template)
+          && templates.findIndex((candidate) => Array.isArray(candidate) && candidate[0] === template[0]) === index
+      ))
+      .slice(0, 3);
+    current.sources = [...new Set([...(current.sources || []), ...(point.sources || [])])];
+  }
+  return [...byKey.values()];
+}
+function pointPriority(point) {
+  let score = 0;
+  if (pointHasUsableTemplate(point)) score += 4;
+  if (point.summary) score += 2;
+  if (point.pitfalls?.length) score += 2;
+  if (point.formulas?.length) score += 1;
+  if (point.examples?.length) score += 1;
+  return score;
+}
+function enforceKnowledgeBudget(chapter, doc, profile) {
+  const maxCore = profile.maxCorePointCount || KNOWLEDGE_MAX_CORE_POINTS;
+  const maxMistakes = profile.maxMistakePointCount ?? KNOWLEDGE_MAX_MISTAKE_POINTS;
+  const corePoints = [];
+  const mistakePoints = [];
+  for (const section of doc.sections || []) {
+    const isMistakeSection = /易错|错题|错误|mistake/i.test(section.title || '');
+    for (const point of section.points || []) {
+      if (isMistakeSection) mistakePoints.push(point);
+      else corePoints.push(point);
+    }
+  }
+  const dedupedCore = mergeDuplicatePoints(corePoints)
+    .sort((a, b) => pointPriority(b) - pointPriority(a))
+    .slice(0, maxCore)
+    .map((point, index) => ({
+      ...point,
+      id: `${chapter.id}-kp-${String(index + 1).padStart(2, '0')}`
+    }));
+  const dedupedMistakes = mergeDuplicatePoints(mistakePoints)
+    .sort((a, b) => pointPriority(b) - pointPriority(a))
+    .slice(0, maxMistakes)
+    .map((point, index) => ({
+      ...point,
+      id: `${chapter.id}-mistake-${String(index + 1).padStart(2, '0')}`
+    }));
+  return {
+    ...doc,
+    sections: [
+      { title: '知识点覆盖', points: dedupedCore },
+      { title: '易错题专项', points: dedupedMistakes }
+    ].filter((section) => section.points.length),
+    review: {
+      ...(doc.review || {}),
+      pointBudget: {
+        maxCorePointCount: maxCore,
+        maxMistakePointCount: maxMistakes,
+        corePointCount: dedupedCore.length,
+        mistakePointCount: dedupedMistakes.length
+      }
+    }
+  };
+}
 function localMergeChapter(chapter, pageExtracts) {
   const points = dedupeByTitle(pageExtracts.flatMap((page) =>
     page.knowledgePoints.map((point) => ({
@@ -302,79 +521,132 @@ function localMergeChapter(chapter, pageExtracts) {
   };
 }
-export async function summarizeChapterExtraction({
+async function callKnowledgeSummaryAgent({
+  systemPrompt,
   chapter,
-  pageExtracts,
-  extractProfile = null,
-  resetLearningState = false,
-  onProgress = null
+  task,
+  context,
+  requirements,
+  schema,
+  onProgress,
+  progressPrefix
 }) {
-  const local = localMergeChapter(chapter, pageExtracts);
-  const normalizedProfile = normalizeExtractProfile(extractProfile || {});
-  const systemPrompt = await readPrompt('knowledge-summarize.system.md');
-  onProgress?.({
-    step: 'knowledge_extract.summary.start',
-    message: `正在合并 ${pageExtracts.length} 页提取结果，生成章节知识点。`,
-    pageCount: pageExtracts.length
-  });
-  const agent = await callChatAgent({
+  return callChatAgent({
     timeoutMs: KNOWLEDGE_SUMMARY_TIMEOUT_MS,
     retries: KNOWLEDGE_SUMMARY_RETRIES,
     temperature: 0.1,
     system: systemPrompt,
-    user: promptPayload({
-      task: '把逐页提取结果合并成章节知识文档，并做覆盖检查。',
-      context: {
-        chapter,
-        pageExtracts,
-        localDraft: local,
-        extractProfile: normalizedProfile
-      },
-      requirements: [
-        '合并同义知识点，保留来源页。',
-        normalizedProfile.detailLevel === 'fine_grained'
-          ? '当前提取画像要求细粒度拆分：保留必要前置概念、步骤性方法和基础易错点。'
-          : '当前提取画像要求考点优先：优先保留考试常见考点、易错点、变式边界和必要前置关系，不把教材说明拆得过碎。',
-        'sections 至少包含“知识点覆盖”；如果有易错点，单独包含“易错题专项”。',
-        '每个知识点必须有 id、title、summary、formulas、pitfalls、questionTemplates。',
-        'questionTemplates 用于后续出题，题干只写题目，不写解题过程。',
-        'review.missingOrWeak 列出疑似遗漏或需要人工复核的点。'
-      ],
-      schema: {
-        sections: [{
-          title: '知识点覆盖',
-          points: [{
-            id: `${chapter.id}-kp-01`,
-            title: 'string',
-            summary: 'string',
-            formulas: ['string with LaTeX'],
-            pitfalls: ['string'],
-            examples: ['string'],
-            questionTemplates: [['stem', 'answer', 'expectedErrorType']],
-            sources: ['image filename']
-          }]
-        }],
-        review: {
-          passed: true,
-          coverageSummary: 'string',
-          missingOrWeak: ['string'],
-          duplicateMerged: ['string']
-        }
+    user: promptPayload({ task, context, requirements, schema }),
+    onAttempt: ({ phase, attempt, attempts, delayMs, result }) => {
+      if (phase === 'start') {
+        onProgress?.({
+          step: `${progressPrefix}.attempt`,
+          message: `${chapter.fullTitle}知识点合并：第 ${attempt}/${attempts} 次尝试。`,
+          attempt,
+          attempts
+        });
+      }
+      if (phase === 'retry') {
+        onProgress?.({
+          step: `${progressPrefix}.retry`,
+          message: `知识点合并遇到${retryReasonText(result?.reason)}，${Math.round(delayMs / 1000)} 秒后自动重试。`,
+          attempt,
+          attempts,
+          reason: result?.reason || null
+        });
       }
-    }),
+    }
+  });
+}
+function knowledgeSummarySchema(chapter) {
+  return {
+    sections: [{
+      title: '知识点覆盖',
+      points: [{
+        id: `${chapter.id}-kp-01`,
+        title: 'string',
+        summary: 'string',
+        formulas: ['string with LaTeX'],
+        pitfalls: ['string'],
+        examples: ['string'],
+        questionTemplates: [['stem', 'answer', 'expectedErrorType']],
+        sources: ['image filename']
+      }]
+    }],
+    review: {
+      passed: true,
+      coverageSummary: 'string',
+      missingOrWeak: ['string'],
+      duplicateMerged: ['string']
+    }
+  };
+}
+async function summarizePageChunk({ chapter, pages, chunkIndex, chunkCount, systemPrompt, normalizedProfile, onProgress }) {
+  const pageLabels = pages.map((page) => page.imageFile).join('、');
+  const cachedMarkdown = await readFile(chunkSummaryPath(chapter.id, chunkIndex), 'utf8').catch(() => '');
+  if (cachedMarkdown) {
+    const knowledgePointCount = (cachedMarkdown.match(/^###\s+/gm) || []).length;
+    onProgress?.({
+      step: 'knowledge_extract.summary.chunk.cached',
+      message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 已有 Markdown 汇总缓存，直接复用。`,
+      chunkIndex: chunkIndex + 1,
+      chunkCount,
+      knowledgePointCount
+    });
+    return {
+      chunkIndex: chunkIndex + 1,
+      pageFiles: pages.map((page) => page.imageFile),
+      markdown: cachedMarkdown
+    };
+  }
+  onProgress?.({
+    step: 'knowledge_extract.summary.chunk.start',
+    message: `正在合并知识点分组 ${chunkIndex + 1}/${chunkCount}（${pageLabels}）。`,
+    chunkIndex: chunkIndex + 1,
+    chunkCount,
+    pageCount: pages.length
+  });
+  const agent = await callChatTextAgent({
+    system: systemPrompt,
+    timeoutMs: KNOWLEDGE_SUMMARY_TIMEOUT_MS,
+    retries: KNOWLEDGE_SUMMARY_RETRIES,
+    temperature: 0.1,
+    user: [
+      `任务：合并《${chapter.fullTitle}》第 ${chunkIndex + 1}/${chunkCount} 组逐页 Markdown 提取结果。`,
+      '',
+      `本组页面：${pageLabels}`,
+      `数量控制：核心知识点不超过 ${Math.ceil(normalizedProfile.maxCorePointCount / chunkCount) + 4} 个；易错点不超过 ${Math.ceil(normalizedProfile.maxMistakePointCount / chunkCount) + 2} 个。`,
+      '',
+      '要求：',
+      '- 只基于输入页面合并知识点，不新增页面没有依据的内容。',
+      '- 合并同义、过细、重复候选，保留来源页。',
+      '- 优先保留考试常见考点、易错边界、几何概念辨析和可出题的方法。',
+      '- 输出 Markdown，不输出 JSON。',
+      '- 必须包含标题：# 分组知识汇总、## 知识点覆盖、## 易错题专项、## 合并说明。',
+      '- 每个知识点用三级标题，包含：摘要、来源、公式、易错边界、出题模板。',
+      '',
+      '逐页 Markdown：',
+      ...pages.map((page) => [
+        `\n---\n`,
+        `来源页：${page.imageFile}`,
+        page.markdown || ''
+      ].join('\n'))
+    ].join('\n'),
     onAttempt: ({ phase, attempt, attempts, delayMs, result }) => {
       if (phase === 'start') {
         onProgress?.({
-          step: 'knowledge_extract.summary.attempt',
-          message: `章节汇总：第 ${attempt}/${attempts} 次尝试。`,
+          step: 'knowledge_extract.summary.chunk.attempt',
+          message: `知识点分组 ${chunkIndex + 1}/${chunkCount}：第 ${attempt}/${attempts} 次尝试。`,
           attempt,
           attempts
         });
       }
       if (phase === 'retry') {
         onProgress?.({
-          step: 'knowledge_extract.summary.retry',
-          message: `章节汇总遇到${retryReasonText(result?.reason)}，${Math.round(delayMs / 1000)} 秒后自动重试。`,
+          step: 'knowledge_extract.summary.chunk.retry',
+          message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 遇到${retryReasonText(result?.reason)}，${Math.round(delayMs / 1000)} 秒后自动重试。`,
           attempt,
           attempts,
           reason: result?.reason || null
@@ -382,13 +654,178 @@ export async function summarizeChapterExtraction({
       }
     }
   });
-  if (!agent.ok || !Array.isArray(agent.data?.sections)) {
+  if (!agent.ok || !agent.data) {
     throw knowledgeExtractionError(
-      agent.reason || 'invalid_agent_response',
-      `章节汇总失败，已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
+      agent.reason || 'empty_response',
+      `知识点分组 ${chunkIndex + 1}/${chunkCount} 合并失败，已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
     );
   }
-  const merged = agent.data;
+  const markdown = String(agent.data || '').trim();
+  await mkdir(chunkSummaryDir(chapter.id), { recursive: true });
+  await writeFile(chunkSummaryPath(chapter.id, chunkIndex), `${markdown}\n`, 'utf8');
+  const knowledgePointCount = (markdown.match(/^###\s+/gm) || []).length;
+  onProgress?.({
+    step: 'knowledge_extract.summary.chunk.done',
+    message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 合并完成，得到约 ${knowledgePointCount} 个候选点。`,
+    chunkIndex: chunkIndex + 1,
+    chunkCount,
+    knowledgePointCount
+  });
+  return {
+    chunkIndex: chunkIndex + 1,
+    pageFiles: pages.map((page) => page.imageFile),
+    markdown
+  };
+}
+function cleanMarkdownTitle(title) {
+  return String(title || '')
+    .replace(/^\s*\d+[.、]\s*/, '')
+    .replace(/^#+\s*/, '')
+    .trim();
+}
+function splitMarkdownValues(value) {
+  return String(value || '')
+    .split(/[；;、,，]/)
+    .map((item) => item.trim())
+    .filter((item) => item && item !== '无');
+}
+function chunkMarkdownPoints(markdown, sectionTitle, pointType = 'core') {
+  const section = markdownSection(markdown, sectionTitle);
+  return markdownHeadingItems(section).map((item) => {
+    const summary = fieldFromMarkdownBody(item.body, '摘要');
+    const sources = splitMarkdownValues(fieldFromMarkdownBody(item.body, '来源'));
+    const formulas = splitMarkdownValues(fieldFromMarkdownBody(item.body, '公式'));
+    const pitfalls = [
+      ...splitMarkdownValues(fieldFromMarkdownBody(item.body, '易错边界')),
+      ...splitMarkdownValues(fieldFromMarkdownBody(item.body, '错因')),
+      ...splitMarkdownValues(fieldFromMarkdownBody(item.body, '说明'))
+    ].filter(Boolean);
+    const templateStems = markdownFieldList(item.body, '出题模板');
+    const title = cleanMarkdownTitle(item.title);
+    return {
+      title,
+      summary,
+      formulas,
+      pitfalls: [...new Set(pitfalls)].slice(0, 5),
+      examples: [],
+      questionTemplates: (templateStems.length ? templateStems : [
+        `围绕「${title}」设计一道${pointType === 'mistake' ? '易错辨析' : '基础覆盖'}题。`
+      ]).slice(0, 3).map((stem) => [
+        stem,
+        summary || `正确运用「${title}」相关概念、性质或方法。`,
+        pitfalls[0] || title
+      ]),
+      sources
+    };
+  }).filter((point) => point.title);
+}
+function localMergeChunkSummaries(chapter, chunkDocs) {
+  const corePoints = [];
+  const mistakePoints = [];
+  const mergeNotes = [];
+  for (const chunk of chunkDocs) {
+    corePoints.push(...chunkMarkdownPoints(chunk.markdown, '知识点覆盖', 'core'));
+    mistakePoints.push(...chunkMarkdownPoints(chunk.markdown, '易错题专项', 'mistake'));
+    mergeNotes.push(...markdownSection(chunk.markdown, '合并说明')
+      .split(/\r?\n/)
+      .map((line) => line.replace(/^\s*-\s*/, '').trim())
+      .filter(Boolean));
+  }
+  return {
+    sections: [
+      { title: '知识点覆盖', points: corePoints },
+      { title: '易错题专项', points: mistakePoints }
+    ].filter((section) => section.points.length),
+    review: {
+      passed: corePoints.length > 0,
+      coverageSummary: `由 ${chunkDocs.length} 个分组 Markdown 汇总合并生成最终章节知识点，并在保存前执行去重和数量控制。`,
+      missingOrWeak: [],
+      duplicateMerged: mergeNotes.slice(0, 20)
+    }
+  };
+}
+export async function summarizeChapterExtraction({
+  chapter,
+  pageExtracts,
+  extractProfile = null,
+  resetLearningState = false,
+  onProgress = null
+}) {
+  const local = localMergeChapter(chapter, pageExtracts);
+  const normalizedProfile = normalizeExtractProfile(extractProfile || {});
+  const systemPrompt = await readPrompt('knowledge-summarize.system.md');
+  onProgress?.({
+    step: 'knowledge_extract.summary.start',
+    message: `正在合并 ${pageExtracts.length} 页提取结果，生成章节知识点。`,
+    pageCount: pageExtracts.length
+  });
+  const pageChunks = chunkArray(pageExtracts, normalizedProfile.summaryChunkSize);
+  const chunkDocs = [];
+  if (pageChunks.length > 1) {
+    for (let index = 0; index < pageChunks.length; index += 1) {
+      chunkDocs.push(await summarizePageChunk({
+        chapter,
+        pages: pageChunks[index],
+        chunkIndex: index,
+        chunkCount: pageChunks.length,
+        systemPrompt,
+        normalizedProfile,
+        onProgress
+      }));
+    }
+  }
+  const finalContext = pageChunks.length > 1
+    ? { chapter, chunkDocs, localDraft: local, extractProfile: normalizedProfile }
+    : { chapter, pageExtracts, localDraft: local, extractProfile: normalizedProfile };
+  onProgress?.({
+    step: 'knowledge_extract.summary.final.start',
+    message: pageChunks.length > 1
+      ? `正在把 ${chunkDocs.length} 个知识点分组合并为最终章节知识。`
+      : '正在生成最终章节知识点。',
+    chunkCount: chunkDocs.length
+  });
+  let finalDoc = null;
+  if (pageChunks.length > 1) {
+    finalDoc = localMergeChunkSummaries(chapter, chunkDocs);
+    onProgress?.({
+      step: 'knowledge_extract.summary.final.local',
+      message: `已从 ${chunkDocs.length} 个分组 Markdown 生成最终章节知识草稿，正在去重和限量。`,
+      chunkCount: chunkDocs.length
+    });
+  } else {
+    const agent = await callKnowledgeSummaryAgent({
+      systemPrompt,
+      chapter,
+      task: '把逐页提取结果合并成章节知识文档，并做覆盖检查。',
+      progressPrefix: 'knowledge_extract.summary.final',
+      onProgress,
+      context: finalContext,
+      requirements: [
+        '合并同义知识点，保留来源页。',
+        `最终“知识点覆盖”核心点数量控制在 ${normalizedProfile.maxCorePointCount} 个以内。`,
+        `最终“易错题专项”数量控制在 ${normalizedProfile.maxMistakePointCount} 个以内。`,
+        '优先保留考试常见考点、易错点、变式边界、几何概念辨析和必要前置关系，不把教材说明拆得过碎。',
+        'sections 至少包含“知识点覆盖”；如果有易错点，单独包含“易错题专项”。',
+        '每个知识点必须有 title、summary、formulas、pitfalls、questionTemplates。',
+        'questionTemplates 用于后续出题，题干只写题目，不写解题过程。',
+        'review.missingOrWeak 列出疑似遗漏或需要人工复核的点。'
+      ],
+      schema: knowledgeSummarySchema(chapter)
+    });
+    if (!agent.ok || !Array.isArray(agent.data?.sections)) {
+      throw knowledgeExtractionError(
+        agent.reason || 'invalid_agent_response',
+        `章节汇总失败，已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
+      );
+    }
+    finalDoc = agent.data;
+  }
+  const merged = enforceKnowledgeBudget(chapter, finalDoc, normalizedProfile);
   const extractedAt = new Date().toISOString();
   const normalized = await saveKnowledgeDoc(chapter, merged, 'agent', {
     extractProfile: normalizedProfile,
@@ -404,6 +841,7 @@ export async function summarizeChapterExtraction({
     updatedAt: extractedAt,
     extractProfile: normalizedProfile,
     extractorVersion: 1,
+    summaryStrategy: pageChunks.length > 1 ? 'chunked_markdown_local_merge' : 'single_merge',
     review: merged.review || null,
     knowledgePointCount: normalized.sections.reduce((sum, section) => sum + section.points.length, 0)
   };